47#ifndef __ARCH_ARM_INSTS_FPLIB_HH__
48#define __ARCH_ARM_INSTS_FPLIB_HH__
73 return (
FPRounding)((uint32_t)fpscr >> 22 & 3);
81T
fplibAdd(T op1, T op2, FPSCR &fpscr, FPCR fpcr = 0);
84int fplibCompare(T op1, T op2,
bool signal_nans, FPSCR &fpscr, FPCR fpcr = 0);
98template <
class T1,
class T2>
102T
fplibDiv(T op1, T op2, FPSCR &fpscr, FPCR fpcr = 0);
104template <
class T1,
class T2>
105T2
fplibDot(T1 op1_a, T1 op1_b, T1 op2_a, T1 op2_b, FPSCR &fpscr,
114T
fplibMax(T op1, T op2, FPSCR &fpscr, FPCR fpcr = 0);
120T
fplibMin(T op1, T op2, FPSCR &fpscr, FPCR fpcr = 0);
126T
fplibMul(T op1, T op2, FPSCR &fpscr, FPCR fpcr = 0);
130template <
class T1,
class T2>
160 FPSCR &fpscr, FPCR fpcr = 0);
169T
fplibSub(T op1, T op2, FPSCR &fpscr, FPCR fpcr = 0);
181template <
class T1,
class T2>
187 FPSCR &fpscr, FPCR fpcr = 0);
// fplibAdd overloads for uint16_t, uint32_t and uint64_t.
// Each takes two operands of the same width as its return type, a
// non-const FPSCR reference, and an FPCR passed by value.
// NOTE(review): presumably the integer widths carry half/single/double
// precision bit patterns and fpscr receives status flags -- confirm
// against the implementation.
210uint16_t
fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
212uint32_t
fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
214uint64_t
fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
216int fplibCompare(uint16_t op1, uint16_t op2,
bool signal_nans, FPSCR &fpscr,
219int fplibCompare(uint32_t op1, uint32_t op2,
bool signal_nans, FPSCR &fpscr,
222int fplibCompare(uint64_t op1, uint64_t op2,
bool signal_nans, FPSCR &fpscr,
// fplibCompareEQ overloads for 16-, 32- and 64-bit operands; each
// returns bool and takes a non-const FPSCR reference plus an FPCR.
// NOTE(review): presumably an "equal" comparison -- confirm.
225bool fplibCompareEQ(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
227bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
229bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibCompareGE overloads for 16-, 32- and 64-bit operands; each
// returns bool and takes a non-const FPSCR reference plus an FPCR.
// NOTE(review): presumably a "greater than or equal" comparison of
// op1 against op2 -- confirm operand order in the implementation.
231bool fplibCompareGE(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
233bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
235bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibCompareGT overloads for 16-, 32- and 64-bit operands; each
// returns bool and takes a non-const FPSCR reference plus an FPCR.
// NOTE(review): presumably a strict "greater than" comparison of op1
// against op2 -- confirm operand order in the implementation.
237bool fplibCompareGT(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
239bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
241bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibCompareUN overloads for 16-, 32- and 64-bit operands; each
// returns bool and takes a non-const FPSCR reference plus an FPCR.
// NOTE(review): "UN" presumably means "unordered" (a NaN operand) --
// confirm against the implementation.
243bool fplibCompareUN(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
245bool fplibCompareUN(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
247bool fplibCompareUN(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibDiv overloads for uint16_t, uint32_t and uint64_t.
// Two same-width operands in, one value of that width out; FPSCR is
// passed by non-const reference and FPCR by value.
// NOTE(review): presumably computes op1 / op2 -- confirm operand order.
267uint16_t
fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
269uint32_t
fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
271uint64_t
fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibDot: takes four 16-bit operands (op1_a, op1_b, op2_a, op2_b)
// and returns a 32-bit result, i.e. the result is wider than the inputs.
// NOTE(review): presumably a two-element dot product
// (op1_a*op2_a + op1_b*op2_b) -- confirm pairing in the implementation.
273uint32_t
fplibDot(uint16_t op1_a, uint16_t op1_b, uint16_t op2_a,
274 uint16_t op2_b, FPSCR &fpscr, FPCR fpcr);
// Explicit specializations (template <>) of fplibLogB for uint16_t,
// uint32_t and uint64_t. Each takes a single operand of the return
// width, a non-const FPSCR reference and an FPCR.
// NOTE(review): presumably extracts a base-2 exponent -- confirm.
280template <> uint16_t
fplibLogB(uint16_t
op, FPSCR &fpscr, FPCR fpcr);
281template <> uint32_t
fplibLogB(uint32_t
op, FPSCR &fpscr, FPCR fpcr);
282template <> uint64_t
fplibLogB(uint64_t
op, FPSCR &fpscr, FPCR fpcr);
// fplibMax overloads for uint16_t, uint32_t and uint64_t.
// Two same-width operands in, one value of that width out; FPSCR is
// passed by non-const reference and FPCR by value.
// NOTE(review): NaN handling is not visible here; see fplibMaxNum for
// the sibling variant -- confirm how the two differ.
284uint16_t
fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
286uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
288uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibMaxNum overloads for uint16_t, uint32_t and uint64_t; same
// signature shape as fplibMax.
// NOTE(review): presumably the "maximum number" variant that prefers a
// numeric operand over a quiet NaN -- confirm against the implementation.
290uint16_t
fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
292uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
294uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibMin overloads for uint16_t, uint32_t and uint64_t.
// Two same-width operands in, one value of that width out; FPSCR is
// passed by non-const reference and FPCR by value.
// NOTE(review): NaN handling is not visible here; see fplibMinNum for
// the sibling variant -- confirm how the two differ.
296uint16_t
fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
298uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
300uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibMinNum overloads for uint16_t, uint32_t and uint64_t; same
// signature shape as fplibMin.
// NOTE(review): presumably the "minimum number" variant that prefers a
// numeric operand over a quiet NaN -- confirm against the implementation.
302uint16_t
fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
304uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
306uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibMul overloads for uint16_t, uint32_t and uint64_t.
// Two same-width operands in, one value of that width out; FPSCR is
// passed by non-const reference and FPCR by value.
308uint16_t
fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
310uint32_t
fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
312uint64_t
fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibMulAdd overloads for uint16_t, uint32_t and uint64_t.
// Takes an addend plus two operands, all of the return width, followed
// by a non-const FPSCR reference and an FPCR.
// NOTE(review): presumably a fused addend + op1 * op2 -- confirm
// whether the intermediate product is rounded.
314uint16_t
fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2,
315 FPSCR &fpscr, FPCR fpcr);
317uint32_t
fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2,
318 FPSCR &fpscr, FPCR fpcr);
320uint64_t
fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2,
321 FPSCR &fpscr, FPCR fpcr);
// fplibMulAddH: mixed-width multiply-add. The addend and the result
// are 32-bit while op1 and op2 are 16-bit.
// NOTE(review): presumably the 16-bit operands are widened before the
// multiply-accumulate -- confirm against the implementation.
323uint32_t
fplibMulAddH(uint32_t addend, uint16_t op1, uint16_t op2,
324 FPSCR &fpscr, FPCR fpcr);
// fplibMulX overloads for uint16_t, uint32_t and uint64_t; same
// signature shape as fplibMul.
// NOTE(review): presumably the "multiply extended" variant with
// different zero-times-infinity handling -- confirm.
326uint16_t
fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
328uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
330uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibRecpX overloads for uint16_t, uint32_t and uint64_t.
// Unary: a single operand of the return width, a non-const FPSCR
// reference and an FPCR.
// NOTE(review): presumably a reciprocal-exponent operation -- confirm.
368uint16_t
fplibRecpX(uint16_t
op, FPSCR &fpscr, FPCR fpcr);
370uint32_t
fplibRecpX(uint32_t
op, FPSCR &fpscr, FPCR fpcr);
372uint64_t
fplibRecpX(uint64_t
op, FPSCR &fpscr, FPCR fpcr);
375 FPSCR &fpscr, FPCR fpcr);
378 FPSCR &fpscr, FPCR fpcr);
381 FPSCR &fpscr, FPCR fpcr);
384 int intsize, FPSCR &fpscr, FPCR fpcr);
387 int intsize, FPSCR &fpscr, FPCR fpcr);
// fplibScale overloads for uint16_t, uint32_t and uint64_t.
// Two same-width operands in, one value of that width out; FPSCR is
// passed by non-const reference and FPCR by value.
// NOTE(review): presumably scales op1 by a power of two taken from
// op2 -- confirm how op2 is interpreted.
389uint16_t
fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
391uint32_t
fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
393uint64_t
fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibSqrt overloads for uint16_t, uint32_t and uint64_t.
// Unary: a single operand of the return width, a non-const FPSCR
// reference and an FPCR.
395uint16_t
fplibSqrt(uint16_t
op, FPSCR &fpscr, FPCR fpcr);
397uint32_t
fplibSqrt(uint32_t
op, FPSCR &fpscr, FPCR fpcr);
399uint64_t
fplibSqrt(uint64_t
op, FPSCR &fpscr, FPCR fpcr);
// fplibSub overloads for uint16_t, uint32_t and uint64_t.
// Two same-width operands in, one value of that width out; FPSCR is
// passed by non-const reference and FPCR by value.
// NOTE(review): presumably computes op1 - op2 -- confirm operand order.
401uint16_t
fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
403uint32_t
fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
405uint64_t
fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibTrigMulAdd overloads for uint16_t, uint32_t and uint64_t.
// The first argument is always an 8-bit coeff_index regardless of the
// operand width; op1, op2 and the result share one width.
// NOTE(review): presumably coeff_index selects an entry from a
// trigonometric coefficient table -- confirm the valid index range.
407uint16_t
fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2,
408 FPSCR &fpscr, FPCR fpcr);
410uint32_t
fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2,
411 FPSCR &fpscr, FPCR fpcr);
413uint64_t
fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2,
414 FPSCR &fpscr, FPCR fpcr);
// fplibTrigSMul overloads for uint16_t, uint32_t and uint64_t.
// Two same-width operands in, one value of that width out; FPSCR is
// passed by non-const reference and FPCR by value.
// NOTE(review): the role of op2 is not visible from the declaration --
// confirm against the implementation.
416uint16_t
fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
418uint32_t
fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
420uint64_t
fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibTrigSSel overloads for uint16_t, uint32_t and uint64_t.
// Two same-width operands in, one value of that width out; FPSCR is
// passed by non-const reference and FPCR by value.
// NOTE(review): the role of op2 is not visible from the declaration --
// confirm against the implementation.
422uint16_t
fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
424uint32_t
fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr);
426uint64_t
fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr);
429 FPSCR &fpscr, FPCR fpcr);
432 FPSCR &fpscr, FPCR fpcr);
435 FPSCR &fpscr, FPCR fpcr);
438 FPSCR &fpscr, FPCR fpcr);
441 FPSCR &fpscr, FPCR fpcr);
444 FPSCR &fpscr, FPCR fpcr);
447 FPSCR &fpscr, FPCR fpcr);
450 FPSCR &fpscr, FPCR fpcr);
453 FPSCR &fpscr, FPCR fpcr);
456 FPSCR &fpscr, FPCR fpcr);
// "Bf"-prefixed two-operand functions (Add, Max, MaxNum, Min, MinNum,
// Mul). All share one signature shape: two 16-bit operands, a 16-bit
// result, a non-const FPSCR reference and an FPCR.
// NOTE(review): "Bf" presumably denotes the BFloat16 format, as opposed
// to the IEEE half-precision uint16_t overloads of the unprefixed
// names -- confirm against the implementation.
477uint16_t
fplibBfAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
478uint16_t
fplibBfMax(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
479uint16_t
fplibBfMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
480uint16_t
fplibBfMin(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
481uint16_t
fplibBfMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
482uint16_t
fplibBfMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibBfMulH: two 16-bit operands in, 32-bit result out.
// Unlike the neighbouring Bf declarations, this one takes no FPCR
// argument -- only the FPSCR reference.
// NOTE(review): confirm which control settings the implementation
// uses in place of a caller-supplied FPCR.
483uint32_t
fplibBfMulH(uint16_t op1, uint16_t op2, FPSCR &fpscr);
// fplibBfMulAdd: 16-bit addend and two 16-bit operands in, 16-bit
// result out; FPSCR by non-const reference, FPCR by value.
// NOTE(review): presumably addend + op1 * op2 -- confirm.
484uint16_t
fplibBfMulAdd(uint16_t addend, uint16_t op1, uint16_t op2,
485 FPSCR &fpscr, FPCR fpcr);
// fplibBfMulAddH: mixed-width multiply-add. The addend and the result
// are 32-bit while op1 and op2 are 16-bit.
// NOTE(review): presumably the 16-bit operands are widened before the
// multiply-accumulate -- confirm against the implementation.
486uint32_t
fplibBfMulAddH(uint32_t addend, uint16_t op1, uint16_t op2,
487 FPSCR &fpscr, FPCR fpcr);
// fplibBfSub: two 16-bit operands in, 16-bit result out; FPSCR by
// non-const reference, FPCR by value.
// NOTE(review): presumably computes op1 - op2 -- confirm operand order.
489uint16_t
fplibBfSub(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr);
// fplibAdd_Bf16: two 32-bit operands in, 32-bit result out.
// Takes only the FPSCR reference -- there is no FPCR parameter, unlike
// the fplibAdd overloads.
// NOTE(review): the name suggests a 32-bit add used by BFloat16
// operations; confirm the rounding/control behaviour it applies
// without a caller-supplied FPCR.
490uint32_t
fplibAdd_Bf16(uint32_t op1, uint32_t op2, FPSCR &fpscr);
// fplibBfdotAdd: 32-bit addend plus four 16-bit operands (op1_a,
// op1_b, op2_a, op2_b) in, 32-bit result out.
// NOTE(review): presumably addend + op1_a*op2_a + op1_b*op2_b --
// confirm the pairing and rounding steps in the implementation.
493uint32_t
fplibBfdotAdd(uint32_t addend, uint16_t op1_a, uint16_t op1_b,
494 uint16_t op2_a, uint16_t op2_b,
495 FPSCR &fpscr, FPCR fpcr);
uint16_t fplib32RSqrtStep(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint32_t fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t &nz)
Floating-point JavaScript convert to a signed integer, rounding toward zero.
uint16_t fplib32RecipStep(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static FPRounding FPCRRounding(FPSCR &fpscr)
uint32_t fplibMulAddH(uint32_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint32_t fplibBfMulH(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint16_t fplibBfMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint32_t fplibAdd_Bf16(uint32_t op1, uint32_t op2, FPSCR &fpscr)
uint16_t fplibBfMin(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
bool fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr, FPCR fpcr)
bool fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibBfMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibExpA(uint16_t op)
uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
bool fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibLogB(uint16_t op, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibConvertBF(uint32_t op, FPRounding rounding, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr, FPCR fpcr)
uint32_t fplibBfdotAdd(uint32_t addend, uint16_t op1_a, uint16_t op1_b, uint16_t op2_a, uint16_t op2_b, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibBfMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibBfAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint32_t fplibBfMulAddH(uint32_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibBfSub(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibAbs(uint16_t op, FPCR fpcr)
uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibBfMax(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint32_t fplibRoundIntN(uint32_t op, FPRounding rounding, bool exact, int intsize, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint32_t fplibDot(uint16_t op1_a, uint16_t op1_b, uint16_t op2_a, uint16_t op2_b, FPSCR &fpscr, FPCR fpcr)
bool fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibBfNeg(uint16_t op, FPCR fpcr)
uint16_t fplibInfinity(int sgn)
uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibDefaultNaN(FPCR fpcr)
uint16_t fplibNeg(uint16_t op, FPCR fpcr)
uint16_t fplibBfMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
Copyright (c) 2024 Arm Limited All rights reserved.