// Mode flag: floating-point execution enabled (bit 9 of the soft-float
// mode word passed through this library).
#define FPLIB_FPEXEC 512

// IEEE-754 binary interchange format parameters for half (FP16),
// single (FP32) and double (FP64) precision.

// Width of the biased exponent field.
#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

// Exponent bias: stored_exp = true_exp + bias.
#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

// All-ones exponent encodes infinity (mantissa == 0) or NaN (mantissa != 0).
#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

// Width of the stored mantissa (fraction) field: total bits minus
// exponent bits minus the sign bit. FP{16,32,64}_BITS are defined
// elsewhere in this file.
#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

// Extract the biased exponent field from a raw encoding.
#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

// Extract the stored mantissa (fraction) field from a raw encoding.
#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
101static inline uint16_t
107static inline uint16_t
113static inline uint32_t
119static inline uint32_t
125static inline uint64_t
131static inline uint64_t
143 }
else if (
shift < 64) {
144 *r1 = x1 << shift | x0 >> (64 -
shift);
146 }
else if (
shift < 128) {
147 *r1 = x0 << (
shift - 64);
161 }
else if (
shift < 64) {
164 }
else if (
shift < 128) {
176 uint32_t
mask = ((uint32_t)1 << 31) - 1;
181 uint64_t p0 =
a0 *
b0;
182 uint64_t p2 =
a1 *
b1;
183 uint64_t p1 = (
a0 +
a1) * (
b0 +
b1) - p0 - p2;
185 uint64_t s1 = (s0 >> 31) + p1;
186 uint64_t s2 = (s1 >> 31) + p2;
187 *x0 = (s0 &
mask) | (s1 &
mask) << 31 | s2 << 62;
192void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t
a, uint32_t
b)
194 uint64_t
t0 = (uint64_t)(uint32_t)
a *
b;
195 uint64_t
t1 = (
t0 >> 32) + (
a >> 32) *
b;
196 *x0 =
t1 << 32 | (uint32_t)
t0;
201add128(uint64_t *x0, uint64_t *x1, uint64_t
a0, uint64_t
a1, uint64_t
b0,
205 *x1 =
a1 +
b1 + (*x0 <
a0);
209sub128(uint64_t *x0, uint64_t *x1, uint64_t
a0, uint64_t
a1, uint64_t
b0,
213 *x1 =
a1 -
b1 - (*x0 >
a0);
222static inline uint16_t
232 if (!(mnt >> (16 -
shift))) {
240static inline uint32_t
250 if (!(mnt >> (32 -
shift))) {
258static inline uint64_t
268 if (!(mnt >> (64 -
shift))) {
294 if (!(x1 >> (64 -
shift))) {
295 x1 = x1 << shift | x0 >> (64 -
shift);
305static inline uint16_t
311static inline uint32_t
317static inline uint64_t
323static inline uint16_t
329static inline uint32_t
335static inline uint64_t
341static inline uint16_t
347static inline uint32_t
353static inline uint64_t
359static inline uint16_t
365static inline uint32_t
371static inline uint64_t
377static inline uint16_t
384static inline uint32_t
391static inline uint64_t
564static inline uint16_t
575static inline uint32_t
586static inline uint64_t
725 if (op1_nan && op2_nan && op3_nan) {
726 if (op1_snan || op2_snan || op3_snan) {
732 }
else if (op2_nan && (op1_nan || op3_nan)) {
733 if (op1_snan || op2_snan || op3_snan) {
739 }
else if (op3_nan && op1_nan) {
740 if (op1_snan || op2_snan || op3_snan) {
785 if (op1_nan && op2_nan && op3_nan) {
786 if (op1_snan || op2_snan || op3_snan) {
792 }
else if (op2_nan && (op1_nan || op3_nan)) {
793 if (op1_snan || op2_snan || op3_snan) {
799 }
else if (op3_nan && op1_nan) {
800 if (op1_snan || op2_snan || op3_snan) {
845 if (op1_nan && op2_nan && op3_nan) {
846 if (op1_snan || op2_snan || op3_snan) {
852 }
else if (op2_nan && (op1_nan || op3_nan)) {
853 if (op1_snan || op2_snan || op3_snan) {
859 }
else if (op3_nan && op1_nan) {
860 if (op1_snan || op2_snan || op3_snan) {
945 int mode,
int *flags)
996 if (op1_nan && op2_nan && op3_nan) {
997 if (op1_snan || op2_snan || op3_snan) {
1003 }
else if (op2_nan && (op1_nan || op3_nan)) {
1004 if (op1_snan || op2_snan || op3_snan) {
1010 }
else if (op3_nan && op1_nan) {
1011 if (op1_snan || op2_snan || op3_snan) {
1043 int biased_exp, biased_exp_afp;
1045 uint16_t int_mant, int_mant_afp;
1047 int error, error_afp;
1063 biased_exp_afp = exp;
1064 int_mant_afp = mnt >> 2;
1065 error_afp = mnt & 3;
1068 int_mant = mnt >> 2;
1072 int_mant =
lsr16(mnt, 3 - exp);
1073 error = (
lsr16(mnt, 1 - exp) & 3) | !!(mnt & (
lsl16(1, 1 - exp) - 1));
1085 (error_afp == 2 && (int_mant_afp & 1)))) ||
1099 (error == 2 && (int_mant & 1)))) ||
1120 if (biased_exp_afp < 1) {
1156 return fp16_pack(sgn, biased_exp, int_mant);
1170 int biased_exp, biased_exp_afp;
1172 uint32_t int_mant, int_mant_afp;
1174 int error, error_afp;
1190 biased_exp_afp = exp;
1191 int_mant_afp = mnt >> 2;
1192 error_afp = mnt & 3;
1195 int_mant = mnt >> 2;
1199 int_mant =
lsr32(mnt, 3 - exp);
1200 error = (
lsr32(mnt, 1 - exp) & 3) | !!(mnt & (
lsl32(1, 1 - exp) - 1));
1215 (error_afp == 2 && (int_mant_afp & 1)))) ||
1229 (error == 2 && (int_mant & 1)))) ||
1250 if (biased_exp_afp < 1) {
1278 return fp32_pack(sgn, biased_exp, int_mant);
1291 int biased_exp, biased_exp_afp;
1293 uint64_t int_mant, int_mant_afp;
1295 int error, error_afp;
1309 biased_exp_afp = exp;
1310 int_mant_afp = mnt >> 2;
1311 error_afp = mnt & 3;
1314 int_mant = mnt >> 2;
1318 int_mant =
lsr64(mnt, 3 - exp);
1319 error = (
lsr64(mnt, 1 - exp) & 3) | !!(mnt & (
lsl64(1, 1 - exp) - 1));
1331 (error_afp == 2 && (int_mant_afp & 1)))) ||
1345 (error == 2 && (int_mant & 1)))) ||
1366 if (biased_exp_afp < 1) {
1394 return fp64_pack(sgn, biased_exp, int_mant);
1406 int a_sgn, a_exp, b_sgn, b_exp;
1407 uint16_t a_mnt, b_mnt;
1419 return a ==
b || (!a_mnt && !b_mnt);
1425 int a_sgn, a_exp, b_sgn, b_exp;
1426 uint16_t a_mnt, b_mnt;
1436 if (!a_mnt && !b_mnt)
1441 return a_sgn ^ (a_exp > b_exp);
1443 return a_sgn ^ (a_mnt > b_mnt);
1450 int a_sgn, a_exp, b_sgn, b_exp;
1451 uint16_t a_mnt, b_mnt;
1461 if (!a_mnt && !b_mnt)
1466 return a_sgn ^ (a_exp > b_exp);
1468 return a_sgn ^ (a_mnt > b_mnt);
1475 int a_sgn, a_exp, b_sgn, b_exp;
1476 uint16_t a_mnt, b_mnt;
1494 int a_sgn, a_exp, b_sgn, b_exp;
1495 uint32_t a_mnt, b_mnt;
1515 return a ==
b || (!a_mnt && !b_mnt);
1521 int a_sgn, a_exp, b_sgn, b_exp;
1522 uint32_t a_mnt, b_mnt;
1540 if (!a_mnt && !b_mnt)
1545 return a_sgn ^ (a_exp > b_exp);
1547 return a_sgn ^ (a_mnt > b_mnt);
1554 int a_sgn, a_exp, b_sgn, b_exp;
1555 uint32_t a_mnt, b_mnt;
1573 if (!a_mnt && !b_mnt)
1578 return a_sgn ^ (a_exp > b_exp);
1580 return a_sgn ^ (a_mnt > b_mnt);
1587 int a_sgn, a_exp, b_sgn, b_exp;
1588 uint32_t a_mnt, b_mnt;
1614 int a_sgn, a_exp, b_sgn, b_exp;
1615 uint64_t a_mnt, b_mnt;
1635 return a ==
b || (!a_mnt && !b_mnt);
1641 int a_sgn, a_exp, b_sgn, b_exp;
1642 uint64_t a_mnt, b_mnt;
1660 if (!a_mnt && !b_mnt)
1665 return a_sgn ^ (a_exp > b_exp);
1667 return a_sgn ^ (a_mnt > b_mnt);
1674 int a_sgn, a_exp, b_sgn, b_exp;
1675 uint64_t a_mnt, b_mnt;
1693 if (!a_mnt && !b_mnt)
1698 return a_sgn ^ (a_exp > b_exp);
1700 return a_sgn ^ (a_mnt > b_mnt);
1707 int a_sgn, a_exp, b_sgn, b_exp;
1708 uint64_t a_mnt, b_mnt;
1734 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1735 uint16_t a_mnt, b_mnt,
x, x_mnt;
1754 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1760 if (a_exp >= b_exp) {
1761 b_mnt = (
lsr16(b_mnt, a_exp - b_exp) |
1762 !!(b_mnt & (
lsl16(1, a_exp - b_exp) - 1)));
1765 a_mnt = (
lsr16(a_mnt, b_exp - a_exp) |
1766 !!(a_mnt & (
lsl16(1, b_exp - a_exp) - 1)));
1771 if (a_sgn == b_sgn) {
1772 x_mnt = a_mnt + b_mnt;
1773 }
else if (a_mnt >= b_mnt) {
1774 x_mnt = a_mnt - b_mnt;
1777 x_mnt = b_mnt - a_mnt;
1795 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1796 uint32_t a_mnt, b_mnt,
x, x_mnt;
1822 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1828 if (a_exp >= b_exp) {
1829 b_mnt = (
lsr32(b_mnt, a_exp - b_exp) |
1830 !!(b_mnt & (
lsl32(1, a_exp - b_exp) - 1)));
1833 a_mnt = (
lsr32(a_mnt, b_exp - a_exp) |
1834 !!(a_mnt & (
lsl32(1, b_exp - a_exp) - 1)));
1839 if (a_sgn == b_sgn) {
1840 x_mnt = a_mnt + b_mnt;
1841 }
else if (a_mnt >= b_mnt) {
1842 x_mnt = a_mnt - b_mnt;
1845 x_mnt = b_mnt - a_mnt;
1867 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1868 uint64_t a_mnt, b_mnt,
x, x_mnt;
1894 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1900 if (a_exp >= b_exp) {
1901 b_mnt = (
lsr64(b_mnt, a_exp - b_exp) |
1902 !!(b_mnt & (
lsl64(1, a_exp - b_exp) - 1)));
1905 a_mnt = (
lsr64(a_mnt, b_exp - a_exp) |
1906 !!(a_mnt & (
lsl64(1, b_exp - a_exp) - 1)));
1911 if (a_sgn == b_sgn) {
1912 x_mnt = a_mnt + b_mnt;
1913 }
else if (a_mnt >= b_mnt) {
1914 x_mnt = a_mnt - b_mnt;
1917 x_mnt = b_mnt - a_mnt;
1934 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1935 uint16_t a_mnt, b_mnt,
x, x_mnt;
1954 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1960 if (a_exp >= b_exp) {
1961 b_mnt = (
lsr16(b_mnt, a_exp - b_exp) |
1962 !!(b_mnt & (
lsl16(1, a_exp - b_exp) - 1)));
1965 a_mnt = (
lsr16(a_mnt, b_exp - a_exp) |
1966 !!(a_mnt & (
lsl16(1, b_exp - a_exp) - 1)));
1971 if (a_sgn == b_sgn) {
1972 x_mnt = a_mnt + b_mnt;
1973 }
else if (a_mnt >= b_mnt) {
1974 x_mnt = a_mnt - b_mnt;
1977 x_mnt = b_mnt - a_mnt;
1995 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1996 uint16_t a_mnt, b_mnt,
x;
2013 }
else if (!a_mnt || !b_mnt) {
2018 x_sgn = a_sgn ^ b_sgn;
2020 x_mnt = (uint32_t)a_mnt * b_mnt;
2032 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
2033 uint32_t a_mnt, b_mnt,
x;
2057 }
else if (!a_mnt || !b_mnt) {
2062 x_sgn = a_sgn ^ b_sgn;
2064 x_mnt = (uint64_t)a_mnt * b_mnt;
2081 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
2082 uint64_t a_mnt, b_mnt,
x;
2083 uint64_t x0_mnt, x1_mnt;
2106 }
else if (!a_mnt || !b_mnt) {
2111 x_sgn = a_sgn ^ b_sgn;
2113 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
2117 x0_mnt = x1_mnt << 1 | !!x0_mnt;
2124 int mode,
int *flags)
2126 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
2127 uint16_t a_mnt, b_mnt, c_mnt,
x;
2128 uint32_t x_mnt, y_mnt;
2156 (a_sgn != (b_sgn ^ c_sgn)))) {
2164 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
2172 y_sgn = b_sgn ^ c_sgn;
2174 y_mnt = (uint32_t)b_mnt * c_mnt << 3;
2180 if (x_exp >= y_exp) {
2181 y_mnt = (
lsr32(y_mnt, x_exp - y_exp) |
2182 !!(y_mnt & (
lsl32(1, x_exp - y_exp) - 1)));
2185 x_mnt = (
lsr32(x_mnt, y_exp - x_exp) |
2186 !!(x_mnt & (
lsl32(1, y_exp - x_exp) - 1)));
2189 if (x_sgn == y_sgn) {
2190 x_mnt = x_mnt + y_mnt;
2191 }
else if (x_mnt >= y_mnt) {
2192 x_mnt = x_mnt - y_mnt;
2195 x_mnt = y_mnt - x_mnt;
2205 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
2212 int mode,
int *flags,
bool rm_odd=
false)
2214 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
2215 uint32_t a_mnt, b_mnt, c_mnt,
x;
2216 uint64_t x_mnt, y_mnt;
2244 (a_sgn != (b_sgn ^ c_sgn)))) {
2262 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
2270 y_sgn = b_sgn ^ c_sgn;
2272 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
2278 if (x_exp >= y_exp) {
2279 y_mnt = (
lsr64(y_mnt, x_exp - y_exp) |
2280 !!(y_mnt & (
lsl64(1, x_exp - y_exp) - 1)));
2283 x_mnt = (
lsr64(x_mnt, y_exp - x_exp) |
2284 !!(x_mnt & (
lsl64(1, y_exp - x_exp) - 1)));
2287 if (x_sgn == y_sgn) {
2288 x_mnt = x_mnt + y_mnt;
2289 }
else if (x_mnt >= y_mnt) {
2290 x_mnt = x_mnt - y_mnt;
2293 x_mnt = y_mnt - x_mnt;
2307 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
2319 int mode,
int *flags)
2321 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
2322 uint64_t a_mnt, b_mnt, c_mnt,
x;
2323 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
2351 (a_sgn != (b_sgn ^ c_sgn)))) {
2369 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
2378 y_sgn = b_sgn ^ c_sgn;
2380 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
2381 if (!y0_mnt && !y1_mnt) {
2386 if (x_exp >= y_exp) {
2389 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
2390 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
2391 y0_mnt |= !!(
t0 |
t1);
2396 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
2397 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
2398 x0_mnt |= !!(
t0 |
t1);
2401 if (x_sgn == y_sgn) {
2402 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
2403 }
else if (
cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
2404 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
2407 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
2410 if (!x0_mnt && !x1_mnt) {
2417 x0_mnt = x1_mnt << 1 | !!x0_mnt;
2424 int mode,
int *flags,
bool rm_odd=
false)
2426 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
2427 uint16_t b_mnt, c_mnt;
2429 uint64_t x_mnt, y_mnt;
2457 (a_sgn != (b_sgn ^ c_sgn)))) {
2474 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
2482 y_sgn = b_sgn ^ c_sgn;
2485 y_mnt = (uint64_t)b_mnt * c_mnt << (3 +
2492 if (x_exp >= y_exp) {
2493 y_mnt = (
lsr64(y_mnt, x_exp - y_exp) |
2494 !!(y_mnt & (
lsl64(1, x_exp - y_exp) - 1)));
2497 x_mnt = (
lsr64(x_mnt, y_exp - x_exp) |
2498 !!(x_mnt & (
lsl64(1, y_exp - x_exp) - 1)));
2501 if (x_sgn == y_sgn) {
2502 x_mnt = x_mnt + y_mnt;
2503 }
else if (x_mnt >= y_mnt) {
2504 x_mnt = x_mnt - y_mnt;
2507 x_mnt = y_mnt - x_mnt;
2521 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
2532fp32_dot(uint16_t op1_a, uint16_t op1_b, uint16_t op2_a, uint16_t op2_b,
2533 int mode,
int *flags)
2535 int a1_sgn, b1_sgn, a2_sgn, b2_sgn, a1_exp, b1_exp, a2_exp, b2_exp;
2536 int pa_sgn, pb_sgn, pa_exp, pb_exp, x_sgn, x_exp;
2537 uint16_t a1_mnt, b1_mnt, a2_mnt, b2_mnt;
2539 uint64_t pa_mnt, pb_mnt, x_mnt;
2557 bool a1_zero = !a1_exp && !a1_mnt;
2558 bool a2_zero = !a2_exp && !a2_mnt;
2559 bool b1_zero = !b1_exp && !b1_mnt;
2560 bool b2_zero = !b2_exp && !b2_mnt;
2564 pa_sgn = a1_sgn ^ a2_sgn;
2565 pb_sgn = b1_sgn ^ b2_sgn;
2566 bool pa_inf = a1_inf || a2_inf;
2567 bool pb_inf = b1_inf || b2_inf;
2568 bool pa_zero = a1_zero || a2_zero;
2569 bool pb_zero = b1_zero || b2_zero;
2574 ((a1_inf && a2_zero) || (a1_zero && a2_inf) || (b1_inf && b2_zero) ||
2575 (b1_zero && b2_inf) || (pa_inf && pb_inf && pa_sgn != pb_sgn));
2584 if ((pa_inf && !pa_sgn) || (pb_inf && !pb_sgn)) {
2586 }
else if ((pa_inf && pa_sgn) || (pb_inf && pb_sgn)) {
2592 if (pa_zero && pb_zero && (pa_sgn == pb_sgn)) {
2601 pa_mnt = (uint64_t)a1_mnt * a2_mnt
2606 pb_mnt = (uint64_t)b1_mnt * b2_mnt
2608 if (!pb_mnt && pa_mnt) {
2611 if (pb_mnt && !pa_mnt) {
2616 if (pa_exp >= pb_exp) {
2617 pb_mnt = (
lsr64(pb_mnt, pa_exp - pb_exp) |
2618 !!(pb_mnt & (
lsl64(1, pa_exp - pb_exp) - 1)));
2621 pa_mnt = (
lsr64(pa_mnt, pb_exp - pa_exp) |
2622 !!(pa_mnt & (
lsl64(1, pb_exp - pa_exp) - 1)));
2627 if (pa_sgn == pb_sgn) {
2628 x_mnt = pa_mnt + pb_mnt;
2629 }
else if (pa_mnt >= pb_mnt) {
2630 x_mnt = pa_mnt - pb_mnt;
2633 x_mnt = pb_mnt - pa_mnt;
2644 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
2652 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
2653 uint16_t a_mnt, b_mnt,
x;
2664 (!a_mnt && !b_mnt)) {
2678 x_sgn = a_sgn ^ b_sgn;
2681 x_mnt |= (x_mnt * b_mnt !=
2686 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
2694 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
2695 uint32_t a_mnt, b_mnt,
x;
2704 if (b_exp || b_mnt) {
2716 (!a_mnt && !b_mnt)) {
2730 x_sgn = a_sgn ^ b_sgn;
2733 x_mnt |= (x_mnt * b_mnt !=
2738 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
2746 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp,
c;
2747 uint64_t a_mnt, b_mnt,
x, x_mnt, x0_mnt, x1_mnt;
2755 if (b_exp || b_mnt) {
2767 (!a_mnt && !b_mnt)) {
2782 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
2783 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
2784 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
2785 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
2786 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
2787 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
2790 x_sgn = a_sgn ^ b_sgn;
2792 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
2793 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
2797 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
2798 c =
cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
2854 b =
b < -300 ? -300 :
b;
2855 b =
b > 300 ? 300 :
b;
2895 b =
b < -300 ? -300 :
b;
2896 b =
b > 300 ? 300 :
b;
2936 b =
b < -3000 ? -3000 :
b;
2937 b =
b > 3000 ? 3000 :
b;
2950 int a_sgn, a_exp, x_sgn, x_exp;
2951 uint16_t a_mnt, x_mnt;
2977 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2980 x = (((uint32_t)a_mnt << 16) / (
x >> 15) + (
x >> 16)) << 15;
2983 x = (((uint32_t)a_mnt << 16) / (
x >> 15) + (
x >> 16)) << 15;
2986 x_exp = (a_exp + 27) >> 1;
2987 x_mnt = ((
x - (1 << 18)) >> 19) + 1;
2988 t1 = (uint32_t)x_mnt * x_mnt;
2989 t0 = (uint32_t)a_mnt << 9;
3002 int a_sgn, a_exp, x_sgn, x_exp;
3003 uint32_t a_mnt,
x, x_mnt;
3036 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
3039 x = (a_mnt / (
x >> 15) + (
x >> 16)) << 15;
3042 x = (a_mnt / (
x >> 15) + (
x >> 16)) << 15;
3045 x = ((((uint64_t)a_mnt << 32) /
x) >> 2) + (
x >> 1);
3048 x_exp = (a_exp + 147) >> 1;
3049 x_mnt = ((
x - (1 << 5)) >> 6) + 1;
3050 t1 = (uint64_t)x_mnt * x_mnt;
3051 t0 = (uint64_t)a_mnt << 19;
3064 int a_sgn, a_exp, x_sgn, x_exp,
c;
3065 uint64_t a_mnt, x_mnt,
r, x0, x1;
3098 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
3101 x = ((a_mnt >> 32) / (
x >> 15) + (
x >> 16)) << 15;
3104 x = ((a_mnt >> 32) / (
x >> 15) + (
x >> 16)) << 15;
3107 x = ((a_mnt /
x) >> 2) + (
x >> 1);
3110 r = ((uint64_t)1 << 62) /
x;
3114 lsr128(&x0, &x1, x0, x1, 31);
3117 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
3118 lsl128(&x0, &x1, x0, x1, 5);
3119 lsr128(&x0, &x1, x0, x1, 56);
3121 x0 = ((uint64_t)
x << 31) + (x0 >> 1);
3124 x_exp = (a_exp + 1053) >> 1;
3126 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
3128 lsl128(&x0, &x1, x0, x1, 19);
3141 int mode = fpscr.rMode;
3172 bool underflow =
false;
3189 if ((flags &
FPLIB_IXC) && !(underflow && fpscr.fz)) {
3346fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
3356fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr)
3366fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr)
3381 int sgn1, exp1, sgn2, exp2, result;
3382 uint16_t mnt1, mnt2;
3393 if (op1 == op2 || (!mnt1 && !mnt2)) {
3395 }
else if (sgn1 != sgn2) {
3396 result = sgn1 ? 8 : 2;
3397 }
else if (exp1 != exp2) {
3398 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
3400 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
3416 int sgn1, exp1, sgn2, exp2, result;
3417 uint32_t mnt1, mnt2;
3435 if (op1 == op2 || (!mnt1 && !mnt2)) {
3437 }
else if (sgn1 != sgn2) {
3438 result = sgn1 ? 8 : 2;
3439 }
else if (exp1 != exp2) {
3440 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
3442 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
3458 int sgn1, exp1, sgn2, exp2, result;
3459 uint64_t mnt1, mnt2;
3477 if (op1 == op2 || (!mnt1 && !mnt2)) {
3479 }
else if (sgn1 != sgn2) {
3480 result = sgn1 ? 8 : 2;
3481 }
else if (exp1 != exp2) {
3482 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
3484 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
3615 bool alt_hp = fpscr.ahp;
3620 }
else if (fpscr.dn) {
3630 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
3643 rounding, (
mode & 0xfcf) | alt_hp << 4, &flags);
3671 bool alt_hp = fpscr.ahp;
3676 }
else if (fpscr.dn) {
3686 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
3699 rounding, (
mode & 0xfcf) | alt_hp << 4, &flags);
3783 rounding,
mode, &flags);
3924fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
3934fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr)
3944fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr)
3954fplibDot(uint16_t op1_a, uint16_t op1_b, uint16_t op2_a, uint16_t op2_b,
3955 FPSCR &fpscr, FPCR fpcr)
3968 static uint16_t coeff[32] = {
4003 coeff[
op & ((1 << 5) - 1)]);
4010 static uint32_t coeff[64] = {
4077 coeff[
op & ((1 << 6) - 1)]);
4084 static uint64_t coeff[64] = {
4151 coeff[
op & ((1 << 6) - 1)]);
4161 uint16_t mnt, result;
4168 result = (uint16_t)1 << 15;
4169 }
else if (exp == 0 && mnt == 0) {
4171 result = (uint16_t)1 << 15;
4173 result = ((uint16_t)1 << 15) - 1;
4178 result =
static_cast<uint16_t
>((int16_t)unbias_exp);
4193 uint32_t mnt, result;
4200 result = (uint32_t)1 << 31;
4201 }
else if (exp == 0 && mnt == 0) {
4203 result = (uint32_t)1 << 31;
4205 result = ((uint32_t)1 << 31) - 1;
4216 result =
static_cast<uint32_t
>((int32_t)unbias_exp);
4231 uint64_t mnt, result;
4238 result = (uint64_t)1 << 63;
4239 }
else if (exp == 0 && mnt == 0) {
4241 result = (uint64_t)1 << 63;
4243 result = ((uint64_t)1 << 63) - 1;
4254 result =
static_cast<uint64_t
>((int64_t)unbias_exp);
4263fp16_min(uint16_t op1, uint16_t op2,
int mode,
int *flags,
bool altfpminmax)
4265 int sgn1, exp1, sgn2, exp2;
4266 uint16_t mnt1, mnt2,
x, result;
4273 if (!exp1 && !mnt1 && !exp2 && !mnt2 && (sgn1 != sgn2)) {
4280 if (!exp2 && !mnt2) {
4293 if (sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) {
4294 sgn = sgn1; exp = exp1; mnt = mnt1;
4296 sgn = sgn2; exp = exp2; mnt = mnt2;
4301 }
else if (!exp && !mnt) {
4316fp32_min(uint32_t op1, uint32_t op2,
int mode,
int *flags,
bool altfpminmax)
4318 int sgn1, exp1, sgn2, exp2;
4319 uint32_t mnt1, mnt2,
x, result;
4326 if (!exp1 && !mnt1 && !exp2 && !mnt2 && (sgn1 != sgn2)) {
4333 if (!exp2 && !mnt2) {
4353 if (sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) {
4354 sgn = sgn1; exp = exp1; mnt = mnt1;
4356 sgn = sgn2; exp = exp2; mnt = mnt2;
4361 }
else if (!exp && !mnt) {
4376fp64_min(uint64_t op1, uint64_t op2,
int mode,
int *flags,
bool altfpminmax)
4378 int sgn1, exp1, sgn2, exp2;
4379 uint64_t mnt1, mnt2,
x, result;
4386 if (!exp1 && !mnt1 && !exp2 && !mnt2 && (sgn1 != sgn2)) {
4393 if (!exp2 && !mnt2) {
4413 if (sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) {
4414 sgn = sgn1; exp = exp1; mnt = mnt1;
4416 sgn = sgn2; exp = exp2; mnt = mnt2;
4421 }
else if (!exp && !mnt) {
4436fp16_max(uint16_t op1, uint16_t op2,
int mode,
int *flags,
bool altfpminmax)
4438 int sgn1, exp1, sgn2, exp2;
4439 uint16_t mnt1, mnt2,
x, result;
4446 if (!exp1 && !mnt1 && !exp2 && !mnt2 && (sgn1 != sgn2)) {
4453 if (!exp2 && !mnt2) {
4466 if (sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) {
4467 sgn = sgn1; exp = exp1; mnt = mnt1;
4469 sgn = sgn2; exp = exp2; mnt = mnt2;
4474 }
else if (!exp && !mnt) {
4489fp32_max(uint32_t op1, uint32_t op2,
int mode,
int *flags,
bool altfpminmax)
4491 int sgn1, exp1, sgn2, exp2;
4492 uint32_t mnt1, mnt2,
x, result;
4499 if (!exp1 && !mnt1 && !exp2 && !mnt2 && (sgn1 != sgn2)) {
4506 if (!exp2 && !mnt2) {
4526 if (sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) {
4527 sgn = sgn1; exp = exp1; mnt = mnt1;
4529 sgn = sgn2; exp = exp2; mnt = mnt2;
4534 }
else if (!exp && !mnt) {
4549fp64_max(uint64_t op1, uint64_t op2,
int mode,
int *flags,
bool altfpminmax)
4551 int sgn1, exp1, sgn2, exp2;
4552 uint64_t mnt1, mnt2,
x, result;
4559 if (!exp1 && !mnt1 && !exp2 && !mnt2 && (sgn1 != sgn2)) {
4566 if (!exp2 && !mnt2) {
4586 if (sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) {
4587 sgn = sgn1; exp = exp1; mnt = mnt1;
4589 sgn = sgn2; exp = exp2; mnt = mnt2;
4594 }
else if (!exp && !mnt) {
4646fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
4657fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr)
4668fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr)
4690 uint16_t result =
fp16_max(op1, op2,
modeConv(fpscr, fpcr), &flags,
false);
4708 uint32_t result =
fp32_max(op1, op2,
modeConv(fpscr, fpcr), &flags,
false);
4726 uint64_t result =
fp64_max(op1, op2,
modeConv(fpscr, fpcr), &flags,
false);
4733fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
4744fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr)
4755fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr)
4777 uint16_t result =
fp16_min(op1, op2,
modeConv(fpscr, fpcr), &flags,
false);
4795 uint32_t result =
fp32_min(op1, op2,
modeConv(fpscr, fpcr), &flags,
false);
4813 uint64_t result =
fp64_min(op1, op2,
modeConv(fpscr, fpcr), &flags,
false);
4820fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
4830fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr)
4840fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr)
4850fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
4854 int sgn1, exp1, sgn2, exp2;
4855 uint16_t mnt1, mnt2, result;
4867 }
else if (!mnt1 || !mnt2) {
4881fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr)
4885 int sgn1, exp1, sgn2, exp2;
4886 uint32_t mnt1, mnt2, result;
4898 }
else if (!mnt1 || !mnt2) {
4919fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr)
4923 int sgn1, exp1, sgn2, exp2;
4924 uint64_t mnt1, mnt2, result;
4936 }
else if (!mnt1 || !mnt2) {
4986 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
4987 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
4988 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
4989 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
4990 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
4991 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
4992 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
4993 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
4994 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
4995 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
4996 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
4997 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
4998 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
4999 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
5000 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
5001 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
5016 uint16_t mnt, result;
5058 uint32_t mnt, result;
5100 uint64_t mnt, result;
5141 int sgn1, exp1, sgn2, exp2;
5142 uint16_t mnt1, mnt2, result;
5176 int sgn1, exp1, sgn2, exp2;
5177 uint32_t mnt1, mnt2, result;
5211 int sgn1, exp1, sgn2, exp2;
5212 uint64_t mnt1, mnt2, result;
5248 uint16_t mnt, result;
5261 bool overflow_to_inf =
false;
5264 overflow_to_inf =
true;
5267 overflow_to_inf = !sgn;
5270 overflow_to_inf = sgn;
5273 overflow_to_inf =
false;
5276 panic(
"Unrecognized FP rounding mode");
5289 uint16_t fraction = (((uint32_t)1 << 19) /
5290 (mnt >> (
FP16_BITS - 10) | 1) + 1) >> 1;
5292 if (result_exp == 0) {
5294 }
else if (result_exp == -1) {
5298 result =
fp16_pack(sgn, result_exp, fraction);
5319 uint32_t mnt, result;
5332 bool overflow_to_inf =
false;
5335 overflow_to_inf =
true;
5338 overflow_to_inf = !sgn;
5341 overflow_to_inf = sgn;
5344 overflow_to_inf =
false;
5347 panic(
"Unrecognized FP rounding mode");
5360 uint32_t fraction = (((uint32_t)1 << 19) /
5361 (mnt >> (
FP32_BITS - 10) | 1) + 1) >> 1;
5363 if (result_exp == 0) {
5365 }
else if (result_exp == -1) {
5369 result =
fp32_pack(sgn, result_exp, fraction);
5390 uint64_t mnt, result;
5403 bool overflow_to_inf =
false;
5406 overflow_to_inf =
true;
5409 overflow_to_inf = !sgn;
5412 overflow_to_inf = sgn;
5415 overflow_to_inf =
false;
5418 panic(
"Unrecognized FP rounding mode");
5431 uint64_t fraction = (((uint32_t)1 << 19) /
5432 (mnt >> (
FP64_BITS - 10) | 1) + 1) >> 1;
5434 if (result_exp == 0) {
5436 }
else if (result_exp == -1) {
5440 result =
fp64_pack(sgn, result_exp, fraction);
5459 int sgn1, exp1, sgn2, exp2;
5460 uint16_t mnt1, mnt2, result;
5494 int sgn1, exp1, sgn2, exp2;
5495 uint32_t mnt1, mnt2, result;
5529 int sgn1, exp1, sgn2, exp2;
5530 uint64_t mnt1, mnt2, result;
5565 uint16_t mnt, result;
5597 uint32_t mnt, result;
5629 uint64_t mnt, result;
5658 uint16_t mnt, result;
5661 int unpack_mode =
mode;
5674 }
else if (exp >= expint) {
5679 uint16_t
x = expint - exp >=
FP16_BITS ? 0 : mnt >> (expint - exp);
5681 ((mnt << 1 >> (expint - exp - 1) & 3) |
5682 ((uint16_t)(mnt << 2 << (
FP16_BITS + exp - expint)) != 0));
5685 x += (
err == 3 || (
err == 2 && (
x & 1)));
5699 panic(
"Unrecognized FP rounding mode");
5728 uint32_t mnt, result;
5731 int unpack_mode =
mode;
5744 }
else if (exp >= expint) {
5749 uint32_t
x = expint - exp >=
FP32_BITS ? 0 : mnt >> (expint - exp);
5751 ((mnt << 1 >> (expint - exp - 1) & 3) |
5752 ((uint32_t)(mnt << 2 << (
FP32_BITS + exp - expint)) != 0));
5755 x += (
err == 3 || (
err == 2 && (
x & 1)));
5769 panic(
"Unrecognized FP rounding mode");
5798 uint64_t mnt, result;
5801 int unpack_mode =
mode;
5814 }
else if (exp >= expint) {
5819 uint64_t
x = expint - exp >=
FP64_BITS ? 0 : mnt >> (expint - exp);
5821 ((mnt << 1 >> (expint - exp - 1) & 3) |
5822 ((uint64_t)(mnt << 2 << (
FP64_BITS + exp - expint)) != 0));
5825 x += (
err == 3 || (
err == 2 && (
x & 1)));
5839 panic(
"Unrecognized FP rounding mode");
5862 FPSCR &fpscr, FPCR fpcr)
5868 uint32_t mnt, result;
5871 int unpack_mode =
mode;
5886 }
else if (exp >= expint) {
5889 bool overflow = (exp > (
FP32_EXP_BIAS + intsize - 2) && !sgn) ||
5899 uint32_t
x = expint - exp >=
FP32_BITS ? 0 : mnt >> (expint - exp);
5901 ((mnt << 1 >> (expint - exp - 1) & 3) |
5902 ((uint32_t)(mnt << 2 << (
FP32_BITS + exp - expint)) != 0));
5905 x += (
err == 3 || (
err == 2 && (
x & 1)));
5919 panic(
"Unrecognized FP rounding mode");
5922 bool overflow = (
x > (((uint32_t)1 << (intsize - 1)) - 1) && !sgn) ||
5923 (
x > ((uint32_t)1 << (intsize - 1)) && sgn);
5950 FPSCR &fpscr, FPCR fpcr)
5956 uint64_t mnt, result;
5959 int unpack_mode =
mode;
5974 }
else if (exp >= expint) {
5977 bool overflow = (exp > (
FP64_EXP_BIAS + intsize - 2) && !sgn) ||
5981 if (overflow && intsize >= 0) {
5987 uint64_t
x = expint - exp >=
FP64_BITS ? 0 : mnt >> (expint - exp);
5989 ((mnt << 1 >> (expint - exp - 1) & 3) |
5990 ((uint64_t)(mnt << 2 << (
FP64_BITS + exp - expint)) != 0));
5993 x += (
err == 3 || (
err == 2 && (
x & 1)));
6007 panic(
"Unrecognized FP rounding mode");
6010 bool overflow = (
x > (((uint64_t)1 << (intsize - 1)) - 1) && !sgn) ||
6011 (
x > ((uint64_t)1 << (intsize - 1)) && sgn);
6100fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
6110fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr, FPCR fpcr)
6120fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr, FPCR fpcr)
6133 static uint16_t coeff[2][8] = {
6168 static uint32_t coeff[2][8] = {
6203 static uint64_t coeff[2][8] = {
6205 0x3ff0000000000000ULL,
6206 0xbfc5555555555543ULL,
6207 0x3f8111111110f30cULL,
6208 0xbf2a01a019b92fc6ULL,
6209 0x3ec71de351f3d22bULL,
6210 0xbe5ae5e2b60f7b91ULL,
6211 0x3de5d8408868552fULL,
6212 0x0000000000000000ULL
6215 0x3ff0000000000000ULL,
6216 0xbfe0000000000000ULL,
6217 0x3fa5555555555536ULL,
6218 0xbf56c16c16c13a0bULL,
6219 0x3efa01a019b1e8d8ULL,
6220 0xbe927e4f7282f468ULL,
6221 0x3e21ee96d2641b13ULL,
6222 0xbda8f76380fbb401ULL
6247 result = (result & ~(1ULL << (
FP16_BITS - 1))) |
6295 static constexpr uint16_t fpOne =
6298 return fpOne ^ ((op2 >> 1) << (
FP16_BITS - 1));
6309 static constexpr uint32_t fpOne =
6312 return fpOne ^ ((op2 >> 1) << (
FP32_BITS - 1));
6323 static constexpr uint64_t fpOne =
6326 return fpOne ^ ((op2 >> 1) << (
FP64_BITS - 1));
6343 return ((uint64_t)!
u << (
FP64_BITS - 1)) - !sgn;
6347 err = (exp > expmax - 2 ? 0 :
6353 x += (
err == 3 || (
err == 2 && (
x & 1)));
6367 panic(
"Unrecognized FP rounding mode");
6370 if (
u ? sgn &&
x :
x > (1ULL << (
FP64_BITS - 1)) - !sgn) {
6372 return ((uint64_t)!
u << (
FP64_BITS - 1)) - !sgn;
6379 return sgn ? -
x :
x;
6389 (uint64_t)-
x <= (uint64_t)1 << (
FP32_BITS - 1))) {
6403 (uint64_t)-
x <= (uint64_t)1 << (
FP16_BITS - 1))) {
6413 FPSCR &fpscr, FPCR fpcr)
6417 uint16_t mnt, result;
6436 u, rounding, &flags);
6447 FPSCR &fpscr, FPCR fpcr)
6472 u, rounding, &flags);
6483 FPSCR &fpscr, FPCR fpcr)
6487 uint32_t mnt, result;
6506 u, rounding, &flags);
6517 FPSCR &fpscr, FPCR fpcr)
6538 result =
FPToFixed_32(sgn, exp + fbits, mnt,
u, rounding, &flags);
6553 uint32_t sgn =
bits(
op, 63);
6554 int32_t exp =
bits(
op, 62, 52);
6555 uint64_t mnt =
bits(
op, 51, 0);
6567 }
else if (exp == 0x7ff) {
6579 }
else if (mnt_shft >= 0) {
6580 result =
lsl64(mnt, mnt_shft);
6581 }
else if (mnt_shft < 0) {
6583 result =
lsr64(mnt, abs(mnt_shft));
6585 uint64_t max_result = (1UL << (
FP32_BITS - 1)) -!sgn;
6593 result = sgn ? -result : result;
6595 if (sgn == 1 && result == 0)
6614 FPSCR &fpscr, FPCR fpcr)
6639 u, rounding, &flags);
6650 FPSCR &fpscr, FPCR fpcr)
6674 u, rounding, &flags);
6685 FPSCR &fpscr, FPCR fpcr)
6689 uint64_t mnt, result;
6705 result =
FPToFixed_64(sgn, exp + fbits, mnt,
u, rounding, &flags);
6718 uint64_t x_mnt = x_sgn ? -
a :
a;
6738 uint64_t x_mnt = x_sgn ? -
a :
a;
6758 uint64_t x_mnt = x_sgn ? -
a :
a;
6773 FPSCR &fpscr, FPCR fpcr)
6777 (
int)rounding | (
modeConv(fpscr, fpcr) & 0xFFC),
6786 FPSCR &fpscr, FPCR fpcr)
6790 (
int)rounding | (
modeConv(fpscr, fpcr) & 0xFFC),
6799 FPSCR &fpscr, FPCR fpcr)
6803 (
int)rounding | (
modeConv(fpscr, fpcr) & 0xFFC),
6858 int sgn1, exp1, sgn2, exp2;
6859 uint16_t mnt1, mnt2, result;
6887 int sgn1, exp1, sgn2, exp2;
6888 uint16_t mnt1, mnt2, result;
6921static inline uint16_t
6927static inline uint16_t
6933static inline uint16_t
6939static inline uint16_t
6945static inline uint16_t
6951static inline uint16_t
7014static inline uint16_t
7078 if (op1_nan && op2_nan && op3_nan) {
7079 if (op1_snan || op2_snan || op3_snan) {
7085 }
else if (op2_nan && (op1_nan || op3_nan)) {
7086 if (op1_snan || op2_snan || op3_snan) {
7092 }
else if (op3_nan && op1_nan) {
7093 if (op1_snan || op2_snan || op3_snan) {
7124 int biased_exp, biased_exp_afp;
7126 uint32_t int_mant, int_mant_afp;
7128 int error, error_afp;
7144 biased_exp_afp = exp;
7145 int_mant_afp = mnt >> 2;
7146 error_afp = mnt & 3;
7149 int_mant = mnt >> 2;
7153 int_mant =
lsr32(mnt, 3 - exp);
7154 error = (
lsr32(mnt, 1 - exp) & 3) | !!(mnt & (
lsl32(1, 1 - exp) - 1));
7169 (error_afp == 2 && (int_mant_afp & 1)))) ||
7183 (error == 2 && (int_mant & 1)))) ||
7204 if (biased_exp_afp < 1) {
7232 return bf16_pack(sgn, biased_exp, int_mant);
7244 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
7245 uint32_t a_mnt, b_mnt,
x, x_mnt;
7271 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
7277 if (a_exp >= b_exp) {
7278 b_mnt = (
lsr32(b_mnt, a_exp - b_exp) |
7279 !!(b_mnt & (
lsl32(1, a_exp - b_exp) - 1)));
7282 a_mnt = (
lsr32(a_mnt, b_exp - a_exp) |
7283 !!(a_mnt & (
lsl32(1, b_exp - a_exp) - 1)));
7288 if (a_sgn == b_sgn) {
7289 x_mnt = a_mnt + b_mnt;
7290 }
else if (a_mnt >= b_mnt) {
7291 x_mnt = a_mnt - b_mnt;
7294 x_mnt = b_mnt - a_mnt;
7314 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
7315 uint32_t a_mnt, b_mnt,
x;
7339 }
else if (!a_mnt || !b_mnt) {
7344 x_sgn = a_sgn ^ b_sgn;
7346 x_mnt = (uint64_t)a_mnt * b_mnt;
7360 int mode,
int *flags)
7362 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
7363 uint32_t a_mnt, b_mnt, c_mnt,
x;
7364 uint64_t x_mnt, y_mnt;
7392 (a_sgn != (b_sgn ^ c_sgn)))) {
7410 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
7418 y_sgn = b_sgn ^ c_sgn;
7420 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
7426 if (x_exp >= y_exp) {
7427 y_mnt = (
lsr64(y_mnt, x_exp - y_exp) |
7428 !!(y_mnt & (
lsl64(1, x_exp - y_exp) - 1)));
7431 x_mnt = (
lsr64(x_mnt, y_exp - x_exp) |
7432 !!(x_mnt & (
lsl64(1, y_exp - x_exp) - 1)));
7435 if (x_sgn == y_sgn) {
7436 x_mnt = x_mnt + y_mnt;
7437 }
else if (x_mnt >= y_mnt) {
7438 x_mnt = x_mnt - y_mnt;
7441 x_mnt = y_mnt - x_mnt;
7451 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
7460bf16_dot(uint32_t op1_a, uint32_t op1_b, uint32_t op2_a, uint32_t op2_b,
7461 int mode,
int* flags)
7463 int a1_sgn, b1_sgn, a2_sgn, b2_sgn, a1_exp, b1_exp, a2_exp, b2_exp;
7464 int pa_sgn, pb_sgn, pa_exp, pb_exp, x_sgn, x_exp;
7465 uint32_t a1_mnt, b1_mnt, a2_mnt, b2_mnt;
7466 uint64_t pa_mnt, pb_mnt, x_mnt,
x;
7483 bool a1_zero = !a1_exp && !a1_mnt;
7484 bool a2_zero = !a2_exp && !a2_mnt;
7485 bool b1_zero = !b1_exp && !b1_mnt;
7486 bool b2_zero = !b2_exp && !b2_mnt;
7490 pa_sgn = a1_sgn ^ a2_sgn;
7491 pb_sgn = b1_sgn ^ b2_sgn;
7492 bool pa_inf = a1_inf || a2_inf;
7493 bool pb_inf = b1_inf || b2_inf;
7494 bool pa_zero = a1_zero || a2_zero;
7495 bool pb_zero = b1_zero || b2_zero;
7499 bool invalidop = ((a1_inf && a2_zero) || (a1_zero && a2_inf) ||
7500 (b1_inf && b2_zero) || (b1_zero && b2_inf) ||
7501 (pa_inf && pb_inf && pa_sgn != pb_sgn));
7510 if ((pa_inf && !pa_sgn) || (pb_inf && !pb_sgn)) {
7512 }
else if ((pa_inf && pa_sgn) || (pb_inf && pb_sgn)) {
7518 if (pa_zero && pb_zero && (pa_sgn == pb_sgn)) {
7525 pa_mnt = (uint64_t)a1_mnt * a2_mnt << 3;
7528 pb_mnt = (uint64_t)b1_mnt * b2_mnt << 3;
7529 if (!pb_mnt && pa_mnt) {
7532 if (pb_mnt && !pa_mnt) {
7537 if (pa_exp >= pb_exp) {
7538 pb_mnt = (
lsr64(pb_mnt, pa_exp - pb_exp) |
7539 !!(pb_mnt & (
lsl64(1, pa_exp - pb_exp) - 1)));
7542 pa_mnt = (
lsr64(pa_mnt, pb_exp - pa_exp) |
7543 !!(pa_mnt & (
lsl64(1, pb_exp - pa_exp) - 1)));
7548 if (pa_sgn == pb_sgn) {
7549 x_mnt = pa_mnt + pb_mnt;
7550 }
else if (pa_mnt >= pb_mnt) {
7551 x_mnt = pa_mnt - pb_mnt;
7554 x_mnt = pb_mnt - pa_mnt;
7563 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
7625 rounding,
mode, &flags);
7646 uint32_t result =
fp32_max((uint32_t)op1 << 16, (uint32_t)op2 << 16,
7647 modeConv(fpscr, fpcr), &flags, fpcr.ah);
7648 result = result >> 16;
7665 uint32_t result =
fp32_max((uint32_t)op1 << 16, (uint32_t)op2 << 16,
7666 modeConv(fpscr, fpcr), &flags,
false);
7667 result = result >> 16;
7676 uint32_t result =
fp32_min((uint32_t)op1 << 16, (uint32_t)op2 << 16,
7677 modeConv(fpscr, fpcr), &flags, fpcr.ah);
7678 result = result >> 16;
7695 uint32_t result =
fp32_min((uint32_t)op1 << 16, (uint32_t)op2 << 16,
7696 modeConv(fpscr, fpcr), &flags,
false);
7697 result = result >> 16;
7715 uint32_t result =
fp32_mul((uint32_t)op1 << 16, (uint32_t)op2 << 16,
7723 FPSCR &fpscr, FPCR fpcr)
7734 FPSCR &fpscr, FPCR fpcr)
7744 addend, (uint32_t)op1 << 16, (uint32_t)op2 << 16, 0,
mode, &flags);
7782 uint16_t op2_a, uint16_t op2_b, FPSCR &fpscr, FPCR fpcr)
7789 (uint32_t)op1_a << 16, (uint32_t)op1_b << 16,
7790 (uint32_t)op2_a << 16, (uint32_t)op2_b << 16,
mode, &flags);
7791 uint32_t result =
fp32_add(addend, product, 0,
mode, &flags);
7800 (uint32_t)op1_a << 16, (uint32_t)op2_a << 16,
mode, &flags,
true);
7802 (uint32_t)op1_b << 16, (uint32_t)op2_b << 16,
mode, &flags,
true);
7803 uint32_t product =
fp32_add(product1, product2, 0,
mode, &flags,
true);
7804 uint32_t result =
fp32_add(addend, product, 0,
mode, &flags,
true);
Floating-point library code, which will gradually replace vfp.hh.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic(...)
This implements a cprintf based panic() function.
static uint64_t fp64_FPOnePointFive(int sgn)
uint16_t fplib32RSqrtStep(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint16_t bf16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
uint32_t fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t &nz)
Floating-point JS convert to a signed integer, with rounding to zero.
uint16_t fplib32RecipStep(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp64_is_infinity(int exp, uint64_t mnt)
static uint32_t fp32_FPConvertNaN_16(uint16_t op)
static constexpr int BF16_EXP_INF
static FPRounding FPCRRounding(FPSCR &fpscr)
uint32_t fplibMulAddH(uint32_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint64_t fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
static uint16_t fp16_normalise(uint16_t mnt, int *exp)
static uint16_t bf16_process_NaN(uint16_t a, int mode, int *flags)
static int fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
static uint32_t fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags, bool rm_odd=false)
static uint32_t fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
static void fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, int *flags)
static uint16_t BF16_MANT(uint16_t x)
static uint64_t fp64_FPConvertNaN_32(uint32_t op)
static int modeConv(FPSCR fpscr)
static uint64_t fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint32_t fplibBfMulH(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint16_t bf16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
uint16_t fplibBfMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static int fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
static uint32_t fp32_FPThree(int sgn)
static uint16_t bf16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags)
static uint32_t fp16_process_NaNs4(uint16_t a, uint16_t b, uint16_t c, uint16_t d, int mode, int *flags)
static uint64_t fp64_process_NaN(uint64_t a, int mode, int *flags)
static uint64_t fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
static void fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, int *flags)
static uint16_t fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
static uint16_t bf16_infinity(int sgn)
static uint64_t fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
static constexpr int BF16_BITS
uint32_t fplibAdd_Bf16(uint32_t op1, uint32_t op2, FPSCR &fpscr)
static uint16_t fp16_zero(int sgn)
static uint16_t fp16_defaultNaN(int mode)
static uint32_t fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags, bool rm_odd=false)
static uint16_t fp16_max_normal(int sgn)
uint16_t fplibBfMin(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint16_t lsl16(uint16_t x, uint32_t shift)
uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
bool fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr, FPCR fpcr)
static int fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
static void bf16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, int *flags)
static uint32_t fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, int mode, int *flags, bool rm_odd=false)
static uint16_t fp16_process_NaN(uint16_t a, int mode, int *flags)
static uint16_t bf16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
bool fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr, FPCR fpcr)
static uint16_t fp16_FPConvertNaN_32(uint32_t op)
static int fp16_is_quiet_NaN(int exp, uint16_t mnt)
static uint16_t fp16_max(uint16_t op1, uint16_t op2, int mode, int *flags, bool altfpminmax)
static void fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, int *flags)
uint16_t fplibBfMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint32_t FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static int fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr, FPCR fpcr)
static uint32_t lsr32(uint32_t x, uint32_t shift)
static uint64_t FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint32_t fp32_convert_default_nan(uint16_t op)
static uint16_t fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr, FPCR fpcr)
static int bf16_is_denormal(int exp, uint16_t mnt)
static uint64_t fp64_defaultNaN(int mode)
uint16_t fplibExpA(uint16_t op)
uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint32_t fp32_FPTwo(int sgn)
static int fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
uint32_t fp32_dot(uint16_t op1_a, uint16_t op1_b, uint16_t op2_a, uint16_t op2_b, int mode, int *flags)
static uint64_t fp64_FPConvertNaN_16(uint16_t op)
bool fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr, FPCR fpcr)
static uint32_t fp32_max(uint32_t op1, uint32_t op2, int mode, int *flags, bool altfpminmax)
static uint32_t fp32_muladdh(uint32_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags, bool rm_odd=false)
uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint16_t bf16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint16_t fp16_FPOnePointFive(int sgn)
uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint16_t fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
static int fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_min(uint16_t op1, uint16_t op2, int mode, int *flags, bool altfpminmax)
static uint16_t fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
uint16_t fplibLogB(uint16_t op, FPSCR &fpscr, FPCR fpcr)
static uint64_t fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
static uint32_t fp32_normalise(uint32_t mnt, int *exp)
static uint32_t fp32_infinity(int sgn)
static uint64_t lsr64(uint64_t x, uint32_t shift)
static uint32_t fp32_mul(uint32_t a, uint32_t b, int mode, int *flags, bool rm_odd=false)
static uint16_t bf16_max_normal(int sgn)
static uint32_t fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
static int fp64_is_NaN(int exp, uint64_t mnt)
static uint16_t fp16_FPTwo(int sgn)
static uint16_t fp16_sqrt(uint16_t a, int mode, int *flags)
static int fp32_is_NaN(int exp, uint32_t mnt)
static uint64_t fp64_FPTwo(int sgn)
static uint16_t lsr16(uint16_t x, uint32_t shift)
static uint32_t bf16_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
uint16_t fplibConvertBF(uint32_t op, FPRounding rounding, FPSCR &fpscr, FPCR fpcr)
static int fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
static uint16_t fp16_infinity(int sgn)
static int bf16_is_infinity(int exp, uint16_t mnt)
static uint32_t fp32_FPConvertNaN_64(uint64_t op)
static uint16_t fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags)
uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr, FPCR fpcr)
static void set_fpscr0(FPSCR &fpscr, int flags)
static uint64_t fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
static uint16_t FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint32_t lsl32(uint32_t x, uint32_t shift)
static void add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr, FPCR fpcr)
static uint32_t fp32_FPOnePointFive(int sgn)
uint32_t fplibBfdotAdd(uint32_t addend, uint16_t op1_a, uint16_t op1_b, uint16_t op2_a, uint16_t op2_b, FPSCR &fpscr, FPCR fpcr)
static constexpr int BF16_EXP_BITS
uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr, FPCR fpcr)
static int fp32_is_denormal(int exp, uint32_t mnt)
uint16_t fplibBfMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint64_t fp64_infinity(int sgn)
static int BF16_EXP(uint16_t x)
static int fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
static int fp16_is_infinity(int exp, uint16_t mnt)
static uint64_t fp64_FPThree(int sgn)
static constexpr int BF16_EXP_BIAS
static int fp16_is_denormal(int exp, uint16_t mnt)
static uint64_t fp64_sqrt(uint64_t a, int mode, int *flags)
static void fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
static int fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t bf16_defaultNaN(int mode)
uint32_t bf16_dot(uint32_t op1_a, uint32_t op1_b, uint32_t op2_a, uint32_t op2_b, int mode, int *flags)
static int fp32_is_signalling_NaN(int exp, uint32_t mnt)
static int bf16_is_signalling_NaN(int exp, uint16_t mnt)
static int fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
static int fp16_is_NaN(int exp, uint16_t mnt)
uint16_t fplibBfAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint64_t fp64_max(uint64_t op1, uint64_t op2, int mode, int *flags, bool altfpminmax)
static void fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint16_t fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
static void lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
static uint32_t fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
uint32_t fplibBfMulAddH(uint32_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint32_t fp32_zero(int sgn)
static void lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
static uint32_t fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
static void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint64_t fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
static int fp16_is_signalling_NaN(int exp, uint16_t mnt)
uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr, FPCR fpcr)
static uint16_t bf16_mul(uint16_t a, uint16_t b, int mode, int *flags)
static int fp64_is_denormal(int exp, uint64_t mnt)
static void fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibBfSub(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibAbs(uint16_t op, FPCR fpcr)
static uint64_t fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
static uint32_t fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
static const uint8_t recip_sqrt_estimate[256]
static uint32_t fp32_process_NaN(uint32_t a, int mode, int *flags)
uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static int fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
static uint32_t fp32_min(uint32_t op1, uint32_t op2, int mode, int *flags, bool altfpminmax)
static uint32_t fp32_defaultNaN(int mode)
static uint32_t fp32_process_NaNs4(uint32_t a, uint32_t b, uint32_t c, uint32_t d, int mode, int *flags)
static uint64_t lsl64(uint64_t x, uint32_t shift)
uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static int fp32_is_infinity(int exp, uint32_t mnt)
uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr, FPCR fpcr)
static uint64_t fp64_zero(int sgn)
static uint32_t fp32_sqrt(uint32_t a, int mode, int *flags)
static int fp64_is_quiet_NaN(int exp, uint64_t mnt)
uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr, FPCR fpcr)
static uint16_t fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
uint16_t fplibBfMax(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static int fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_FPThree(int sgn)
uint32_t fplibRoundIntN(uint32_t op, FPRounding rounding, bool exact, int intsize, FPSCR &fpscr, FPCR fpcr)
static uint16_t bf16_zero(int sgn)
static uint32_t fp32_process_NaNs3H(uint32_t a, uint16_t b, uint16_t c, int mode, int *flags)
uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static void bf16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
uint32_t fplibDot(uint16_t op1_a, uint16_t op1_b, uint16_t op2_a, uint16_t op2_b, FPSCR &fpscr, FPCR fpcr)
static void sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
static int bf16_is_NaN(int exp, uint16_t mnt)
static uint16_t fp16_halved_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
static int fp32_is_quiet_NaN(int exp, uint32_t mnt)
bool fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr, FPCR fpcr)
static uint64_t fp64_max_normal(int sgn)
uint16_t fplibBfNeg(uint16_t op, FPCR fpcr)
static uint32_t fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
uint16_t fplibInfinity(int sgn)
uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
uint16_t fplibDefaultNaN(FPCR fpcr)
static int bf16_is_quiet_NaN(int exp, uint16_t mnt)
static uint16_t bf16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
static uint16_t fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
static uint64_t fp64_min(uint64_t op1, uint64_t op2, int mode, int *flags, bool altfpminmax)
static void set_fpscr(FPSCR &fpscr, int flags)
static uint64_t fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
static uint16_t fp16_FPConvertNaN_64(uint64_t op)
static uint64_t fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
static uint64_t fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, int mode, int *flags)
static constexpr int BF16_MANT_BITS
static int fp64_is_signalling_NaN(int exp, uint64_t mnt)
static uint32_t fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
uint16_t fplibNeg(uint16_t op, FPCR fpcr)
static void fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
static uint64_t fp64_normalise(uint64_t mnt, int *exp)
uint16_t fplibBfMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr, FPCR fpcr)
static uint32_t fp32_max_normal(int sgn)
static void mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
static int cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
Copyright (c) 2024 Arm Limited All rights reserved.