// Cumulative floating-point exception flags, matching the FPSR bit values
// reported back to the caller by the fplib emulation routines.
#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

// IEEE-754 exponent field widths for half, single and double precision.
#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

// IEEE-754 exponent biases.
#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

// All-ones exponent field: encodes infinity (zero mantissa) or NaN.
#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

// Mantissa (fraction) field widths: total bits minus exponent minus sign.
// NOTE(review): FP16_BITS/FP32_BITS/FP64_BITS are defined elsewhere in the file.
#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

// Extract the biased exponent field from a raw encoding.
#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

// Extract the mantissa field from a raw encoding.
#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
94 static inline uint16_t
// Logical shift right of a 16-bit value. Shift counts >= 16 return 0
// explicitly, avoiding the undefined behavior of an over-wide shift.
// NOTE(review): signature reconstructed from the visible body and call
// sites such as lsr16(mnt, 3 - exp) — confirm against upstream.
static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}
106 static inline uint32_t
// Logical shift right of a 32-bit value. Shift counts >= 32 return 0
// explicitly, avoiding the undefined behavior of an over-wide shift.
// NOTE(review): signature reconstructed from the visible body and call
// sites such as lsr32(mnt, 3 - exp) — confirm against upstream.
static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}
118 static inline uint64_t
// Logical shift right of a 64-bit value. Shift counts >= 64 return 0
// explicitly, avoiding the undefined behavior of an over-wide shift.
// NOTE(review): signature reconstructed from the visible body and call
// sites such as lsr64(mnt, 3 - exp) — confirm against upstream.
static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}
// 128-bit logical shift left of the pair (x1:x0) into (*r1:*r0), where
// x0/r0 hold the low 64 bits and x1/r1 the high 64 bits. Shift counts
// >= 128 produce zero; each branch keeps every individual shift count
// strictly below 64 to avoid undefined behavior.
// NOTE(review): the shift==0, *r0, and >=128 branches were missing from
// the mangled source and are reconstructed from the visible skeleton and
// the function's contract — confirm against upstream.
static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}
// 128-bit logical shift right of the pair (x1:x0) into (*r1:*r0), where
// x0/r0 hold the low 64 bits and x1/r1 the high 64 bits. Shift counts
// >= 128 produce zero; each branch keeps every individual shift count
// strictly below 64 to avoid undefined behavior.
// NOTE(review): only the signature and branch skeleton survived in the
// mangled source; the branch bodies are reconstructed from the function's
// contract (mirror of lsl128) — confirm against upstream.
static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}
// Full 124-bit product of two values that must each fit in 62 bits.
// Uses Karatsuba-style splitting into 31-bit limbs so every partial
// product fits in 64 bits without overflow. On return *x0 holds the low
// 64 bits of the product and *x1 the high 60 bits.
// NOTE(review): the signature, limb splits, s0, and the *x1 store were
// missing from the mangled source and are reconstructed to match the
// visible accumulation lines — confirm against upstream.
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    // Split each operand into a low and high 31-bit limb.
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    // Middle term via (a0+a1)(b0+b1) - p0 - p2; operands are <= 32 bits
    // so the product cannot overflow 64 bits.
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    // Accumulate with carry propagation at 31-bit boundaries.
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}
// Full 96-bit product of a 64-bit value and a 32-bit value. On return
// *x0 holds the low 64 bits and *x1 the high 32 bits of a * b.
// NOTE(review): the final *x1 store was missing from the mangled source
// and is reconstructed as the high word of the accumulated product —
// confirm against upstream.
static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    // Low 32 bits of a times b: at most 64 bits, no overflow.
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    // High 32 bits of a times b, plus the carry out of t0.
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}
// 128-bit addition: (*x1:*x0) = (a1:a0) + (b1:b0), where the 0-suffixed
// words are the low 64 bits. The carry out of the top word is discarded.
// NOTE(review): the low-word add was missing from the mangled source and
// is reconstructed; it is what makes the visible (*x0 < a0) carry test
// valid — confirm against upstream.
static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    // *x0 < a0 iff the low-word addition wrapped, i.e. a carry occurred.
    *x1 = a1 + b1 + (*x0 < a0);
}
// 128-bit subtraction: (*x1:*x0) = (a1:a0) - (b1:b0), where the
// 0-suffixed words are the low 64 bits. Wraps modulo 2^128.
// NOTE(review): the low-word subtract was missing from the mangled
// source and is reconstructed; it is what makes the visible (*x0 > a0)
// borrow test valid — confirm against upstream.
static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    // *x0 > a0 iff the low-word subtraction wrapped, i.e. a borrow occurred.
    *x1 = a1 - b1 - (*x0 > a0);
}
// Three-way unsigned comparison of two 128-bit values (a1:a0) vs (b1:b0),
// high word first. Returns -1, 0 or 1 for less-than, equal, greater-than.
// NOTE(review): signature reconstructed from the visible return statement
// and the call site cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) — confirm
// against upstream.
static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}
215 static inline uint16_t
225 if (!(mnt >> (16 -
shift))) {
233 static inline uint32_t
243 if (!(mnt >> (32 -
shift))) {
251 static inline uint64_t
261 if (!(mnt >> (64 -
shift))) {
287 if (!(x1 >> (64 -
shift))) {
288 x1 = x1 << shift | x0 >> (64 -
shift);
298 static inline uint16_t
304 static inline uint32_t
310 static inline uint64_t
316 static inline uint16_t
322 static inline uint32_t
328 static inline uint64_t
334 static inline uint16_t
340 static inline uint32_t
346 static inline uint64_t
352 static inline uint16_t
358 static inline uint32_t
364 static inline uint64_t
370 static inline uint16_t
376 static inline uint32_t
382 static inline uint64_t
531 static inline uint16_t
541 static inline uint32_t
551 static inline uint64_t
742 int_mant =
lsr16(mnt, 3 - exp);
743 error = (
lsr16(mnt, 1 - exp) & 3) | !!(mnt & (
lsl16(1, 1 - exp) - 1));
746 if (!biased_exp && error) {
752 (error == 2 && (int_mant & 1)))) ||
792 return fp16_pack(sgn, biased_exp, int_mant);
826 int_mant =
lsr32(mnt, 3 - exp);
827 error = (
lsr32(mnt, 1 - exp) & 3) | !!(mnt & (
lsl32(1, 1 - exp) - 1));
830 if (!biased_exp && error) {
836 (error == 2 && (int_mant & 1)))) ||
869 return fp32_pack(sgn, biased_exp, int_mant);
903 int_mant =
lsr64(mnt, 3 - exp);
904 error = (
lsr64(mnt, 1 - exp) & 3) | !!(mnt & (
lsl64(1, 1 - exp) - 1));
907 if (!biased_exp && error) {
913 (error == 2 && (int_mant & 1)))) ||
946 return fp64_pack(sgn, biased_exp, int_mant);
958 int a_sgn, a_exp, b_sgn, b_exp;
959 uint16_t a_mnt, b_mnt;
971 return a ==
b || (!a_mnt && !b_mnt);
977 int a_sgn, a_exp, b_sgn, b_exp;
978 uint16_t a_mnt, b_mnt;
988 if (!a_mnt && !b_mnt)
993 return a_sgn ^ (a_exp > b_exp);
995 return a_sgn ^ (a_mnt > b_mnt);
1002 int a_sgn, a_exp, b_sgn, b_exp;
1003 uint16_t a_mnt, b_mnt;
1013 if (!a_mnt && !b_mnt)
1018 return a_sgn ^ (a_exp > b_exp);
1020 return a_sgn ^ (a_mnt > b_mnt);
1027 int a_sgn, a_exp, b_sgn, b_exp;
1028 uint16_t a_mnt, b_mnt;
1046 int a_sgn, a_exp, b_sgn, b_exp;
1047 uint32_t a_mnt, b_mnt;
1059 return a ==
b || (!a_mnt && !b_mnt);
1065 int a_sgn, a_exp, b_sgn, b_exp;
1066 uint32_t a_mnt, b_mnt;
1076 if (!a_mnt && !b_mnt)
1081 return a_sgn ^ (a_exp > b_exp);
1083 return a_sgn ^ (a_mnt > b_mnt);
1090 int a_sgn, a_exp, b_sgn, b_exp;
1091 uint32_t a_mnt, b_mnt;
1101 if (!a_mnt && !b_mnt)
1106 return a_sgn ^ (a_exp > b_exp);
1108 return a_sgn ^ (a_mnt > b_mnt);
1115 int a_sgn, a_exp, b_sgn, b_exp;
1116 uint32_t a_mnt, b_mnt;
1134 int a_sgn, a_exp, b_sgn, b_exp;
1135 uint64_t a_mnt, b_mnt;
1147 return a ==
b || (!a_mnt && !b_mnt);
1153 int a_sgn, a_exp, b_sgn, b_exp;
1154 uint64_t a_mnt, b_mnt;
1164 if (!a_mnt && !b_mnt)
1169 return a_sgn ^ (a_exp > b_exp);
1171 return a_sgn ^ (a_mnt > b_mnt);
1178 int a_sgn, a_exp, b_sgn, b_exp;
1179 uint64_t a_mnt, b_mnt;
1189 if (!a_mnt && !b_mnt)
1194 return a_sgn ^ (a_exp > b_exp);
1196 return a_sgn ^ (a_mnt > b_mnt);
1203 int a_sgn, a_exp, b_sgn, b_exp;
1204 uint64_t a_mnt, b_mnt;
1222 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1223 uint16_t a_mnt, b_mnt,
x, x_mnt;
1242 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1248 if (a_exp >= b_exp) {
1249 b_mnt = (
lsr16(b_mnt, a_exp - b_exp) |
1250 !!(b_mnt & (
lsl16(1, a_exp - b_exp) - 1)));
1253 a_mnt = (
lsr16(a_mnt, b_exp - a_exp) |
1254 !!(a_mnt & (
lsl16(1, b_exp - a_exp) - 1)));
1259 if (a_sgn == b_sgn) {
1260 x_mnt = a_mnt + b_mnt;
1261 }
else if (a_mnt >= b_mnt) {
1262 x_mnt = a_mnt - b_mnt;
1265 x_mnt = b_mnt - a_mnt;
1282 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1283 uint32_t a_mnt, b_mnt,
x, x_mnt;
1302 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1308 if (a_exp >= b_exp) {
1309 b_mnt = (
lsr32(b_mnt, a_exp - b_exp) |
1310 !!(b_mnt & (
lsl32(1, a_exp - b_exp) - 1)));
1313 a_mnt = (
lsr32(a_mnt, b_exp - a_exp) |
1314 !!(a_mnt & (
lsl32(1, b_exp - a_exp) - 1)));
1319 if (a_sgn == b_sgn) {
1320 x_mnt = a_mnt + b_mnt;
1321 }
else if (a_mnt >= b_mnt) {
1322 x_mnt = a_mnt - b_mnt;
1325 x_mnt = b_mnt - a_mnt;
1342 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1343 uint64_t a_mnt, b_mnt,
x, x_mnt;
1362 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1368 if (a_exp >= b_exp) {
1369 b_mnt = (
lsr64(b_mnt, a_exp - b_exp) |
1370 !!(b_mnt & (
lsl64(1, a_exp - b_exp) - 1)));
1373 a_mnt = (
lsr64(a_mnt, b_exp - a_exp) |
1374 !!(a_mnt & (
lsl64(1, b_exp - a_exp) - 1)));
1379 if (a_sgn == b_sgn) {
1380 x_mnt = a_mnt + b_mnt;
1381 }
else if (a_mnt >= b_mnt) {
1382 x_mnt = a_mnt - b_mnt;
1385 x_mnt = b_mnt - a_mnt;
1402 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1403 uint16_t a_mnt, b_mnt,
x;
1420 }
else if (!a_mnt || !b_mnt) {
1425 x_sgn = a_sgn ^ b_sgn;
1427 x_mnt = (uint32_t)a_mnt * b_mnt;
1439 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1440 uint32_t a_mnt, b_mnt,
x;
1457 }
else if (!a_mnt || !b_mnt) {
1462 x_sgn = a_sgn ^ b_sgn;
1464 x_mnt = (uint64_t)a_mnt * b_mnt;
1476 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1477 uint64_t a_mnt, b_mnt,
x;
1478 uint64_t x0_mnt, x1_mnt;
1494 }
else if (!a_mnt || !b_mnt) {
1499 x_sgn = a_sgn ^ b_sgn;
1501 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1505 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1512 int mode,
int *flags)
1514 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1515 uint16_t a_mnt, b_mnt, c_mnt,
x;
1516 uint32_t x_mnt, y_mnt;
1541 (a_sgn != (b_sgn ^ c_sgn)))) {
1549 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1557 y_sgn = b_sgn ^ c_sgn;
1559 y_mnt = (uint32_t)b_mnt * c_mnt << 3;
1565 if (x_exp >= y_exp) {
1566 y_mnt = (
lsr32(y_mnt, x_exp - y_exp) |
1567 !!(y_mnt & (
lsl32(1, x_exp - y_exp) - 1)));
1570 x_mnt = (
lsr32(x_mnt, y_exp - x_exp) |
1571 !!(x_mnt & (
lsl32(1, y_exp - x_exp) - 1)));
1574 if (x_sgn == y_sgn) {
1575 x_mnt = x_mnt + y_mnt;
1576 }
else if (x_mnt >= y_mnt) {
1577 x_mnt = x_mnt - y_mnt;
1580 x_mnt = y_mnt - x_mnt;
1590 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1597 int mode,
int *flags)
1599 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1600 uint32_t a_mnt, b_mnt, c_mnt,
x;
1601 uint64_t x_mnt, y_mnt;
1626 (a_sgn != (b_sgn ^ c_sgn)))) {
1634 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1642 y_sgn = b_sgn ^ c_sgn;
1644 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1650 if (x_exp >= y_exp) {
1651 y_mnt = (
lsr64(y_mnt, x_exp - y_exp) |
1652 !!(y_mnt & (
lsl64(1, x_exp - y_exp) - 1)));
1655 x_mnt = (
lsr64(x_mnt, y_exp - x_exp) |
1656 !!(x_mnt & (
lsl64(1, y_exp - x_exp) - 1)));
1659 if (x_sgn == y_sgn) {
1660 x_mnt = x_mnt + y_mnt;
1661 }
else if (x_mnt >= y_mnt) {
1662 x_mnt = x_mnt - y_mnt;
1665 x_mnt = y_mnt - x_mnt;
1675 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1682 int mode,
int *flags)
1684 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1685 uint64_t a_mnt, b_mnt, c_mnt,
x;
1686 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1711 (a_sgn != (b_sgn ^ c_sgn)))) {
1719 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1728 y_sgn = b_sgn ^ c_sgn;
1730 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1731 if (!y0_mnt && !y1_mnt) {
1736 if (x_exp >= y_exp) {
1739 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1740 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1741 y0_mnt |= !!(
t0 |
t1);
1746 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1747 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1748 x0_mnt |= !!(
t0 |
t1);
1751 if (x_sgn == y_sgn) {
1752 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1753 }
else if (
cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1754 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1757 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1760 if (!x0_mnt && !x1_mnt) {
1767 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1775 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1776 uint16_t a_mnt, b_mnt,
x;
1787 (!a_mnt && !b_mnt)) {
1801 x_sgn = a_sgn ^ b_sgn;
1804 x_mnt |= (x_mnt * b_mnt !=
1809 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1817 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1818 uint32_t a_mnt, b_mnt,
x;
1829 (!a_mnt && !b_mnt)) {
1843 x_sgn = a_sgn ^ b_sgn;
1846 x_mnt |= (x_mnt * b_mnt !=
1851 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1859 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp,
c;
1860 uint64_t a_mnt, b_mnt,
x, x_mnt, x0_mnt, x1_mnt;
1870 (!a_mnt && !b_mnt)) {
1885 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1886 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1887 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1888 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1889 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1890 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1893 x_sgn = a_sgn ^ b_sgn;
1895 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1896 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1900 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1901 c =
cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1957 b =
b < -300 ? -300 :
b;
1958 b =
b > 300 ? 300 :
b;
1991 b =
b < -300 ? -300 :
b;
1992 b =
b > 300 ? 300 :
b;
2025 b =
b < -3000 ? -3000 :
b;
2026 b =
b > 3000 ? 3000 :
b;
2039 int a_sgn, a_exp, x_sgn, x_exp;
2040 uint16_t a_mnt, x_mnt;
2066 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2069 x = (((uint32_t)a_mnt << 16) / (
x >> 15) + (
x >> 16)) << 15;
2072 x = (((uint32_t)a_mnt << 16) / (
x >> 15) + (
x >> 16)) << 15;
2075 x_exp = (a_exp + 27) >> 1;
2076 x_mnt = ((
x - (1 << 18)) >> 19) + 1;
2077 t1 = (uint32_t)x_mnt * x_mnt;
2078 t0 = (uint32_t)a_mnt << 9;
2091 int a_sgn, a_exp, x_sgn, x_exp;
2092 uint32_t a_mnt,
x, x_mnt;
2118 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2121 x = (a_mnt / (
x >> 15) + (
x >> 16)) << 15;
2124 x = (a_mnt / (
x >> 15) + (
x >> 16)) << 15;
2127 x = ((((uint64_t)a_mnt << 32) /
x) >> 2) + (
x >> 1);
2130 x_exp = (a_exp + 147) >> 1;
2131 x_mnt = ((
x - (1 << 5)) >> 6) + 1;
2132 t1 = (uint64_t)x_mnt * x_mnt;
2133 t0 = (uint64_t)a_mnt << 19;
2146 int a_sgn, a_exp, x_sgn, x_exp,
c;
2147 uint64_t a_mnt, x_mnt,
r, x0, x1;
2173 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2176 x = ((a_mnt >> 32) / (
x >> 15) + (
x >> 16)) << 15;
2179 x = ((a_mnt >> 32) / (
x >> 15) + (
x >> 16)) << 15;
2182 x = ((a_mnt /
x) >> 2) + (
x >> 1);
2185 r = ((uint64_t)1 << 62) /
x;
2189 lsr128(&x0, &x1, x0, x1, 31);
2192 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2193 lsl128(&x0, &x1, x0, x1, 5);
2194 lsr128(&x0, &x1, x0, x1, 56);
2196 x0 = ((uint64_t)
x << 31) + (x0 >> 1);
2199 x_exp = (a_exp + 1053) >> 1;
2201 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2203 lsl128(&x0, &x1, x0, x1, 19);
2216 uint32_t
x = (uint32_t)fpscr;
2217 return (
x >> 22 & 0xf) | (
x >> 19 & 1 ?
FPLIB_FZ16 : 0);
2225 bool underflow =
false;
2242 if ((flags &
FPLIB_IXC) && !(underflow && fpscr.fz)) {
2424 int sgn1, exp1, sgn2, exp2, result;
2425 uint16_t mnt1, mnt2;
2436 if (op1 == op2 || (!mnt1 && !mnt2)) {
2438 }
else if (sgn1 != sgn2) {
2439 result = sgn1 ? 8 : 2;
2440 }
else if (exp1 != exp2) {
2441 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2443 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2458 int sgn1, exp1, sgn2, exp2, result;
2459 uint32_t mnt1, mnt2;
2470 if (op1 == op2 || (!mnt1 && !mnt2)) {
2472 }
else if (sgn1 != sgn2) {
2473 result = sgn1 ? 8 : 2;
2474 }
else if (exp1 != exp2) {
2475 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2477 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2492 int sgn1, exp1, sgn2, exp2, result;
2493 uint64_t mnt1, mnt2;
2504 if (op1 == op2 || (!mnt1 && !mnt2)) {
2506 }
else if (sgn1 != sgn2) {
2507 result = sgn1 ? 8 : 2;
2508 }
else if (exp1 != exp2) {
2509 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2511 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2635 bool alt_hp = fpscr.ahp;
2640 }
else if (fpscr.dn) {
2650 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
2663 rounding, (
mode & 0xf) | alt_hp << 4, &flags);
2684 bool alt_hp = fpscr.ahp;
2689 }
else if (fpscr.dn) {
2699 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
2712 rounding, (
mode & 0xf) | alt_hp << 4, &flags);
2789 rounding,
mode, &flags);
2875 fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2885 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2895 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2937 static uint16_t coeff[32] = {
2972 coeff[
op & ((1 << 5) - 1)]);
2979 static uint32_t coeff[64] = {
3046 coeff[
op & ((1 << 6) - 1)]);
3053 static uint64_t coeff[64] = {
3120 coeff[
op & ((1 << 6) - 1)]);
3183 int sgn1, exp1, sgn2, exp2;
3184 uint16_t mnt1, mnt2,
x, result;
3192 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3206 int sgn1, exp1, sgn2, exp2;
3207 uint32_t mnt1, mnt2,
x, result;
3215 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3229 int sgn1, exp1, sgn2, exp2;
3230 uint64_t mnt1, mnt2,
x, result;
3238 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3251 return fplibMax<uint16_t>(op1, op2, fpscr);
3259 return fplibMax<uint32_t>(op1, op2, fpscr);
3267 return fplibMax<uint64_t>(op1, op2, fpscr);
3276 int sgn1, exp1, sgn2, exp2;
3277 uint16_t mnt1, mnt2,
x, result;
3285 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3299 int sgn1, exp1, sgn2, exp2;
3300 uint32_t mnt1, mnt2,
x, result;
3308 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3322 int sgn1, exp1, sgn2, exp2;
3323 uint64_t mnt1, mnt2,
x, result;
3331 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3344 return fplibMin<uint16_t>(op1, op2, fpscr);
3352 return fplibMin<uint32_t>(op1, op2, fpscr);
3360 return fplibMin<uint64_t>(op1, op2, fpscr);
3399 int sgn1, exp1, sgn2, exp2;
3400 uint16_t mnt1, mnt2, result;
3412 }
else if (!mnt1 || !mnt2) {
3430 int sgn1, exp1, sgn2, exp2;
3431 uint32_t mnt1, mnt2, result;
3443 }
else if (!mnt1 || !mnt2) {
3461 int sgn1, exp1, sgn2, exp2;
3462 uint64_t mnt1, mnt2, result;
3474 }
else if (!mnt1 || !mnt2) {
3508 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3509 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3510 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3511 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3512 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3513 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3514 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3515 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3516 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3517 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3518 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3519 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3520 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3521 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3522 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3523 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3533 uint16_t mnt, result;
3568 uint32_t mnt, result;
3603 uint64_t mnt, result;
3637 int sgn1, exp1, sgn2, exp2;
3638 uint16_t mnt1, mnt2, result;
3640 op1 = fplibNeg<uint16_t>(op1);
3667 int sgn1, exp1, sgn2, exp2;
3668 uint32_t mnt1, mnt2, result;
3670 op1 = fplibNeg<uint32_t>(op1);
3697 int sgn1, exp1, sgn2, exp2;
3698 uint64_t mnt1, mnt2, result;
3700 op1 = fplibNeg<uint64_t>(op1);
3728 uint16_t mnt, result;
3740 bool overflow_to_inf =
false;
3743 overflow_to_inf =
true;
3746 overflow_to_inf = !sgn;
3749 overflow_to_inf = sgn;
3752 overflow_to_inf =
false;
3755 panic(
"Unrecognized FP rounding mode");
3766 uint16_t fraction = (((uint32_t)1 << 19) /
3767 (mnt >> (
FP16_BITS - 10) | 1) + 1) >> 1;
3769 if (result_exp == 0) {
3771 }
else if (result_exp == -1) {
3775 result =
fp16_pack(sgn, result_exp, fraction);
3790 uint32_t mnt, result;
3802 bool overflow_to_inf =
false;
3805 overflow_to_inf =
true;
3808 overflow_to_inf = !sgn;
3811 overflow_to_inf = sgn;
3814 overflow_to_inf =
false;
3817 panic(
"Unrecognized FP rounding mode");
3828 uint32_t fraction = (((uint32_t)1 << 19) /
3829 (mnt >> (
FP32_BITS - 10) | 1) + 1) >> 1;
3831 if (result_exp == 0) {
3833 }
else if (result_exp == -1) {
3837 result =
fp32_pack(sgn, result_exp, fraction);
3852 uint64_t mnt, result;
3864 bool overflow_to_inf =
false;
3867 overflow_to_inf =
true;
3870 overflow_to_inf = !sgn;
3873 overflow_to_inf = sgn;
3876 overflow_to_inf =
false;
3879 panic(
"Unrecognized FP rounding mode");
3890 uint64_t fraction = (((uint32_t)1 << 19) /
3891 (mnt >> (
FP64_BITS - 10) | 1) + 1) >> 1;
3893 if (result_exp == 0) {
3895 }
else if (result_exp == -1) {
3899 result =
fp64_pack(sgn, result_exp, fraction);
3913 int sgn1, exp1, sgn2, exp2;
3914 uint16_t mnt1, mnt2, result;
3916 op1 = fplibNeg<uint16_t>(op1);
3943 int sgn1, exp1, sgn2, exp2;
3944 uint32_t mnt1, mnt2, result;
3946 op1 = fplibNeg<uint32_t>(op1);
3973 int sgn1, exp1, sgn2, exp2;
3974 uint64_t mnt1, mnt2, result;
3976 op1 = fplibNeg<uint64_t>(op1);
4004 uint16_t mnt, result;
4031 uint32_t mnt, result;
4058 uint64_t mnt, result;
4086 uint16_t mnt, result;
4098 }
else if (exp >= expint) {
4103 uint16_t
x = expint - exp >=
FP16_BITS ? 0 : mnt >> (expint - exp);
4105 ((mnt << 1 >> (expint - exp - 1) & 3) |
4106 ((uint16_t)(mnt << 2 << (
FP16_BITS + exp - expint)) != 0));
4109 x += (
err == 3 || (
err == 2 && (
x & 1)));
4123 panic(
"Unrecognized FP rounding mode");
4151 uint32_t mnt, result;
4163 }
else if (exp >= expint) {
4168 uint32_t
x = expint - exp >=
FP32_BITS ? 0 : mnt >> (expint - exp);
4170 ((mnt << 1 >> (expint - exp - 1) & 3) |
4171 ((uint32_t)(mnt << 2 << (
FP32_BITS + exp - expint)) != 0));
4174 x += (
err == 3 || (
err == 2 && (
x & 1)));
4188 panic(
"Unrecognized FP rounding mode");
4216 uint64_t mnt, result;
4228 }
else if (exp >= expint) {
4233 uint64_t
x = expint - exp >=
FP64_BITS ? 0 : mnt >> (expint - exp);
4235 ((mnt << 1 >> (expint - exp - 1) & 3) |
4236 ((uint64_t)(mnt << 2 << (
FP64_BITS + exp - expint)) != 0));
4239 x += (
err == 3 || (
err == 2 && (
x & 1)));
4253 panic(
"Unrecognized FP rounding mode");
4367 static uint16_t coeff[2][8] = {
4401 static uint32_t coeff[2][8] = {
4435 static uint64_t coeff[2][8] = {
4437 0x3ff0000000000000
ULL,
4438 0xbfc5555555555543
ULL,
4439 0x3f8111111110f30c
ULL,
4440 0xbf2a01a019b92fc6
ULL,
4441 0x3ec71de351f3d22b
ULL,
4442 0xbe5ae5e2b60f7b91
ULL,
4443 0x3de5d8408868552f
ULL,
4444 0x0000000000000000
ULL
4447 0x3ff0000000000000
ULL,
4448 0xbfe0000000000000
ULL,
4449 0x3fa5555555555536
ULL,
4450 0xbf56c16c16c13a0b
ULL,
4451 0x3efa01a019b1e8d8
ULL,
4452 0xbe927e4f7282f468
ULL,
4453 0x3e21ee96d2641b13
ULL,
4454 0xbda8f76380fbb401
ULL
4527 static constexpr uint16_t fpOne =
4531 return op1 ^ ((op2 >> 1) << (
FP16_BITS - 1));
4538 static constexpr uint32_t fpOne =
4542 return op1 ^ ((op2 >> 1) << (
FP32_BITS - 1));
4549 static constexpr uint64_t fpOne =
4553 return op1 ^ ((op2 >> 1) << (
FP64_BITS - 1));
4566 return ((uint64_t)!
u << (
FP64_BITS - 1)) - !sgn;
4570 err = (exp > expmax - 2 ? 0 :
4576 x += (
err == 3 || (
err == 2 && (
x & 1)));
4590 panic(
"Unrecognized FP rounding mode");
4595 return ((uint64_t)!
u << (
FP64_BITS - 1)) - !sgn;
4602 return sgn ? -
x :
x;
4612 (uint64_t)-
x <= (uint64_t)1 << (
FP32_BITS - 1))) {
4626 (uint64_t)-
x <= (uint64_t)1 << (
FP16_BITS - 1))) {
4640 uint16_t mnt, result;
4655 u, rounding, &flags);
4687 u, rounding, &flags);
4701 uint32_t mnt, result;
4716 u, rounding, &flags);
4743 result =
FPToFixed_32(sgn, exp + fbits, mnt,
u, rounding, &flags);
4758 uint32_t sgn =
bits(
op, 63);
4759 int32_t exp =
bits(
op, 62, 52);
4760 uint64_t mnt =
bits(
op, 51, 0);
4772 }
else if (exp == 0x7ff) {
4784 }
else if (mnt_shft >= 0) {
4785 result =
lsl64(mnt, mnt_shft);
4786 }
else if (mnt_shft < 0) {
4788 result =
lsr64(mnt, abs(mnt_shft));
4790 uint64_t max_result = (1UL << (
FP32_BITS - 1)) -!sgn;
4798 result = sgn ? -result : result;
4800 if (sgn == 1 && result == 0)
4840 u, rounding, &flags);
4870 u, rounding, &flags);
4884 uint64_t mnt, result;
4896 result =
FPToFixed_64(sgn, exp + fbits, mnt,
u, rounding, &flags);
4909 uint64_t x_mnt = x_sgn ? -
a :
a;
4929 uint64_t x_mnt = x_sgn ? -
a :
a;
4949 uint64_t x_mnt = x_sgn ? -
a :
a;
4968 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
4980 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
4992 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),