73#define FP16_EXP_BITS 5
74#define FP32_EXP_BITS 8
75#define FP64_EXP_BITS 11
77#define FP16_EXP_BIAS 15
78#define FP32_EXP_BIAS 127
79#define FP64_EXP_BIAS 1023
81#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
82#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
83#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)
85#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
86#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
87#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)
89#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
90#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
91#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))
93#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
94#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
95#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))
103static inline uint16_t
109static inline uint32_t
115static inline uint32_t
121static inline uint64_t
127static inline uint64_t
139 }
else if (
shift < 64) {
140 *r1 = x1 << shift | x0 >> (64 -
shift);
142 }
else if (
shift < 128) {
143 *r1 = x0 << (
shift - 64);
157 }
else if (
shift < 64) {
160 }
else if (
shift < 128) {
172 uint32_t
mask = ((uint32_t)1 << 31) - 1;
177 uint64_t p0 =
a0 *
b0;
178 uint64_t p2 =
a1 *
b1;
179 uint64_t p1 = (
a0 +
a1) * (
b0 +
b1) - p0 - p2;
181 uint64_t s1 = (s0 >> 31) + p1;
182 uint64_t s2 = (s1 >> 31) + p2;
183 *x0 = (s0 &
mask) | (s1 &
mask) << 31 | s2 << 62;
188void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t
a, uint32_t
b)
190 uint64_t
t0 = (uint64_t)(uint32_t)
a *
b;
191 uint64_t
t1 = (
t0 >> 32) + (
a >> 32) *
b;
192 *x0 =
t1 << 32 | (uint32_t)
t0;
197add128(uint64_t *x0, uint64_t *x1, uint64_t
a0, uint64_t
a1, uint64_t
b0,
201 *x1 =
a1 +
b1 + (*x0 <
a0);
205sub128(uint64_t *x0, uint64_t *x1, uint64_t
a0, uint64_t
a1, uint64_t
b0,
209 *x1 =
a1 -
b1 - (*x0 >
a0);
218static inline uint16_t
228 if (!(mnt >> (16 -
shift))) {
236static inline uint32_t
246 if (!(mnt >> (32 -
shift))) {
254static inline uint64_t
264 if (!(mnt >> (64 -
shift))) {
290 if (!(x1 >> (64 -
shift))) {
291 x1 = x1 << shift | x0 >> (64 -
shift);
301static inline uint16_t
307static inline uint32_t
313static inline uint64_t
319static inline uint16_t
325static inline uint32_t
331static inline uint64_t
337static inline uint16_t
343static inline uint32_t
349static inline uint64_t
355static inline uint16_t
361static inline uint32_t
367static inline uint64_t
373static inline uint16_t
379static inline uint32_t
385static inline uint64_t
534static inline uint16_t
544static inline uint32_t
554static inline uint64_t
745 int_mant =
lsr16(mnt, 3 - exp);
749 if (!biased_exp &&
error) {
755 (
error == 2 && (int_mant & 1)))) ||
795 return fp16_pack(sgn, biased_exp, int_mant);
829 int_mant =
lsr32(mnt, 3 - exp);
833 if (!biased_exp &&
error) {
839 (
error == 2 && (int_mant & 1)))) ||
872 return fp32_pack(sgn, biased_exp, int_mant);
906 int_mant =
lsr64(mnt, 3 - exp);
910 if (!biased_exp &&
error) {
916 (
error == 2 && (int_mant & 1)))) ||
949 return fp64_pack(sgn, biased_exp, int_mant);
961 int a_sgn, a_exp, b_sgn, b_exp;
962 uint16_t a_mnt, b_mnt;
974 return a ==
b || (!a_mnt && !b_mnt);
980 int a_sgn, a_exp, b_sgn, b_exp;
981 uint16_t a_mnt, b_mnt;
991 if (!a_mnt && !b_mnt)
996 return a_sgn ^ (a_exp > b_exp);
998 return a_sgn ^ (a_mnt > b_mnt);
1005 int a_sgn, a_exp, b_sgn, b_exp;
1006 uint16_t a_mnt, b_mnt;
1016 if (!a_mnt && !b_mnt)
1021 return a_sgn ^ (a_exp > b_exp);
1023 return a_sgn ^ (a_mnt > b_mnt);
1030 int a_sgn, a_exp, b_sgn, b_exp;
1031 uint16_t a_mnt, b_mnt;
1049 int a_sgn, a_exp, b_sgn, b_exp;
1050 uint32_t a_mnt, b_mnt;
1062 return a ==
b || (!a_mnt && !b_mnt);
1068 int a_sgn, a_exp, b_sgn, b_exp;
1069 uint32_t a_mnt, b_mnt;
1079 if (!a_mnt && !b_mnt)
1084 return a_sgn ^ (a_exp > b_exp);
1086 return a_sgn ^ (a_mnt > b_mnt);
1093 int a_sgn, a_exp, b_sgn, b_exp;
1094 uint32_t a_mnt, b_mnt;
1104 if (!a_mnt && !b_mnt)
1109 return a_sgn ^ (a_exp > b_exp);
1111 return a_sgn ^ (a_mnt > b_mnt);
1118 int a_sgn, a_exp, b_sgn, b_exp;
1119 uint32_t a_mnt, b_mnt;
1137 int a_sgn, a_exp, b_sgn, b_exp;
1138 uint64_t a_mnt, b_mnt;
1150 return a ==
b || (!a_mnt && !b_mnt);
1156 int a_sgn, a_exp, b_sgn, b_exp;
1157 uint64_t a_mnt, b_mnt;
1167 if (!a_mnt && !b_mnt)
1172 return a_sgn ^ (a_exp > b_exp);
1174 return a_sgn ^ (a_mnt > b_mnt);
1181 int a_sgn, a_exp, b_sgn, b_exp;
1182 uint64_t a_mnt, b_mnt;
1192 if (!a_mnt && !b_mnt)
1197 return a_sgn ^ (a_exp > b_exp);
1199 return a_sgn ^ (a_mnt > b_mnt);
1206 int a_sgn, a_exp, b_sgn, b_exp;
1207 uint64_t a_mnt, b_mnt;
1225 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1226 uint16_t a_mnt, b_mnt,
x, x_mnt;
1245 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1251 if (a_exp >= b_exp) {
1252 b_mnt = (
lsr16(b_mnt, a_exp - b_exp) |
1253 !!(b_mnt & (
lsl16(1, a_exp - b_exp) - 1)));
1256 a_mnt = (
lsr16(a_mnt, b_exp - a_exp) |
1257 !!(a_mnt & (
lsl16(1, b_exp - a_exp) - 1)));
1262 if (a_sgn == b_sgn) {
1263 x_mnt = a_mnt + b_mnt;
1264 }
else if (a_mnt >= b_mnt) {
1265 x_mnt = a_mnt - b_mnt;
1268 x_mnt = b_mnt - a_mnt;
1285 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1286 uint32_t a_mnt, b_mnt,
x, x_mnt;
1305 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1311 if (a_exp >= b_exp) {
1312 b_mnt = (
lsr32(b_mnt, a_exp - b_exp) |
1313 !!(b_mnt & (
lsl32(1, a_exp - b_exp) - 1)));
1316 a_mnt = (
lsr32(a_mnt, b_exp - a_exp) |
1317 !!(a_mnt & (
lsl32(1, b_exp - a_exp) - 1)));
1322 if (a_sgn == b_sgn) {
1323 x_mnt = a_mnt + b_mnt;
1324 }
else if (a_mnt >= b_mnt) {
1325 x_mnt = a_mnt - b_mnt;
1328 x_mnt = b_mnt - a_mnt;
1345 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1346 uint64_t a_mnt, b_mnt,
x, x_mnt;
1365 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1371 if (a_exp >= b_exp) {
1372 b_mnt = (
lsr64(b_mnt, a_exp - b_exp) |
1373 !!(b_mnt & (
lsl64(1, a_exp - b_exp) - 1)));
1376 a_mnt = (
lsr64(a_mnt, b_exp - a_exp) |
1377 !!(a_mnt & (
lsl64(1, b_exp - a_exp) - 1)));
1382 if (a_sgn == b_sgn) {
1383 x_mnt = a_mnt + b_mnt;
1384 }
else if (a_mnt >= b_mnt) {
1385 x_mnt = a_mnt - b_mnt;
1388 x_mnt = b_mnt - a_mnt;
1405 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1406 uint16_t a_mnt, b_mnt,
x;
1423 }
else if (!a_mnt || !b_mnt) {
1428 x_sgn = a_sgn ^ b_sgn;
1430 x_mnt = (uint32_t)a_mnt * b_mnt;
1442 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1443 uint32_t a_mnt, b_mnt,
x;
1460 }
else if (!a_mnt || !b_mnt) {
1465 x_sgn = a_sgn ^ b_sgn;
1467 x_mnt = (uint64_t)a_mnt * b_mnt;
1479 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1480 uint64_t a_mnt, b_mnt,
x;
1481 uint64_t x0_mnt, x1_mnt;
1497 }
else if (!a_mnt || !b_mnt) {
1502 x_sgn = a_sgn ^ b_sgn;
1504 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1508 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1517 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1518 uint16_t a_mnt, b_mnt, c_mnt,
x;
1519 uint32_t x_mnt, y_mnt;
1544 (a_sgn != (b_sgn ^ c_sgn)))) {
1552 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1560 y_sgn = b_sgn ^ c_sgn;
1562 y_mnt = (uint32_t)b_mnt * c_mnt << 3;
1568 if (x_exp >= y_exp) {
1569 y_mnt = (
lsr32(y_mnt, x_exp - y_exp) |
1570 !!(y_mnt & (
lsl32(1, x_exp - y_exp) - 1)));
1573 x_mnt = (
lsr32(x_mnt, y_exp - x_exp) |
1574 !!(x_mnt & (
lsl32(1, y_exp - x_exp) - 1)));
1577 if (x_sgn == y_sgn) {
1578 x_mnt = x_mnt + y_mnt;
1579 }
else if (x_mnt >= y_mnt) {
1580 x_mnt = x_mnt - y_mnt;
1583 x_mnt = y_mnt - x_mnt;
1593 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1602 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1603 uint32_t a_mnt, b_mnt, c_mnt,
x;
1604 uint64_t x_mnt, y_mnt;
1629 (a_sgn != (b_sgn ^ c_sgn)))) {
1637 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1645 y_sgn = b_sgn ^ c_sgn;
1647 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1653 if (x_exp >= y_exp) {
1654 y_mnt = (
lsr64(y_mnt, x_exp - y_exp) |
1655 !!(y_mnt & (
lsl64(1, x_exp - y_exp) - 1)));
1658 x_mnt = (
lsr64(x_mnt, y_exp - x_exp) |
1659 !!(x_mnt & (
lsl64(1, y_exp - x_exp) - 1)));
1662 if (x_sgn == y_sgn) {
1663 x_mnt = x_mnt + y_mnt;
1664 }
else if (x_mnt >= y_mnt) {
1665 x_mnt = x_mnt - y_mnt;
1668 x_mnt = y_mnt - x_mnt;
1678 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1687 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1688 uint64_t a_mnt, b_mnt, c_mnt,
x;
1689 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1714 (a_sgn != (b_sgn ^ c_sgn)))) {
1722 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1731 y_sgn = b_sgn ^ c_sgn;
1733 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1734 if (!y0_mnt && !y1_mnt) {
1739 if (x_exp >= y_exp) {
1742 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1743 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1744 y0_mnt |= !!(
t0 |
t1);
1749 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1750 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1751 x0_mnt |= !!(
t0 |
t1);
1754 if (x_sgn == y_sgn) {
1755 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1756 }
else if (
cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1757 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1760 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1763 if (!x0_mnt && !x1_mnt) {
1770 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1778 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1779 uint16_t a_mnt, b_mnt,
x;
1790 (!a_mnt && !b_mnt)) {
1804 x_sgn = a_sgn ^ b_sgn;
1807 x_mnt |= (x_mnt * b_mnt !=
1812 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1820 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1821 uint32_t a_mnt, b_mnt,
x;
1832 (!a_mnt && !b_mnt)) {
1846 x_sgn = a_sgn ^ b_sgn;
1849 x_mnt |= (x_mnt * b_mnt !=
1854 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1862 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp,
c;
1863 uint64_t a_mnt, b_mnt,
x, x_mnt, x0_mnt, x1_mnt;
1873 (!a_mnt && !b_mnt)) {
1888 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1889 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1890 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1891 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1892 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1893 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1896 x_sgn = a_sgn ^ b_sgn;
1898 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1899 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1903 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1904 c =
cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1960 b =
b < -300 ? -300 :
b;
1961 b =
b > 300 ? 300 :
b;
1994 b =
b < -300 ? -300 :
b;
1995 b =
b > 300 ? 300 :
b;
2028 b =
b < -3000 ? -3000 :
b;
2029 b =
b > 3000 ? 3000 :
b;
2042 int a_sgn, a_exp, x_sgn, x_exp;
2043 uint16_t a_mnt, x_mnt;
2069 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2072 x = (((uint32_t)a_mnt << 16) / (
x >> 15) + (
x >> 16)) << 15;
2075 x = (((uint32_t)a_mnt << 16) / (
x >> 15) + (
x >> 16)) << 15;
2078 x_exp = (a_exp + 27) >> 1;
2079 x_mnt = ((
x - (1 << 18)) >> 19) + 1;
2080 t1 = (uint32_t)x_mnt * x_mnt;
2081 t0 = (uint32_t)a_mnt << 9;
2094 int a_sgn, a_exp, x_sgn, x_exp;
2095 uint32_t a_mnt,
x, x_mnt;
2121 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2124 x = (a_mnt / (
x >> 15) + (
x >> 16)) << 15;
2127 x = (a_mnt / (
x >> 15) + (
x >> 16)) << 15;
2130 x = ((((uint64_t)a_mnt << 32) /
x) >> 2) + (
x >> 1);
2133 x_exp = (a_exp + 147) >> 1;
2134 x_mnt = ((
x - (1 << 5)) >> 6) + 1;
2135 t1 = (uint64_t)x_mnt * x_mnt;
2136 t0 = (uint64_t)a_mnt << 19;
2149 int a_sgn, a_exp, x_sgn, x_exp,
c;
2150 uint64_t a_mnt, x_mnt,
r, x0, x1;
2176 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2179 x = ((a_mnt >> 32) / (
x >> 15) + (
x >> 16)) << 15;
2182 x = ((a_mnt >> 32) / (
x >> 15) + (
x >> 16)) << 15;
2185 x = ((a_mnt /
x) >> 2) + (
x >> 1);
2188 r = ((uint64_t)1 << 62) /
x;
2192 lsr128(&x0, &x1, x0, x1, 31);
2195 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2196 lsl128(&x0, &x1, x0, x1, 5);
2197 lsr128(&x0, &x1, x0, x1, 56);
2199 x0 = ((uint64_t)
x << 31) + (x0 >> 1);
2202 x_exp = (a_exp + 1053) >> 1;
2204 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2206 lsl128(&x0, &x1, x0, x1, 19);
2219 uint32_t
x = (uint32_t)fpscr;
2220 return (
x >> 22 & 0xf) | (
x >> 19 & 1 ?
FPLIB_FZ16 : 0);
2228 bool underflow =
false;
2427 int sgn1, exp1, sgn2, exp2, result;
2428 uint16_t mnt1, mnt2;
2439 if (op1 == op2 || (!mnt1 && !mnt2)) {
2441 }
else if (sgn1 != sgn2) {
2442 result = sgn1 ? 8 : 2;
2443 }
else if (exp1 != exp2) {
2444 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2446 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2461 int sgn1, exp1, sgn2, exp2, result;
2462 uint32_t mnt1, mnt2;
2473 if (op1 == op2 || (!mnt1 && !mnt2)) {
2475 }
else if (sgn1 != sgn2) {
2476 result = sgn1 ? 8 : 2;
2477 }
else if (exp1 != exp2) {
2478 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2480 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2495 int sgn1, exp1, sgn2, exp2, result;
2496 uint64_t mnt1, mnt2;
2507 if (op1 == op2 || (!mnt1 && !mnt2)) {
2509 }
else if (sgn1 != sgn2) {
2510 result = sgn1 ? 8 : 2;
2511 }
else if (exp1 != exp2) {
2512 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2514 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2638 bool alt_hp = fpscr.ahp;
2643 }
else if (fpscr.dn) {
2653 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
2666 rounding, (
mode & 0xf) | alt_hp << 4, &
flags);
2687 bool alt_hp = fpscr.ahp;
2692 }
else if (fpscr.dn) {
2702 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
2715 rounding, (
mode & 0xf) | alt_hp << 4, &
flags);
2940 static uint16_t coeff[32] = {
2975 coeff[
op & ((1 << 5) - 1)]);
2982 static uint32_t coeff[64] = {
3049 coeff[
op & ((1 << 6) - 1)]);
3056 static uint64_t coeff[64] = {
3123 coeff[
op & ((1 << 6) - 1)]);
3186 int sgn1, exp1, sgn2, exp2;
3187 uint16_t mnt1, mnt2,
x, result;
3195 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3209 int sgn1, exp1, sgn2, exp2;
3210 uint32_t mnt1, mnt2,
x, result;
3218 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3232 int sgn1, exp1, sgn2, exp2;
3233 uint64_t mnt1, mnt2,
x, result;
3241 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3279 int sgn1, exp1, sgn2, exp2;
3280 uint16_t mnt1, mnt2,
x, result;
3288 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3302 int sgn1, exp1, sgn2, exp2;
3303 uint32_t mnt1, mnt2,
x, result;
3311 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3325 int sgn1, exp1, sgn2, exp2;
3326 uint64_t mnt1, mnt2,
x, result;
3334 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3402 int sgn1, exp1, sgn2, exp2;
3403 uint16_t mnt1, mnt2, result;
3415 }
else if (!mnt1 || !mnt2) {
3433 int sgn1, exp1, sgn2, exp2;
3434 uint32_t mnt1, mnt2, result;
3446 }
else if (!mnt1 || !mnt2) {
3464 int sgn1, exp1, sgn2, exp2;
3465 uint64_t mnt1, mnt2, result;
3477 }
else if (!mnt1 || !mnt2) {
3511 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3512 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3513 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3514 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3515 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3516 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3517 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3518 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3519 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3520 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3521 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3522 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3523 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3524 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3525 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3526 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3536 uint16_t mnt, result;
3571 uint32_t mnt, result;
3606 uint64_t mnt, result;
3640 int sgn1, exp1, sgn2, exp2;
3641 uint16_t mnt1, mnt2, result;
3670 int sgn1, exp1, sgn2, exp2;
3671 uint32_t mnt1, mnt2, result;
3700 int sgn1, exp1, sgn2, exp2;
3701 uint64_t mnt1, mnt2, result;
3731 uint16_t mnt, result;
3743 bool overflow_to_inf =
false;
3746 overflow_to_inf =
true;
3749 overflow_to_inf = !sgn;
3752 overflow_to_inf = sgn;
3755 overflow_to_inf =
false;
3758 panic(
"Unrecognized FP rounding mode");
3769 uint16_t fraction = (((uint32_t)1 << 19) /
3770 (mnt >> (
FP16_BITS - 10) | 1) + 1) >> 1;
3772 if (result_exp == 0) {
3774 }
else if (result_exp == -1) {
3778 result =
fp16_pack(sgn, result_exp, fraction);
3793 uint32_t mnt, result;
3805 bool overflow_to_inf =
false;
3808 overflow_to_inf =
true;
3811 overflow_to_inf = !sgn;
3814 overflow_to_inf = sgn;
3817 overflow_to_inf =
false;
3820 panic(
"Unrecognized FP rounding mode");
3831 uint32_t fraction = (((uint32_t)1 << 19) /
3832 (mnt >> (
FP32_BITS - 10) | 1) + 1) >> 1;
3834 if (result_exp == 0) {
3836 }
else if (result_exp == -1) {
3840 result =
fp32_pack(sgn, result_exp, fraction);
3855 uint64_t mnt, result;
3867 bool overflow_to_inf =
false;
3870 overflow_to_inf =
true;
3873 overflow_to_inf = !sgn;
3876 overflow_to_inf = sgn;
3879 overflow_to_inf =
false;
3882 panic(
"Unrecognized FP rounding mode");
3893 uint64_t fraction = (((uint32_t)1 << 19) /
3894 (mnt >> (
FP64_BITS - 10) | 1) + 1) >> 1;
3896 if (result_exp == 0) {
3898 }
else if (result_exp == -1) {
3902 result =
fp64_pack(sgn, result_exp, fraction);
3916 int sgn1, exp1, sgn2, exp2;
3917 uint16_t mnt1, mnt2, result;
3946 int sgn1, exp1, sgn2, exp2;
3947 uint32_t mnt1, mnt2, result;
3976 int sgn1, exp1, sgn2, exp2;
3977 uint64_t mnt1, mnt2, result;
4007 uint16_t mnt, result;
4034 uint32_t mnt, result;
4061 uint64_t mnt, result;
4089 uint16_t mnt, result;
4101 }
else if (exp >= expint) {
4106 uint16_t
x = expint - exp >=
FP16_BITS ? 0 : mnt >> (expint - exp);
4108 ((mnt << 1 >> (expint - exp - 1) & 3) |
4109 ((uint16_t)(mnt << 2 << (
FP16_BITS + exp - expint)) != 0));
4112 x += (
err == 3 || (
err == 2 && (
x & 1)));
4126 panic(
"Unrecognized FP rounding mode");
4154 uint32_t mnt, result;
4166 }
else if (exp >= expint) {
4171 uint32_t
x = expint - exp >=
FP32_BITS ? 0 : mnt >> (expint - exp);
4173 ((mnt << 1 >> (expint - exp - 1) & 3) |
4174 ((uint32_t)(mnt << 2 << (
FP32_BITS + exp - expint)) != 0));
4177 x += (
err == 3 || (
err == 2 && (
x & 1)));
4191 panic(
"Unrecognized FP rounding mode");
4219 uint64_t mnt, result;
4231 }
else if (exp >= expint) {
4236 uint64_t
x = expint - exp >=
FP64_BITS ? 0 : mnt >> (expint - exp);
4238 ((mnt << 1 >> (expint - exp - 1) & 3) |
4239 ((uint64_t)(mnt << 2 << (
FP64_BITS + exp - expint)) != 0));
4242 x += (
err == 3 || (
err == 2 && (
x & 1)));
4256 panic(
"Unrecognized FP rounding mode");
4370 static uint16_t coeff[2][8] = {
4404 static uint32_t coeff[2][8] = {
4438 static uint64_t coeff[2][8] = {
4440 0x3ff0000000000000ULL,
4441 0xbfc5555555555543ULL,
4442 0x3f8111111110f30cULL,
4443 0xbf2a01a019b92fc6ULL,
4444 0x3ec71de351f3d22bULL,
4445 0xbe5ae5e2b60f7b91ULL,
4446 0x3de5d8408868552fULL,
4447 0x0000000000000000ULL
4450 0x3ff0000000000000ULL,
4451 0xbfe0000000000000ULL,
4452 0x3fa5555555555536ULL,
4453 0xbf56c16c16c13a0bULL,
4454 0x3efa01a019b1e8d8ULL,
4455 0xbe927e4f7282f468ULL,
4456 0x3e21ee96d2641b13ULL,
4457 0xbda8f76380fbb401ULL
4482 result = (result & ~(1ULL << (
FP16_BITS - 1))) |
4530 static constexpr uint16_t fpOne =
4534 return op1 ^ ((op2 >> 1) << (
FP16_BITS - 1));
4541 static constexpr uint32_t fpOne =
4545 return op1 ^ ((op2 >> 1) << (
FP32_BITS - 1));
4552 static constexpr uint64_t fpOne =
4556 return op1 ^ ((op2 >> 1) << (
FP64_BITS - 1));
4569 return ((uint64_t)!
u << (
FP64_BITS - 1)) - !sgn;
4573 err = (exp > expmax - 2 ? 0 :
4579 x += (
err == 3 || (
err == 2 && (
x & 1)));
4593 panic(
"Unrecognized FP rounding mode");
4596 if (
u ? sgn &&
x :
x > (1ULL << (
FP64_BITS - 1)) - !sgn) {
4598 return ((uint64_t)!
u << (
FP64_BITS - 1)) - !sgn;
4605 return sgn ? -
x :
x;
4615 (uint64_t)-
x <= (uint64_t)1 << (
FP32_BITS - 1))) {
4629 (uint64_t)-
x <= (uint64_t)1 << (
FP16_BITS - 1))) {
4643 uint16_t mnt, result;
4704 uint32_t mnt, result;
4761 uint32_t sgn =
bits(
op, 63);
4762 int32_t exp =
bits(
op, 62, 52);
4763 uint64_t mnt =
bits(
op, 51, 0);
4775 }
else if (exp == 0x7ff) {
4787 }
else if (mnt_shft >= 0) {
4788 result =
lsl64(mnt, mnt_shft);
4789 }
else if (mnt_shft < 0) {
4791 result =
lsr64(mnt, abs(mnt_shft));
4793 uint64_t max_result = (1UL << (
FP32_BITS - 1)) -!sgn;
4801 result = sgn ? -result : result;
4803 if (sgn == 1 && result == 0)
4887 uint64_t mnt, result;
4912 uint64_t x_mnt = x_sgn ? -
a :
a;
4932 uint64_t x_mnt = x_sgn ? -
a :
a;
4952 uint64_t x_mnt = x_sgn ? -
a :
a;
4971 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
4983 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
4995 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
Floating-point library code, which will gradually replace vfp.hh.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic(...)
This implements a cprintf based panic() function.
uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_FPOnePointFive(int sgn)
uint32_t fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t &nz)
Floating-point JS convert to a signed integer, with rounding to zero.
static int fp64_is_infinity(int exp, uint64_t mnt)
static uint32_t fp32_FPConvertNaN_16(uint16_t op)
static FPRounding FPCRRounding(FPSCR &fpscr)
static uint64_t fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint16_t fp16_normalise(uint16_t mnt, int *exp)
static int fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
static uint32_t fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
static void fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, int *flags)
uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_FPConvertNaN_32(uint32_t op)
static int modeConv(FPSCR fpscr)
static uint64_t fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
static uint32_t fp32_FPThree(int sgn)
static uint64_t fp64_process_NaN(uint64_t a, int mode, int *flags)
static uint64_t fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
static void fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, int *flags)
static uint16_t fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
static uint64_t fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
static uint16_t fp16_zero(int sgn)
uint16_t fplibNeg(uint16_t op)
static uint16_t fp16_max_normal(int sgn)
bool fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint16_t lsl16(uint16_t x, uint32_t shift)
static int fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
static uint16_t fp16_process_NaN(uint16_t a, int mode, int *flags)
static uint16_t fp16_FPConvertNaN_32(uint32_t op)
static int fp16_is_quiet_NaN(int exp, uint16_t mnt)
static uint32_t fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
static void fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, int *flags)
static uint32_t fp32_defaultNaN()
static uint32_t FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static int fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
static uint32_t lsr32(uint32_t x, uint32_t shift)
static uint64_t FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint16_t fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
uint16_t fplibExpA(uint16_t op)
static uint32_t fp32_FPTwo(int sgn)
uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
static uint64_t fp64_FPConvertNaN_16(uint16_t op)
uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
static uint16_t fp16_FPOnePointFive(int sgn)
static uint16_t fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
static int fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr)
static uint16_t fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
static uint64_t fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
static uint32_t fp32_normalise(uint32_t mnt, int *exp)
static uint32_t fp32_infinity(int sgn)
static uint64_t lsr64(uint64_t x, uint32_t shift)
bool fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint32_t fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
static int fp64_is_NaN(int exp, uint64_t mnt)
uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint16_t fp16_FPTwo(int sgn)
static uint16_t fp16_sqrt(uint16_t a, int mode, int *flags)
static int fp32_is_NaN(int exp, uint32_t mnt)
static uint64_t fp64_FPTwo(int sgn)
static uint16_t lsr16(uint16_t x, uint32_t shift)
static int fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
static uint16_t fp16_infinity(int sgn)
uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint32_t fp32_FPConvertNaN_64(uint64_t op)
static uint16_t fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags)
static void set_fpscr0(FPSCR &fpscr, int flags)
static uint64_t fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
uint16_t fplibAbs(uint16_t op)
static uint16_t FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint16_t fp16_defaultNaN()
static uint32_t lsl32(uint32_t x, uint32_t shift)
static void add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
static uint32_t fp32_FPOnePointFive(int sgn)
bool fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint64_t fp64_infinity(int sgn)
static int fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
static int fp16_is_infinity(int exp, uint16_t mnt)
uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_FPThree(int sgn)
static uint64_t fp64_sqrt(uint64_t a, int mode, int *flags)
static void fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
static int fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp32_is_signalling_NaN(int exp, uint32_t mnt)
static int fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
static int fp16_is_NaN(int exp, uint16_t mnt)
static void fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
static uint16_t fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
static void lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
static uint32_t fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
static uint32_t fp32_zero(int sgn)
static void lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
static uint32_t fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
static void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr)
static uint16_t fp16_repack(int sgn, int exp, uint16_t mnt)
static uint64_t fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
static int fp16_is_signalling_NaN(int exp, uint16_t mnt)
static uint64_t fp64_repack(int sgn, int exp, uint64_t mnt)
uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static void fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
static uint64_t fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
uint16_t fplibDefaultNaN()
static uint32_t fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
static uint64_t fp64_defaultNaN()
static const uint8_t recip_sqrt_estimate[256]
static uint32_t fp32_process_NaN(uint32_t a, int mode, int *flags)
uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t lsl64(uint64_t x, uint32_t shift)
static int fp32_is_infinity(int exp, uint32_t mnt)
static uint64_t fp64_zero(int sgn)
static uint32_t fp32_sqrt(uint32_t a, int mode, int *flags)
static int fp64_is_quiet_NaN(int exp, uint64_t mnt)
static uint16_t fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
static int fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_FPThree(int sgn)
static uint32_t fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, int mode, int *flags)
static uint32_t fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
static void sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
static int fp32_is_quiet_NaN(int exp, uint32_t mnt)
static uint64_t fp64_max_normal(int sgn)
static uint32_t fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
uint16_t fplibInfinity(int sgn)
uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
static uint16_t fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
static void set_fpscr(FPSCR &fpscr, int flags)
static uint64_t fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
static uint16_t fp16_FPConvertNaN_64(uint64_t op)
static uint32_t fp32_repack(int sgn, int exp, uint32_t mnt)
static uint64_t fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, int mode, int *flags)
static uint32_t fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
static int fp64_is_signalling_NaN(int exp, uint64_t mnt)
bool fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint32_t fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
static void fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
static uint64_t fp64_normalise(uint64_t mnt, int *exp)
static uint32_t fp32_max_normal(int sgn)
static void mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
static int cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.