/* Cumulative FP exception flag bits (match FPSCR cumulative flags). */
#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16 // Inexact
#define FPLIB_UFC 8 // Underflow
#define FPLIB_OFC 4 // Overflow
#define FPLIB_DZC 2 // Division by Zero
#define FPLIB_IOC 1 // Invalid Operation

/* IEEE 754 binary16/32/64 format parameters. */
#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

/* All-ones exponent encodes infinity / NaN. */
#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

/* NOTE(review): FP16_BITS/FP32_BITS/FP64_BITS (total widths) are
 * defined elsewhere in this file — not visible in this chunk. */
#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

/* Extract the (biased) exponent field of an encoded value. */
#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

/* Extract the mantissa (fraction) field of an encoded value. */
#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))

// Logical left shift of a 16-bit value; shift counts of 16 or more
// return 0 instead of invoking an out-of-range (undefined) shift.
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    if (shift >= 16)
        return 0;
    return x << shift;
}
// Logical right shift of a 16-bit value; shift counts of 16 or more
// return 0 instead of invoking an out-of-range (undefined) shift.
static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    if (shift >= 16)
        return 0;
    return x >> shift;
}
// Logical left shift of a 32-bit value; shift counts of 32 or more
// return 0 instead of invoking an out-of-range (undefined) shift.
static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    if (shift >= 32)
        return 0;
    return x << shift;
}
// Logical right shift of a 32-bit value; shift counts of 32 or more
// return 0 instead of invoking an out-of-range (undefined) shift.
static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    if (shift >= 32)
        return 0;
    return x >> shift;
}
// Logical left shift of a 64-bit value; shift counts of 64 or more
// return 0 instead of invoking an out-of-range (undefined) shift.
static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    if (shift >= 64)
        return 0;
    return x << shift;
}
// Logical right shift of a 64-bit value; shift counts of 64 or more
// return 0 instead of invoking an out-of-range (undefined) shift.
static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    if (shift >= 64)
        return 0;
    return x >> shift;
}
// 128-bit logical left shift: (*r1:*r0) = (x1:x0) << shift.
// The shift == 0 and shift >= 64 branches are separated so that no
// sub-shift ever equals the operand width (which would be undefined).
// Shift counts of 128 or more clear the result.
static void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}
// 128-bit logical right shift: (*r1:*r0) = (x1:x0) >> shift.
// Mirror image of lsl128: branch layout avoids shifting by exactly
// the operand width; counts of 128 or more clear the result.
static void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}
// Full 124-bit product of two values of at most 62 bits each, returned
// in (*x1:*x0). Uses a Karatsuba split into 31-bit halves so that the
// cross term p1 = a0*b1 + a1*b0 is computed with one multiply; every
// partial product fits in 64 bits because the operands are <= 62 bits.
static void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;          // low 31 bits of a
    uint64_t a1 = a >> 31 & mask;    // high 31 bits of a
    uint64_t b0 = b & mask;          // low 31 bits of b
    uint64_t b1 = b >> 31 & mask;    // high 31 bits of b

    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    // Karatsuba: (a0+a1)(b0+b1) - p0 - p2 == a0*b1 + a1*b0
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;

    // Accumulate the partial products in radix 2^31, propagating
    // carries through s1 and s2.
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;

    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}
187 void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t
a, uint32_t
b)
189 uint64_t
t0 = (uint64_t)(uint32_t)a *
b;
190 uint64_t
t1 = (t0 >> 32) + (a >> 32) *
b;
191 *x0 = t1 << 32 | (uint32_t)t0;
// 128-bit addition: (*x1:*x0) = (a1:a0) + (b1:b0).
// The carry out of the low word is detected by the unsigned
// comparison (*x0 < a0), which holds exactly when a0 + b0 wrapped.
static void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}
// 128-bit subtraction: (*x1:*x0) = (a1:a0) - (b1:b0).
// The borrow out of the low word is detected by the unsigned
// comparison (*x0 > a0), which holds exactly when a0 - b0 wrapped.
static void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}
// Three-way unsigned comparison of 128-bit values (a1:a0) vs (b1:b0):
// returns -1, 0 or 1. High words decide first, then low words.
static int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    if (a1 != b1)
        return a1 < b1 ? -1 : 1;
    if (a0 != b0)
        return a0 < b0 ? -1 : 1;
    return 0;
}
217 static inline uint16_t
226 for (shift = 8;
shift; shift >>= 1) {
227 if (!(mnt >> (16 - shift))) {
235 static inline uint32_t
244 for (shift = 16;
shift; shift >>= 1) {
245 if (!(mnt >> (32 - shift))) {
253 static inline uint64_t
262 for (shift = 32;
shift; shift >>= 1) {
263 if (!(mnt >> (64 - shift))) {
288 for (shift = 32;
shift; shift >>= 1) {
289 if (!(x1 >> (64 - shift))) {
290 x1 = x1 << shift | x0 >> (64 -
shift);
300 static inline uint16_t
306 static inline uint32_t
312 static inline uint64_t
318 static inline uint16_t
324 static inline uint32_t
330 static inline uint64_t
336 static inline uint16_t
342 static inline uint32_t
348 static inline uint64_t
354 static inline uint16_t
360 static inline uint32_t
366 static inline uint64_t
372 static inline uint16_t
378 static inline uint32_t
384 static inline uint64_t
523 static inline uint16_t
533 static inline uint32_t
543 static inline uint64_t
734 int_mant =
lsr16(mnt, 3 - exp);
735 error = (
lsr16(mnt, 1 - exp) & 3) | !!(mnt & (
lsl16(1, 1 - exp) - 1));
738 if (!biased_exp && error) {
743 if ((rm ==
FPLIB_RN && (error == 3 ||
744 (error == 2 && (int_mant & 1)))) ||
784 return fp16_pack(sgn, biased_exp, int_mant);
790 return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
818 int_mant =
lsr32(mnt, 3 - exp);
819 error = (
lsr32(mnt, 1 - exp) & 3) | !!(mnt & (
lsl32(1, 1 - exp) - 1));
822 if (!biased_exp && error) {
827 if ((rm ==
FPLIB_RN && (error == 3 ||
828 (error == 2 && (int_mant & 1)))) ||
861 return fp32_pack(sgn, biased_exp, int_mant);
867 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
895 int_mant =
lsr64(mnt, 3 - exp);
896 error = (
lsr64(mnt, 1 - exp) & 3) | !!(mnt & (
lsl64(1, 1 - exp) - 1));
899 if (!biased_exp && error) {
904 if ((rm ==
FPLIB_RN && (error == 3 ||
905 (error == 2 && (int_mant & 1)))) ||
938 return fp64_pack(sgn, biased_exp, int_mant);
944 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
950 int a_sgn, a_exp, b_sgn, b_exp;
951 uint16_t a_mnt, b_mnt;
953 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
954 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
963 return a == b || (!a_mnt && !b_mnt);
969 int a_sgn, a_exp, b_sgn, b_exp;
970 uint16_t a_mnt, b_mnt;
972 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
973 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
980 if (!a_mnt && !b_mnt)
985 return a_sgn ^ (a_exp > b_exp);
987 return a_sgn ^ (a_mnt > b_mnt);
994 int a_sgn, a_exp, b_sgn, b_exp;
995 uint16_t a_mnt, b_mnt;
997 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
998 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1005 if (!a_mnt && !b_mnt)
1010 return a_sgn ^ (a_exp > b_exp);
1012 return a_sgn ^ (a_mnt > b_mnt);
1019 int a_sgn, a_exp, b_sgn, b_exp;
1020 uint16_t a_mnt, b_mnt;
1022 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1023 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1038 int a_sgn, a_exp, b_sgn, b_exp;
1039 uint32_t a_mnt, b_mnt;
1041 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1042 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1051 return a == b || (!a_mnt && !b_mnt);
1057 int a_sgn, a_exp, b_sgn, b_exp;
1058 uint32_t a_mnt, b_mnt;
1060 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1061 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1068 if (!a_mnt && !b_mnt)
1073 return a_sgn ^ (a_exp > b_exp);
1075 return a_sgn ^ (a_mnt > b_mnt);
1082 int a_sgn, a_exp, b_sgn, b_exp;
1083 uint32_t a_mnt, b_mnt;
1085 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1086 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1093 if (!a_mnt && !b_mnt)
1098 return a_sgn ^ (a_exp > b_exp);
1100 return a_sgn ^ (a_mnt > b_mnt);
1107 int a_sgn, a_exp, b_sgn, b_exp;
1108 uint32_t a_mnt, b_mnt;
1110 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1111 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1126 int a_sgn, a_exp, b_sgn, b_exp;
1127 uint64_t a_mnt, b_mnt;
1129 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1130 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1139 return a == b || (!a_mnt && !b_mnt);
1145 int a_sgn, a_exp, b_sgn, b_exp;
1146 uint64_t a_mnt, b_mnt;
1148 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1149 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1156 if (!a_mnt && !b_mnt)
1161 return a_sgn ^ (a_exp > b_exp);
1163 return a_sgn ^ (a_mnt > b_mnt);
1170 int a_sgn, a_exp, b_sgn, b_exp;
1171 uint64_t a_mnt, b_mnt;
1173 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1174 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1181 if (!a_mnt && !b_mnt)
1186 return a_sgn ^ (a_exp > b_exp);
1188 return a_sgn ^ (a_mnt > b_mnt);
1195 int a_sgn, a_exp, b_sgn, b_exp;
1196 uint64_t a_mnt, b_mnt;
1198 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1199 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1214 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1215 uint16_t a_mnt, b_mnt,
x, x_mnt;
1217 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1218 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1234 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1240 if (a_exp >= b_exp) {
1241 b_mnt = (
lsr16(b_mnt, a_exp - b_exp) |
1242 !!(b_mnt & (
lsl16(1, a_exp - b_exp) - 1)));
1245 a_mnt = (
lsr16(a_mnt, b_exp - a_exp) |
1246 !!(a_mnt & (
lsl16(1, b_exp - a_exp) - 1)));
1251 if (a_sgn == b_sgn) {
1252 x_mnt = a_mnt + b_mnt;
1253 }
else if (a_mnt >= b_mnt) {
1254 x_mnt = a_mnt - b_mnt;
1257 x_mnt = b_mnt - a_mnt;
1274 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1275 uint32_t a_mnt, b_mnt,
x, x_mnt;
1277 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1278 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1294 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1300 if (a_exp >= b_exp) {
1301 b_mnt = (
lsr32(b_mnt, a_exp - b_exp) |
1302 !!(b_mnt & (
lsl32(1, a_exp - b_exp) - 1)));
1305 a_mnt = (
lsr32(a_mnt, b_exp - a_exp) |
1306 !!(a_mnt & (
lsl32(1, b_exp - a_exp) - 1)));
1311 if (a_sgn == b_sgn) {
1312 x_mnt = a_mnt + b_mnt;
1313 }
else if (a_mnt >= b_mnt) {
1314 x_mnt = a_mnt - b_mnt;
1317 x_mnt = b_mnt - a_mnt;
1334 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1335 uint64_t a_mnt, b_mnt,
x, x_mnt;
1337 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1338 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1354 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1360 if (a_exp >= b_exp) {
1361 b_mnt = (
lsr64(b_mnt, a_exp - b_exp) |
1362 !!(b_mnt & (
lsl64(1, a_exp - b_exp) - 1)));
1365 a_mnt = (
lsr64(a_mnt, b_exp - a_exp) |
1366 !!(a_mnt & (
lsl64(1, b_exp - a_exp) - 1)));
1371 if (a_sgn == b_sgn) {
1372 x_mnt = a_mnt + b_mnt;
1373 }
else if (a_mnt >= b_mnt) {
1374 x_mnt = a_mnt - b_mnt;
1377 x_mnt = b_mnt - a_mnt;
1394 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1395 uint16_t a_mnt, b_mnt,
x;
1398 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1399 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1412 }
else if (!a_mnt || !b_mnt) {
1417 x_sgn = a_sgn ^ b_sgn;
1419 x_mnt = (uint32_t)a_mnt * b_mnt;
1425 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1431 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1432 uint32_t a_mnt, b_mnt,
x;
1435 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1436 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1449 }
else if (!a_mnt || !b_mnt) {
1454 x_sgn = a_sgn ^ b_sgn;
1456 x_mnt = (uint64_t)a_mnt * b_mnt;
1462 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1468 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1469 uint64_t a_mnt, b_mnt,
x;
1470 uint64_t x0_mnt, x1_mnt;
1472 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1473 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1486 }
else if (!a_mnt || !b_mnt) {
1491 x_sgn = a_sgn ^ b_sgn;
1493 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1497 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1499 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1504 int mode,
int *flags)
1506 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1507 uint16_t a_mnt, b_mnt, c_mnt,
x;
1508 uint32_t x_mnt, y_mnt;
1510 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1511 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1512 fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1533 (a_sgn != (b_sgn ^ c_sgn)))) {
1541 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1549 y_sgn = b_sgn ^ c_sgn;
1550 y_exp = b_exp + c_exp -
FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
1551 y_mnt = (uint32_t)b_mnt * c_mnt << 3;
1557 if (x_exp >= y_exp) {
1558 y_mnt = (
lsr32(y_mnt, x_exp - y_exp) |
1559 !!(y_mnt & (
lsl32(1, x_exp - y_exp) - 1)));
1562 x_mnt = (
lsr32(x_mnt, y_exp - x_exp) |
1563 !!(x_mnt & (
lsl32(1, y_exp - x_exp) - 1)));
1566 if (x_sgn == y_sgn) {
1567 x_mnt = x_mnt + y_mnt;
1568 }
else if (x_mnt >= y_mnt) {
1569 x_mnt = x_mnt - y_mnt;
1572 x_mnt = y_mnt - x_mnt;
1582 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1584 return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1589 int mode,
int *flags)
1591 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1592 uint32_t a_mnt, b_mnt, c_mnt,
x;
1593 uint64_t x_mnt, y_mnt;
1595 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1596 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1597 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1618 (a_sgn != (b_sgn ^ c_sgn)))) {
1626 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1634 y_sgn = b_sgn ^ c_sgn;
1635 y_exp = b_exp + c_exp -
FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
1636 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1642 if (x_exp >= y_exp) {
1643 y_mnt = (
lsr64(y_mnt, x_exp - y_exp) |
1644 !!(y_mnt & (
lsl64(1, x_exp - y_exp) - 1)));
1647 x_mnt = (
lsr64(x_mnt, y_exp - x_exp) |
1648 !!(x_mnt & (
lsl64(1, y_exp - x_exp) - 1)));
1651 if (x_sgn == y_sgn) {
1652 x_mnt = x_mnt + y_mnt;
1653 }
else if (x_mnt >= y_mnt) {
1654 x_mnt = x_mnt - y_mnt;
1657 x_mnt = y_mnt - x_mnt;
1667 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1669 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1674 int mode,
int *flags)
1676 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1677 uint64_t a_mnt, b_mnt, c_mnt,
x;
1678 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1680 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1681 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1682 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1703 (a_sgn != (b_sgn ^ c_sgn)))) {
1711 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1720 y_sgn = b_sgn ^ c_sgn;
1721 y_exp = b_exp + c_exp -
FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
1722 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1723 if (!y0_mnt && !y1_mnt) {
1728 if (x_exp >= y_exp) {
1730 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1731 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1732 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1733 y0_mnt |= !!(t0 |
t1);
1737 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1738 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1739 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1740 x0_mnt |= !!(t0 |
t1);
1743 if (x_sgn == y_sgn) {
1744 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1745 }
else if (
cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1746 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1749 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1752 if (!x0_mnt && !x1_mnt) {
1759 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1761 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1767 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1768 uint16_t a_mnt, b_mnt,
x;
1771 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1772 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1779 (!a_mnt && !b_mnt)) {
1793 x_sgn = a_sgn ^ b_sgn;
1796 x_mnt |= (x_mnt * b_mnt !=
1801 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1803 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1809 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1810 uint32_t a_mnt, b_mnt,
x;
1813 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1814 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1821 (!a_mnt && !b_mnt)) {
1835 x_sgn = a_sgn ^ b_sgn;
1838 x_mnt |= (x_mnt * b_mnt !=
1843 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1845 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1851 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp,
c;
1852 uint64_t a_mnt, b_mnt,
x, x_mnt, x0_mnt, x1_mnt;
1854 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1855 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1862 (!a_mnt && !b_mnt)) {
1877 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1878 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1879 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1880 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1881 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1882 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1885 x_sgn = a_sgn ^ b_sgn;
1887 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1888 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1892 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1893 c =
cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1900 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1932 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1949 b = b < -300 ? -300 :
b;
1950 b = b > 300 ? 300 :
b;
1966 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1983 b = b < -300 ? -300 :
b;
1984 b = b > 300 ? 300 :
b;
2000 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2017 b = b < -3000 ? -3000 :
b;
2018 b = b > 3000 ? 3000 :
b;
2031 int a_sgn, a_exp, x_sgn, x_exp;
2032 uint16_t a_mnt, x_mnt;
2035 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2058 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2061 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2064 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2067 x_exp = (a_exp + 27) >> 1;
2068 x_mnt = ((x - (1 << 18)) >> 19) + 1;
2069 t1 = (uint32_t)x_mnt * x_mnt;
2070 t0 = (uint32_t)a_mnt << 9;
2077 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2083 int a_sgn, a_exp, x_sgn, x_exp;
2084 uint32_t a_mnt,
x, x_mnt;
2087 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2110 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2113 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2116 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2119 x = ((((uint64_t)a_mnt << 32) /
x) >> 2) + (x >> 1);
2122 x_exp = (a_exp + 147) >> 1;
2123 x_mnt = ((x - (1 << 5)) >> 6) + 1;
2124 t1 = (uint64_t)x_mnt * x_mnt;
2125 t0 = (uint64_t)a_mnt << 19;
2132 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2138 int a_sgn, a_exp, x_sgn, x_exp,
c;
2139 uint64_t a_mnt, x_mnt,
r, x0, x1;
2142 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2165 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2168 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2171 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2174 x = ((a_mnt /
x) >> 2) + (x >> 1);
2177 r = ((uint64_t)1 << 62) /
x;
2180 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2181 lsr128(&x0, &x1, x0, x1, 31);
2184 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2185 lsl128(&x0, &x1, x0, x1, 5);
2186 lsr128(&x0, &x1, x0, x1, 56);
2188 x0 = ((uint64_t)x << 31) + (x0 >> 1);
2191 x_exp = (a_exp + 1053) >> 1;
2193 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2195 lsl128(&x0, &x1, x0, x1, 19);
2196 c =
cmp128(x0, x1, 0, a_mnt);
2202 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2208 uint32_t
x = (uint32_t)fpscr;
2209 return (x >> 22 & 0xf) | (x >> 19 & 1 ?
FPLIB_FZ16 : 0);
2217 bool underflow =
false;
2234 if ((flags &
FPLIB_IXC) && !(underflow && fpscr.fz)) {
2416 int sgn1, exp1, sgn2, exp2, result;
2417 uint16_t mnt1, mnt2;
2419 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2420 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2428 if (op1 == op2 || (!mnt1 && !mnt2)) {
2430 }
else if (sgn1 != sgn2) {
2431 result = sgn1 ? 8 : 2;
2432 }
else if (exp1 != exp2) {
2433 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2435 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2450 int sgn1, exp1, sgn2, exp2, result;
2451 uint32_t mnt1, mnt2;
2453 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2454 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2462 if (op1 == op2 || (!mnt1 && !mnt2)) {
2464 }
else if (sgn1 != sgn2) {
2465 result = sgn1 ? 8 : 2;
2466 }
else if (exp1 != exp2) {
2467 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2469 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2484 int sgn1, exp1, sgn2, exp2, result;
2485 uint64_t mnt1, mnt2;
2487 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2488 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2496 if (op1 == op2 || (!mnt1 && !mnt2)) {
2498 }
else if (sgn1 != sgn2) {
2499 result = sgn1 ? 8 : 2;
2500 }
else if (exp1 != exp2) {
2501 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2503 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2627 bool alt_hp = fpscr.ahp;
2632 }
else if (fpscr.dn) {
2642 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
2655 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2676 bool alt_hp = fpscr.ahp;
2681 }
else if (fpscr.dn) {
2691 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
2704 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2723 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2781 rounding, mode, &flags);
2800 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2867 fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2877 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2887 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2929 static uint16_t coeff[32] = {
2964 coeff[op & ((1 << 5) - 1)]);
2971 static uint32_t coeff[64] = {
3038 coeff[op & ((1 << 6) - 1)]);
3045 static uint64_t coeff[64] = {
3112 coeff[op & ((1 << 6) - 1)]);
3175 int sgn1, exp1, sgn2, exp2;
3176 uint16_t mnt1, mnt2,
x, result;
3178 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3179 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3184 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3198 int sgn1, exp1, sgn2, exp2;
3199 uint32_t mnt1, mnt2,
x, result;
3201 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3202 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3207 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3221 int sgn1, exp1, sgn2, exp2;
3222 uint64_t mnt1, mnt2,
x, result;
3224 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3225 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3230 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3243 return fplibMax<uint16_t>(op1, op2, fpscr);
3251 return fplibMax<uint32_t>(op1, op2, fpscr);
3259 return fplibMax<uint64_t>(op1, op2, fpscr);
3268 int sgn1, exp1, sgn2, exp2;
3269 uint16_t mnt1, mnt2,
x, result;
3271 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3272 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3277 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3291 int sgn1, exp1, sgn2, exp2;
3292 uint32_t mnt1, mnt2,
x, result;
3294 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3295 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3300 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3314 int sgn1, exp1, sgn2, exp2;
3315 uint64_t mnt1, mnt2,
x, result;
3317 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3318 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3323 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3336 return fplibMin<uint16_t>(op1, op2, fpscr);
3344 return fplibMin<uint32_t>(op1, op2, fpscr);
3352 return fplibMin<uint64_t>(op1, op2, fpscr);
3391 int sgn1, exp1, sgn2, exp2;
3392 uint16_t mnt1, mnt2, result;
3394 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3395 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3404 }
else if (!mnt1 || !mnt2) {
3407 result =
fp16_mul(op1, op2, mode, &flags);
3422 int sgn1, exp1, sgn2, exp2;
3423 uint32_t mnt1, mnt2, result;
3425 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3426 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3435 }
else if (!mnt1 || !mnt2) {
3438 result =
fp32_mul(op1, op2, mode, &flags);
3453 int sgn1, exp1, sgn2, exp2;
3454 uint64_t mnt1, mnt2, result;
3456 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3457 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3466 }
else if (!mnt1 || !mnt2) {
3469 result =
fp64_mul(op1, op2, mode, &flags);
3500 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3501 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3502 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3503 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3504 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3505 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3506 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3507 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3508 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3509 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3510 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3511 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3512 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3513 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3514 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3515 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3525 uint16_t mnt, result;
3542 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3560 uint32_t mnt, result;
3577 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3595 uint64_t mnt, result;
3612 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3629 int sgn1, exp1, sgn2, exp2;
3630 uint16_t mnt1, mnt2, result;
3632 op1 = fplibNeg<uint16_t>(op1);
3633 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3634 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3659 int sgn1, exp1, sgn2, exp2;
3660 uint32_t mnt1, mnt2, result;
3662 op1 = fplibNeg<uint32_t>(op1);
3663 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3664 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3689 int sgn1, exp1, sgn2, exp2;
3690 uint64_t mnt1, mnt2, result;
3692 op1 = fplibNeg<uint64_t>(op1);
3693 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3694 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3720 uint16_t mnt, result;
3732 bool overflow_to_inf =
false;
3735 overflow_to_inf =
true;
3738 overflow_to_inf = !sgn;
3741 overflow_to_inf = sgn;
3744 overflow_to_inf =
false;
3747 panic(
"Unrecognized FP rounding mode");
3758 uint16_t fraction = (((uint32_t)1 << 19) /
3759 (mnt >> (
FP16_BITS - 10) | 1) + 1) >> 1;
3761 if (result_exp == 0) {
3763 }
else if (result_exp == -1) {
3767 result =
fp16_pack(sgn, result_exp, fraction);
3782 uint32_t mnt, result;
3794 bool overflow_to_inf =
false;
3797 overflow_to_inf =
true;
3800 overflow_to_inf = !sgn;
3803 overflow_to_inf = sgn;
3806 overflow_to_inf =
false;
3809 panic(
"Unrecognized FP rounding mode");
3820 uint32_t fraction = (((uint32_t)1 << 19) /
3821 (mnt >> (
FP32_BITS - 10) | 1) + 1) >> 1;
3823 if (result_exp == 0) {
3825 }
else if (result_exp == -1) {
3829 result =
fp32_pack(sgn, result_exp, fraction);
3844 uint64_t mnt, result;
3856 bool overflow_to_inf =
false;
3859 overflow_to_inf =
true;
3862 overflow_to_inf = !sgn;
3865 overflow_to_inf = sgn;
3868 overflow_to_inf =
false;
3871 panic(
"Unrecognized FP rounding mode");
3882 uint64_t fraction = (((uint32_t)1 << 19) /
3883 (mnt >> (
FP64_BITS - 10) | 1) + 1) >> 1;
3885 if (result_exp == 0) {
3887 }
else if (result_exp == -1) {
3891 result =
fp64_pack(sgn, result_exp, fraction);
3905 int sgn1, exp1, sgn2, exp2;
3906 uint16_t mnt1, mnt2, result;
3908 op1 = fplibNeg<uint16_t>(op1);
3909 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3910 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3935 int sgn1, exp1, sgn2, exp2;
3936 uint32_t mnt1, mnt2, result;
3938 op1 = fplibNeg<uint32_t>(op1);
3939 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3940 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3965 int sgn1, exp1, sgn2, exp2;
3966 uint64_t mnt1, mnt2, result;
3968 op1 = fplibNeg<uint64_t>(op1);
3969 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3970 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3996 uint16_t mnt, result;
4023 uint32_t mnt, result;
4050 uint64_t mnt, result;
4078 uint16_t mnt, result;
4090 }
else if (exp >= expint) {
4095 uint16_t
x = expint - exp >=
FP16_BITS ? 0 : mnt >> (expint - exp);
4096 int err = exp < expint -
FP16_BITS ? 1 :
4097 ((mnt << 1 >> (expint - exp - 1) & 3) |
4098 ((uint16_t)(mnt << 2 << (
FP16_BITS + exp - expint)) != 0));
4101 x += (err == 3 || (err == 2 && (x & 1)));
4115 panic(
"Unrecognized FP rounding mode");
4143 uint32_t mnt, result;
4155 }
else if (exp >= expint) {
4160 uint32_t
x = expint - exp >=
FP32_BITS ? 0 : mnt >> (expint - exp);
4161 int err = exp < expint -
FP32_BITS ? 1 :
4162 ((mnt << 1 >> (expint - exp - 1) & 3) |
4163 ((uint32_t)(mnt << 2 << (
FP32_BITS + exp - expint)) != 0));
4166 x += (err == 3 || (err == 2 && (x & 1)));
4180 panic(
"Unrecognized FP rounding mode");
4208 uint64_t mnt, result;
4220 }
else if (exp >= expint) {
4225 uint64_t
x = expint - exp >=
FP64_BITS ? 0 : mnt >> (expint - exp);
4226 int err = exp < expint -
FP64_BITS ? 1 :
4227 ((mnt << 1 >> (expint - exp - 1) & 3) |
4228 ((uint64_t)(mnt << 2 << (
FP64_BITS + exp - expint)) != 0));
4231 x += (err == 3 || (err == 2 && (x & 1)));
4245 panic(
"Unrecognized FP rounding mode");
4359 static uint16_t coeff[2][8] = {
4393 static uint32_t coeff[2][8] = {
4427 static uint64_t coeff[2][8] = {
4429 0x3ff0000000000000
ULL,
4430 0xbfc5555555555543
ULL,
4431 0x3f8111111110f30c
ULL,
4432 0xbf2a01a019b92fc6
ULL,
4433 0x3ec71de351f3d22b
ULL,
4434 0xbe5ae5e2b60f7b91
ULL,
4435 0x3de5d8408868552f
ULL,
4436 0x0000000000000000ULL
4439 0x3ff0000000000000
ULL,
4440 0xbfe0000000000000
ULL,
4441 0x3fa5555555555536
ULL,
4442 0xbf56c16c16c13a0b
ULL,
4443 0x3efa01a019b1e8d8
ULL,
4444 0xbe927e4f7282f468
ULL,
4445 0x3e21ee96d2641b13
ULL,
4446 0xbda8f76380fbb401ULL
4466 uint16_t result =
fp16_mul(op1, op1, mode, &flags);
4469 fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4486 uint32_t result =
fp32_mul(op1, op1, mode, &flags);
4489 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4505 uint64_t result =
fp64_mul(op1, op1, mode, &flags);
4508 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4519 static constexpr uint16_t fpOne =
4523 return op1 ^ ((op2 >> 1) << (
FP16_BITS - 1));
4530 static constexpr uint32_t fpOne =
4534 return op1 ^ ((op2 >> 1) << (
FP32_BITS - 1));
4541 static constexpr uint64_t fpOne =
4545 return op1 ^ ((op2 >> 1) << (
FP64_BITS - 1));
4558 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4562 err = (exp > expmax - 2 ? 0 :
4568 x += (err == 3 || (err == 2 && (x & 1)));
4582 panic(
"Unrecognized FP rounding mode");
4585 if (u ? sgn && x : x > (1
ULL << (FP64_BITS - 1)) - !sgn) {
4587 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4594 return sgn ? -
x :
x;
4601 uint64_t
x =
FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4604 (uint64_t)-x <= (uint64_t)1 << (
FP32_BITS - 1))) {
4606 x = ((uint32_t)!u << (
FP32_BITS - 1)) - !sgn;
4615 uint64_t
x =
FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4618 (uint64_t)-x <= (uint64_t)1 << (
FP16_BITS - 1))) {
4620 x = ((uint16_t)!u << (
FP16_BITS - 1)) - !sgn;
4632 uint16_t mnt, result;
4647 u, rounding, &flags);
4679 u, rounding, &flags);
4693 uint32_t mnt, result;
4708 u, rounding, &flags);
4735 result =
FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4750 uint32_t sgn =
bits(op, 63);
4751 int32_t exp =
bits(op, 62, 52);
4752 uint64_t mnt =
bits(op, 51, 0);
4764 }
else if (exp == 0x7ff) {
4776 }
else if (mnt_shft >= 0) {
4777 result =
lsl64(mnt, mnt_shft);
4778 }
else if (mnt_shft < 0) {
4780 result =
lsr64(mnt, abs(mnt_shft));
4782 uint64_t max_result = (1UL << (
FP32_BITS - 1)) -!sgn;
4783 if ((exp - FP64_EXP_BIAS) > 31 || result > max_result) {
4790 result = sgn ? -result : result;
4792 if (sgn == 1 && result == 0)
4832 u, rounding, &flags);
4862 u, rounding, &flags);
4876 uint64_t mnt, result;
4888 result =
FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
4901 uint64_t x_mnt = x_sgn ? -
a :
a;
4913 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
4921 uint64_t x_mnt = x_sgn ? -
a :
a;
4933 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
4941 uint64_t x_mnt = x_sgn ? -
a :
a;
4950 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
4960 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
4972 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
4984 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
static uint32_t lsr32(uint32_t x, uint32_t shift)
#define panic(...)
This implements a cprintf based panic() function.
uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint16_t fplibExpA(uint16_t op)
static uint16_t fp16_FPThree(int sgn)
static uint32_t fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
static int fp32_is_signalling_NaN(int exp, uint32_t mnt)
static int fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
static int fp64_is_quiet_NaN(int exp, uint64_t mnt)
bool fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint16_t lsl16(uint16_t x, uint32_t shift)
uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
static int cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
uint16_t fplibDefaultNaN()
Floating-point value for default NaN.
static uint32_t fp32_normalise(uint32_t mnt, int *exp)
static void set_fpscr0(FPSCR &fpscr, int flags)
uint16_t fplibNeg(uint16_t op)
static uint64_t fp64_normalise(uint64_t mnt, int *exp)
static uint32_t fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static void add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
static void lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint32_t fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t &nz)
Floating-point JavaScript conversion to a signed integer, with rounding to zero.
static int fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
static void mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
static void fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, int *flags)
static uint64_t fp64_process_NaN(uint64_t a, int mode, int *flags)
Floating-point library code, which will gradually replace vfp.hh.
uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
static uint64_t fp64_FPThree(int sgn)
static int fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
static uint32_t fp32_zero(int sgn)
static int fp16_is_signalling_NaN(int exp, uint16_t mnt)
bool fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
static int fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
static int fp64_is_signalling_NaN(int exp, uint64_t mnt)
static uint32_t fp32_defaultNaN()
static uint64_t fp64_FPConvertNaN_16(uint16_t op)
static uint16_t fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
static uint32_t fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
void neg(sc_fxval &c, const sc_fxnum &a)
static uint32_t fp32_FPConvertNaN_64(uint64_t op)
static uint32_t fp32_FPOnePointFive(int sgn)
uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint16_t fp16_zero(int sgn)
static int fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
static void fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
static uint32_t fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
static FPRounding FPCRRounding(FPSCR &fpscr)
static const uint8_t recip_sqrt_estimate[256]
uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr)
uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_defaultNaN()
uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint32_t fp32_process_NaN(uint32_t a, int mode, int *flags)
static uint16_t fp16_max_normal(int sgn)
static uint16_t fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
static uint32_t fp32_sqrt(uint32_t a, int mode, int *flags)
static uint64_t fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int modeConv(FPSCR fpscr)
static void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
static uint32_t fp32_FPTwo(int sgn)
static void fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
static int fp32_is_quiet_NaN(int exp, uint32_t mnt)
static uint32_t fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
static uint32_t fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, int mode, int *flags)
static uint32_t fp32_repack(int sgn, int exp, uint32_t mnt)
static uint64_t fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
static void sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
static uint64_t fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, int mode, int *flags)
uint16_t fplibInfinity(int sgn)
Floating-point value for +/- infinity.
static uint32_t fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
static int fp16_is_infinity(int exp, uint16_t mnt)
static uint32_t FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint16_t fp16_FPOnePointFive(int sgn)
static uint16_t fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
#define ULL(N)
uint64_t constant
uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_repack(int sgn, int exp, uint64_t mnt)
static uint16_t fp16_normalise(uint16_t mnt, int *exp)
static uint64_t fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
static void fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, int *flags)
static void fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
static int fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
uint16_t fplibAbs(uint16_t op)
uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
Floating-point convert from fixed-point.
uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_sqrt(uint16_t a, int mode, int *flags)
static uint64_t FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint32_t fp32_FPConvertNaN_16(uint16_t op)
bool fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint16_t fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
static int fp64_is_infinity(int exp, uint64_t mnt)
static void set_fpscr(FPSCR &fpscr, int flags)
static uint64_t fp64_FPConvertNaN_32(uint32_t op)
static uint64_t fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
static uint64_t fp64_zero(int sgn)
uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
static void fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
static uint16_t fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags)
static uint32_t fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
static int fp64_is_NaN(int exp, uint64_t mnt)
static uint64_t fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
static uint64_t fp64_infinity(int sgn)
static uint16_t fp16_infinity(int sgn)
uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
static uint32_t fp32_max_normal(int sgn)
bool fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint16_t fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint16_t fp16_repack(int sgn, int exp, uint16_t mnt)
static uint16_t fp16_process_NaN(uint16_t a, int mode, int *flags)
static uint64_t fp64_FPOnePointFive(int sgn)
static uint32_t fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
static uint64_t fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
static void lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr)
static uint64_t fp64_sqrt(uint64_t a, int mode, int *flags)
static uint16_t fp16_FPTwo(int sgn)
static uint32_t fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
static uint64_t fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
static int fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
static int fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
static int fp16_is_NaN(int exp, uint16_t mnt)
static uint32_t fp32_FPThree(int sgn)
uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp32_is_infinity(int exp, uint32_t mnt)
static int fp16_is_quiet_NaN(int exp, uint16_t mnt)
static uint64_t lsr64(uint64_t x, uint32_t shift)
uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it...
static uint64_t fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
static uint16_t fp16_defaultNaN()
static uint16_t fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
static uint64_t lsl64(uint64_t x, uint32_t shift)
static uint16_t fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
static int fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
static uint64_t fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
static uint32_t fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
static uint32_t fp32_infinity(int sgn)
static uint64_t fp64_FPTwo(int sgn)
static int fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
static void fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, int *flags)
static uint16_t fp16_FPConvertNaN_64(uint64_t op)
uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp32_is_NaN(int exp, uint32_t mnt)
static int fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_FPConvertNaN_32(uint32_t op)
static uint32_t lsl32(uint32_t x, uint32_t shift)
static uint16_t lsr16(uint16_t x, uint32_t shift)
static uint64_t fp64_max_normal(int sgn)