/*
 * Cumulative floating-point exception flags (FPSR/FPSCR bit values).
 */
#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation

// Exponent field widths of the IEEE-754 binary16/32/64 formats.
#define FP16_EXP_BITS 5
#define FP32_EXP_BITS 8
#define FP64_EXP_BITS 11

// Exponent biases.
#define FP16_EXP_BIAS 15
#define FP32_EXP_BIAS 127
#define FP64_EXP_BIAS 1023

// All-ones exponent encoding (infinity / NaN).
#define FP16_EXP_INF ((1ULL << FP16_EXP_BITS) - 1)
#define FP32_EXP_INF ((1ULL << FP32_EXP_BITS) - 1)
#define FP64_EXP_INF ((1ULL << FP64_EXP_BITS) - 1)

// Mantissa (fraction) widths: total width minus exponent minus sign bit.
// FP16_BITS/FP32_BITS/FP64_BITS are defined earlier in this file.
#define FP16_MANT_BITS (FP16_BITS - FP16_EXP_BITS - 1)
#define FP32_MANT_BITS (FP32_BITS - FP32_EXP_BITS - 1)
#define FP64_MANT_BITS (FP64_BITS - FP64_EXP_BITS - 1)

// Field extractors: biased exponent and mantissa of a raw encoding.
#define FP16_EXP(x) ((x) >> FP16_MANT_BITS & ((1ULL << FP16_EXP_BITS) - 1))
#define FP32_EXP(x) ((x) >> FP32_MANT_BITS & ((1ULL << FP32_EXP_BITS) - 1))
#define FP64_EXP(x) ((x) >> FP64_MANT_BITS & ((1ULL << FP64_EXP_BITS) - 1))

#define FP16_MANT(x) ((x) & ((1ULL << FP16_MANT_BITS) - 1))
#define FP32_MANT(x) ((x) & ((1ULL << FP32_MANT_BITS) - 1))
#define FP64_MANT(x) ((x) & ((1ULL << FP64_MANT_BITS) - 1))

/*
 * Logical shift left of a 16-bit value.
 *
 * Unlike the C `<<` operator (undefined for shifts >= the operand width),
 * shifts of 16 or more are well defined here and yield 0.
 */
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}
/*
 * Logical shift right of a 16-bit value.
 *
 * Shifts of 16 or more are well defined and yield 0, unlike the raw
 * C `>>` operator.
 */
static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}
/*
 * Logical shift left of a 32-bit value.
 *
 * Shifts of 32 or more are well defined and yield 0, avoiding the
 * undefined behaviour of the C `<<` operator at full width.
 */
static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}
/*
 * Logical shift right of a 32-bit value.
 *
 * Shifts of 32 or more are well defined and yield 0.
 */
static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}
/*
 * Logical shift left of a 64-bit value.
 *
 * Shifts of 64 or more are well defined and yield 0, avoiding the
 * undefined behaviour of the C `<<` operator at full width.
 */
static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}
/*
 * Logical shift right of a 64-bit value.
 *
 * Shifts of 64 or more are well defined and yield 0.
 */
static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}
131 lsl128(uint64_t *
r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t
shift)
136 }
else if (shift < 64) {
137 *r1 = x1 << shift | x0 >> (64 -
shift);
139 }
else if (shift < 128) {
140 *r1 = x0 << (shift - 64);
149 lsr128(uint64_t *
r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t
shift)
154 }
else if (shift < 64) {
155 *r0 = x0 >> shift | x1 << (64 -
shift);
157 }
else if (shift < 128) {
158 *r0 = x1 >> (shift - 64);
169 uint32_t
mask = ((uint32_t)1 << 31) - 1;
171 uint64_t
a1 = a >> 31 &
mask;
173 uint64_t
b1 = b >> 31 &
mask;
174 uint64_t p0 = a0 *
b0;
175 uint64_t p2 = a1 *
b1;
176 uint64_t p1 = (a0 +
a1) * (b0 + b1) - p0 - p2;
178 uint64_t s1 = (s0 >> 31) + p1;
179 uint64_t s2 = (s1 >> 31) + p2;
180 *x0 = (s0 &
mask) | (s1 & mask) << 31 | s2 << 62;
185 void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t
a, uint32_t
b)
187 uint64_t
t0 = (uint64_t)(uint32_t)a *
b;
188 uint64_t
t1 = (t0 >> 32) + (a >> 32) *
b;
189 *x0 = t1 << 32 | (uint32_t)t0;
194 add128(uint64_t *x0, uint64_t *x1, uint64_t
a0, uint64_t
a1, uint64_t
b0,
198 *x1 = a1 + b1 + (*x0 <
a0);
202 sub128(uint64_t *x0, uint64_t *x1, uint64_t
a0, uint64_t
a1, uint64_t
b0,
206 *x1 = a1 - b1 - (*x0 >
a0);
212 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
215 static inline uint16_t
224 for (shift = 8;
shift; shift >>= 1) {
225 if (!(mnt >> (16 - shift))) {
233 static inline uint32_t
242 for (shift = 16;
shift; shift >>= 1) {
243 if (!(mnt >> (32 - shift))) {
251 static inline uint64_t
260 for (shift = 32;
shift; shift >>= 1) {
261 if (!(mnt >> (64 - shift))) {
286 for (shift = 32;
shift; shift >>= 1) {
287 if (!(x1 >> (64 - shift))) {
288 x1 = x1 << shift | x0 >> (64 -
shift);
298 static inline uint16_t
304 static inline uint32_t
310 static inline uint64_t
316 static inline uint16_t
322 static inline uint32_t
328 static inline uint64_t
334 static inline uint16_t
340 static inline uint32_t
346 static inline uint64_t
352 static inline uint16_t
358 static inline uint32_t
364 static inline uint64_t
370 static inline uint16_t
376 static inline uint32_t
382 static inline uint64_t
531 static inline uint16_t
541 static inline uint32_t
551 static inline uint64_t
742 int_mant =
lsr16(mnt, 3 - exp);
743 error = (
lsr16(mnt, 1 - exp) & 3) | !!(mnt & (
lsl16(1, 1 - exp) - 1));
746 if (!biased_exp && error) {
751 if ((rm ==
FPLIB_RN && (error == 3 ||
752 (error == 2 && (int_mant & 1)))) ||
792 return fp16_pack(sgn, biased_exp, int_mant);
798 return fp16_round_(sgn, exp, mnt, mode & 3, mode, flags);
826 int_mant =
lsr32(mnt, 3 - exp);
827 error = (
lsr32(mnt, 1 - exp) & 3) | !!(mnt & (
lsl32(1, 1 - exp) - 1));
830 if (!biased_exp && error) {
835 if ((rm ==
FPLIB_RN && (error == 3 ||
836 (error == 2 && (int_mant & 1)))) ||
869 return fp32_pack(sgn, biased_exp, int_mant);
875 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
903 int_mant =
lsr64(mnt, 3 - exp);
904 error = (
lsr64(mnt, 1 - exp) & 3) | !!(mnt & (
lsl64(1, 1 - exp) - 1));
907 if (!biased_exp && error) {
912 if ((rm ==
FPLIB_RN && (error == 3 ||
913 (error == 2 && (int_mant & 1)))) ||
946 return fp64_pack(sgn, biased_exp, int_mant);
952 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
958 int a_sgn, a_exp, b_sgn, b_exp;
959 uint16_t a_mnt, b_mnt;
961 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
962 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
971 return a == b || (!a_mnt && !b_mnt);
977 int a_sgn, a_exp, b_sgn, b_exp;
978 uint16_t a_mnt, b_mnt;
980 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
981 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
988 if (!a_mnt && !b_mnt)
993 return a_sgn ^ (a_exp > b_exp);
995 return a_sgn ^ (a_mnt > b_mnt);
1002 int a_sgn, a_exp, b_sgn, b_exp;
1003 uint16_t a_mnt, b_mnt;
1005 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1006 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1013 if (!a_mnt && !b_mnt)
1018 return a_sgn ^ (a_exp > b_exp);
1020 return a_sgn ^ (a_mnt > b_mnt);
1027 int a_sgn, a_exp, b_sgn, b_exp;
1028 uint16_t a_mnt, b_mnt;
1030 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1031 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1046 int a_sgn, a_exp, b_sgn, b_exp;
1047 uint32_t a_mnt, b_mnt;
1049 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1050 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1059 return a == b || (!a_mnt && !b_mnt);
1065 int a_sgn, a_exp, b_sgn, b_exp;
1066 uint32_t a_mnt, b_mnt;
1068 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1069 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1076 if (!a_mnt && !b_mnt)
1081 return a_sgn ^ (a_exp > b_exp);
1083 return a_sgn ^ (a_mnt > b_mnt);
1090 int a_sgn, a_exp, b_sgn, b_exp;
1091 uint32_t a_mnt, b_mnt;
1093 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1094 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1101 if (!a_mnt && !b_mnt)
1106 return a_sgn ^ (a_exp > b_exp);
1108 return a_sgn ^ (a_mnt > b_mnt);
1115 int a_sgn, a_exp, b_sgn, b_exp;
1116 uint32_t a_mnt, b_mnt;
1118 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1119 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1134 int a_sgn, a_exp, b_sgn, b_exp;
1135 uint64_t a_mnt, b_mnt;
1137 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1138 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1147 return a == b || (!a_mnt && !b_mnt);
1153 int a_sgn, a_exp, b_sgn, b_exp;
1154 uint64_t a_mnt, b_mnt;
1156 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1157 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1164 if (!a_mnt && !b_mnt)
1169 return a_sgn ^ (a_exp > b_exp);
1171 return a_sgn ^ (a_mnt > b_mnt);
1178 int a_sgn, a_exp, b_sgn, b_exp;
1179 uint64_t a_mnt, b_mnt;
1181 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1182 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1189 if (!a_mnt && !b_mnt)
1194 return a_sgn ^ (a_exp > b_exp);
1196 return a_sgn ^ (a_mnt > b_mnt);
1203 int a_sgn, a_exp, b_sgn, b_exp;
1204 uint64_t a_mnt, b_mnt;
1206 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1207 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1222 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1223 uint16_t a_mnt, b_mnt,
x, x_mnt;
1225 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1226 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1242 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1248 if (a_exp >= b_exp) {
1249 b_mnt = (
lsr16(b_mnt, a_exp - b_exp) |
1250 !!(b_mnt & (
lsl16(1, a_exp - b_exp) - 1)));
1253 a_mnt = (
lsr16(a_mnt, b_exp - a_exp) |
1254 !!(a_mnt & (
lsl16(1, b_exp - a_exp) - 1)));
1259 if (a_sgn == b_sgn) {
1260 x_mnt = a_mnt + b_mnt;
1261 }
else if (a_mnt >= b_mnt) {
1262 x_mnt = a_mnt - b_mnt;
1265 x_mnt = b_mnt - a_mnt;
1282 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1283 uint32_t a_mnt, b_mnt,
x, x_mnt;
1285 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1286 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1302 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1308 if (a_exp >= b_exp) {
1309 b_mnt = (
lsr32(b_mnt, a_exp - b_exp) |
1310 !!(b_mnt & (
lsl32(1, a_exp - b_exp) - 1)));
1313 a_mnt = (
lsr32(a_mnt, b_exp - a_exp) |
1314 !!(a_mnt & (
lsl32(1, b_exp - a_exp) - 1)));
1319 if (a_sgn == b_sgn) {
1320 x_mnt = a_mnt + b_mnt;
1321 }
else if (a_mnt >= b_mnt) {
1322 x_mnt = a_mnt - b_mnt;
1325 x_mnt = b_mnt - a_mnt;
1342 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1343 uint64_t a_mnt, b_mnt,
x, x_mnt;
1345 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1346 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1362 }
else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
1368 if (a_exp >= b_exp) {
1369 b_mnt = (
lsr64(b_mnt, a_exp - b_exp) |
1370 !!(b_mnt & (
lsl64(1, a_exp - b_exp) - 1)));
1373 a_mnt = (
lsr64(a_mnt, b_exp - a_exp) |
1374 !!(a_mnt & (
lsl64(1, b_exp - a_exp) - 1)));
1379 if (a_sgn == b_sgn) {
1380 x_mnt = a_mnt + b_mnt;
1381 }
else if (a_mnt >= b_mnt) {
1382 x_mnt = a_mnt - b_mnt;
1385 x_mnt = b_mnt - a_mnt;
1402 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1403 uint16_t a_mnt, b_mnt,
x;
1406 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1407 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1420 }
else if (!a_mnt || !b_mnt) {
1425 x_sgn = a_sgn ^ b_sgn;
1427 x_mnt = (uint32_t)a_mnt * b_mnt;
1433 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1439 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1440 uint32_t a_mnt, b_mnt,
x;
1443 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1444 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1457 }
else if (!a_mnt || !b_mnt) {
1462 x_sgn = a_sgn ^ b_sgn;
1464 x_mnt = (uint64_t)a_mnt * b_mnt;
1470 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1476 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1477 uint64_t a_mnt, b_mnt,
x;
1478 uint64_t x0_mnt, x1_mnt;
1480 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1481 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1494 }
else if (!a_mnt || !b_mnt) {
1499 x_sgn = a_sgn ^ b_sgn;
1501 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1505 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1507 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1512 int mode,
int *flags)
1514 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1515 uint16_t a_mnt, b_mnt, c_mnt,
x;
1516 uint32_t x_mnt, y_mnt;
1518 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1519 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1520 fp16_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1541 (a_sgn != (b_sgn ^ c_sgn)))) {
1549 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1557 y_sgn = b_sgn ^ c_sgn;
1558 y_exp = b_exp + c_exp -
FP16_EXP_BIAS + 2 * FP16_EXP_BITS + 1 - 3;
1559 y_mnt = (uint32_t)b_mnt * c_mnt << 3;
1565 if (x_exp >= y_exp) {
1566 y_mnt = (
lsr32(y_mnt, x_exp - y_exp) |
1567 !!(y_mnt & (
lsl32(1, x_exp - y_exp) - 1)));
1570 x_mnt = (
lsr32(x_mnt, y_exp - x_exp) |
1571 !!(x_mnt & (
lsl32(1, y_exp - x_exp) - 1)));
1574 if (x_sgn == y_sgn) {
1575 x_mnt = x_mnt + y_mnt;
1576 }
else if (x_mnt >= y_mnt) {
1577 x_mnt = x_mnt - y_mnt;
1580 x_mnt = y_mnt - x_mnt;
1590 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1592 return fp16_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1597 int mode,
int *flags)
1599 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1600 uint32_t a_mnt, b_mnt, c_mnt,
x;
1601 uint64_t x_mnt, y_mnt;
1603 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1604 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1605 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1626 (a_sgn != (b_sgn ^ c_sgn)))) {
1634 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1642 y_sgn = b_sgn ^ c_sgn;
1643 y_exp = b_exp + c_exp -
FP32_EXP_BIAS + 2 * FP32_EXP_BITS + 1 - 3;
1644 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1650 if (x_exp >= y_exp) {
1651 y_mnt = (
lsr64(y_mnt, x_exp - y_exp) |
1652 !!(y_mnt & (
lsl64(1, x_exp - y_exp) - 1)));
1655 x_mnt = (
lsr64(x_mnt, y_exp - x_exp) |
1656 !!(x_mnt & (
lsl64(1, y_exp - x_exp) - 1)));
1659 if (x_sgn == y_sgn) {
1660 x_mnt = x_mnt + y_mnt;
1661 }
else if (x_mnt >= y_mnt) {
1662 x_mnt = x_mnt - y_mnt;
1665 x_mnt = y_mnt - x_mnt;
1675 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1677 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1682 int mode,
int *flags)
1684 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1685 uint64_t a_mnt, b_mnt, c_mnt,
x;
1686 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1688 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1689 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1690 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1711 (a_sgn != (b_sgn ^ c_sgn)))) {
1719 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1728 y_sgn = b_sgn ^ c_sgn;
1729 y_exp = b_exp + c_exp -
FP64_EXP_BIAS + 2 * FP64_EXP_BITS + 1 - 3;
1730 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1731 if (!y0_mnt && !y1_mnt) {
1736 if (x_exp >= y_exp) {
1738 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1739 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1740 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1741 y0_mnt |= !!(t0 |
t1);
1745 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1746 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1747 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1748 x0_mnt |= !!(t0 |
t1);
1751 if (x_sgn == y_sgn) {
1752 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1753 }
else if (
cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1754 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1757 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1760 if (!x0_mnt && !x1_mnt) {
1767 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1769 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1775 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1776 uint16_t a_mnt, b_mnt,
x;
1779 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1780 fp16_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1787 (!a_mnt && !b_mnt)) {
1801 x_sgn = a_sgn ^ b_sgn;
1804 x_mnt |= (x_mnt * b_mnt !=
1809 x_mnt = x_mnt >> (
FP16_BITS - 1) | !!(uint16_t)(x_mnt << 1);
1811 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
1817 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1818 uint32_t a_mnt, b_mnt,
x;
1821 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1822 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1829 (!a_mnt && !b_mnt)) {
1843 x_sgn = a_sgn ^ b_sgn;
1846 x_mnt |= (x_mnt * b_mnt !=
1851 x_mnt = x_mnt >> (
FP32_BITS - 1) | !!(uint32_t)(x_mnt << 1);
1853 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1859 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp,
c;
1860 uint64_t a_mnt, b_mnt,
x, x_mnt, x0_mnt, x1_mnt;
1862 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1863 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1870 (!a_mnt && !b_mnt)) {
1885 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1886 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1887 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1888 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1889 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1890 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1893 x_sgn = a_sgn ^ b_sgn;
1895 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2);
1896 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1900 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1);
1901 c =
cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1908 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1940 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1957 b = b < -300 ? -300 :
b;
1958 b = b > 300 ? 300 :
b;
1974 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1991 b = b < -300 ? -300 :
b;
1992 b = b > 300 ? 300 :
b;
2008 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2025 b = b < -3000 ? -3000 :
b;
2026 b = b > 3000 ? 3000 :
b;
2039 int a_sgn, a_exp, x_sgn, x_exp;
2040 uint16_t a_mnt, x_mnt;
2043 fp16_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2066 x = ((uint32_t)a_mnt << 14) + ((uint32_t)a_mnt << 13) + ((uint32_t)5 << 28);
2069 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2072 x = (((uint32_t)a_mnt << 16) / (x >> 15) + (x >> 16)) << 15;
2075 x_exp = (a_exp + 27) >> 1;
2076 x_mnt = ((x - (1 << 18)) >> 19) + 1;
2077 t1 = (uint32_t)x_mnt * x_mnt;
2078 t0 = (uint32_t)a_mnt << 9;
2085 return fp16_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2091 int a_sgn, a_exp, x_sgn, x_exp;
2092 uint32_t a_mnt,
x, x_mnt;
2095 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2118 x = (a_mnt >> 2) + (a_mnt >> 3) + ((uint32_t)5 << 28);
2121 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2124 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
2127 x = ((((uint64_t)a_mnt << 32) /
x) >> 2) + (x >> 1);
2130 x_exp = (a_exp + 147) >> 1;
2131 x_mnt = ((x - (1 << 5)) >> 6) + 1;
2132 t1 = (uint64_t)x_mnt * x_mnt;
2133 t0 = (uint64_t)a_mnt << 19;
2140 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
2146 int a_sgn, a_exp, x_sgn, x_exp,
c;
2147 uint64_t a_mnt, x_mnt,
r, x0, x1;
2150 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
2173 x = (a_mnt >> 34) + (a_mnt >> 35) + ((uint32_t)5 << 28);
2176 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2179 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
2182 x = ((a_mnt /
x) >> 2) + (x >> 1);
2185 r = ((uint64_t)1 << 62) /
x;
2188 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
2189 lsr128(&x0, &x1, x0, x1, 31);
2192 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
2193 lsl128(&x0, &x1, x0, x1, 5);
2194 lsr128(&x0, &x1, x0, x1, 56);
2196 x0 = ((uint64_t)x << 31) + (x0 >> 1);
2199 x_exp = (a_exp + 1053) >> 1;
2201 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
2203 lsl128(&x0, &x1, x0, x1, 19);
2204 c =
cmp128(x0, x1, 0, a_mnt);
2210 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
2216 uint32_t
x = (uint32_t)fpscr;
2217 return (x >> 22 & 0xf) | (x >> 19 & 1 ?
FPLIB_FZ16 : 0);
2225 bool underflow =
false;
2242 if ((flags &
FPLIB_IXC) && !(underflow && fpscr.fz)) {
2424 int sgn1, exp1, sgn2, exp2, result;
2425 uint16_t mnt1, mnt2;
2427 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2428 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2436 if (op1 == op2 || (!mnt1 && !mnt2)) {
2438 }
else if (sgn1 != sgn2) {
2439 result = sgn1 ? 8 : 2;
2440 }
else if (exp1 != exp2) {
2441 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2443 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2458 int sgn1, exp1, sgn2, exp2, result;
2459 uint32_t mnt1, mnt2;
2461 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2462 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2470 if (op1 == op2 || (!mnt1 && !mnt2)) {
2472 }
else if (sgn1 != sgn2) {
2473 result = sgn1 ? 8 : 2;
2474 }
else if (exp1 != exp2) {
2475 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2477 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2492 int sgn1, exp1, sgn2, exp2, result;
2493 uint64_t mnt1, mnt2;
2495 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2496 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2504 if (op1 == op2 || (!mnt1 && !mnt2)) {
2506 }
else if (sgn1 != sgn2) {
2507 result = sgn1 ? 8 : 2;
2508 }
else if (exp1 != exp2) {
2509 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
2511 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
2635 bool alt_hp = fpscr.ahp;
2640 }
else if (fpscr.dn) {
2650 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
2663 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2684 bool alt_hp = fpscr.ahp;
2689 }
else if (fpscr.dn) {
2699 result = ((uint16_t)sgn << (
FP16_BITS - 1) |
2712 rounding, (mode & 0xf) | alt_hp << 4, &flags);
2731 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2789 rounding, mode, &flags);
2808 fp16_unpack(&sgn, &exp, &mnt, op, mode & 0xf, &flags);
2875 fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
2885 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2895 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2937 static uint16_t coeff[32] = {
2972 coeff[op & ((1 << 5) - 1)]);
2979 static uint32_t coeff[64] = {
3046 coeff[op & ((1 << 6) - 1)]);
3053 static uint64_t coeff[64] = {
3120 coeff[op & ((1 << 6) - 1)]);
3183 int sgn1, exp1, sgn2, exp2;
3184 uint16_t mnt1, mnt2,
x, result;
3186 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3187 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3192 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3206 int sgn1, exp1, sgn2, exp2;
3207 uint32_t mnt1, mnt2,
x, result;
3209 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3210 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3215 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3229 int sgn1, exp1, sgn2, exp2;
3230 uint64_t mnt1, mnt2,
x, result;
3232 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3233 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3238 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
3251 return fplibMax<uint16_t>(op1, op2, fpscr);
3259 return fplibMax<uint32_t>(op1, op2, fpscr);
3267 return fplibMax<uint64_t>(op1, op2, fpscr);
3276 int sgn1, exp1, sgn2, exp2;
3277 uint16_t mnt1, mnt2,
x, result;
3279 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3280 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3285 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3299 int sgn1, exp1, sgn2, exp2;
3300 uint32_t mnt1, mnt2,
x, result;
3302 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3303 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3308 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3322 int sgn1, exp1, sgn2, exp2;
3323 uint64_t mnt1, mnt2,
x, result;
3325 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3326 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3331 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
3344 return fplibMin<uint16_t>(op1, op2, fpscr);
3352 return fplibMin<uint32_t>(op1, op2, fpscr);
3360 return fplibMin<uint64_t>(op1, op2, fpscr);
3399 int sgn1, exp1, sgn2, exp2;
3400 uint16_t mnt1, mnt2, result;
3402 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3403 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3412 }
else if (!mnt1 || !mnt2) {
3415 result =
fp16_mul(op1, op2, mode, &flags);
3430 int sgn1, exp1, sgn2, exp2;
3431 uint32_t mnt1, mnt2, result;
3433 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3434 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3443 }
else if (!mnt1 || !mnt2) {
3446 result =
fp32_mul(op1, op2, mode, &flags);
3461 int sgn1, exp1, sgn2, exp2;
3462 uint64_t mnt1, mnt2, result;
3464 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3465 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3474 }
else if (!mnt1 || !mnt2) {
3477 result =
fp64_mul(op1, op2, mode, &flags);
3508 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
3509 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
3510 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
3511 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
3512 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
3513 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
3514 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
3515 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
3516 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
3517 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
3518 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
3519 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
3520 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
3521 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
3522 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
3523 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
3533 uint16_t mnt, result;
3550 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3568 uint32_t mnt, result;
3585 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3603 uint64_t mnt, result;
3620 mnt = recip_sqrt_estimate[(~exp & 1) << 7 |
3637 int sgn1, exp1, sgn2, exp2;
3638 uint16_t mnt1, mnt2, result;
3640 op1 = fplibNeg<uint16_t>(op1);
3641 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3642 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3667 int sgn1, exp1, sgn2, exp2;
3668 uint32_t mnt1, mnt2, result;
3670 op1 = fplibNeg<uint32_t>(op1);
3671 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3672 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3697 int sgn1, exp1, sgn2, exp2;
3698 uint64_t mnt1, mnt2, result;
3700 op1 = fplibNeg<uint64_t>(op1);
3701 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3702 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3728 uint16_t mnt, result;
3740 bool overflow_to_inf =
false;
3743 overflow_to_inf =
true;
3746 overflow_to_inf = !sgn;
3749 overflow_to_inf = sgn;
3752 overflow_to_inf =
false;
3755 panic(
"Unrecognized FP rounding mode");
3766 uint16_t fraction = (((uint32_t)1 << 19) /
3767 (mnt >> (
FP16_BITS - 10) | 1) + 1) >> 1;
3769 if (result_exp == 0) {
3771 }
else if (result_exp == -1) {
3775 result =
fp16_pack(sgn, result_exp, fraction);
3790 uint32_t mnt, result;
3802 bool overflow_to_inf =
false;
3805 overflow_to_inf =
true;
3808 overflow_to_inf = !sgn;
3811 overflow_to_inf = sgn;
3814 overflow_to_inf =
false;
3817 panic(
"Unrecognized FP rounding mode");
3828 uint32_t fraction = (((uint32_t)1 << 19) /
3829 (mnt >> (
FP32_BITS - 10) | 1) + 1) >> 1;
3831 if (result_exp == 0) {
3833 }
else if (result_exp == -1) {
3837 result =
fp32_pack(sgn, result_exp, fraction);
3852 uint64_t mnt, result;
3864 bool overflow_to_inf =
false;
3867 overflow_to_inf =
true;
3870 overflow_to_inf = !sgn;
3873 overflow_to_inf = sgn;
3876 overflow_to_inf =
false;
3879 panic(
"Unrecognized FP rounding mode");
3890 uint64_t fraction = (((uint32_t)1 << 19) /
3891 (mnt >> (
FP64_BITS - 10) | 1) + 1) >> 1;
3893 if (result_exp == 0) {
3895 }
else if (result_exp == -1) {
3899 result =
fp64_pack(sgn, result_exp, fraction);
3913 int sgn1, exp1, sgn2, exp2;
3914 uint16_t mnt1, mnt2, result;
3916 op1 = fplibNeg<uint16_t>(op1);
3917 fp16_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3918 fp16_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3943 int sgn1, exp1, sgn2, exp2;
3944 uint32_t mnt1, mnt2, result;
3946 op1 = fplibNeg<uint32_t>(op1);
3947 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3948 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
3973 int sgn1, exp1, sgn2, exp2;
3974 uint64_t mnt1, mnt2, result;
3976 op1 = fplibNeg<uint64_t>(op1);
3977 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
3978 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
4004 uint16_t mnt, result;
4031 uint32_t mnt, result;
4058 uint64_t mnt, result;
4086 uint16_t mnt, result;
4098 }
else if (exp >= expint) {
4103 uint16_t
x = expint - exp >=
FP16_BITS ? 0 : mnt >> (expint - exp);
4104 int err = exp < expint -
FP16_BITS ? 1 :
4105 ((mnt << 1 >> (expint - exp - 1) & 3) |
4106 ((uint16_t)(mnt << 2 << (
FP16_BITS + exp - expint)) != 0));
4109 x += (err == 3 || (err == 2 && (x & 1)));
4123 panic(
"Unrecognized FP rounding mode");
4151 uint32_t mnt, result;
4163 }
else if (exp >= expint) {
4168 uint32_t
x = expint - exp >=
FP32_BITS ? 0 : mnt >> (expint - exp);
4169 int err = exp < expint -
FP32_BITS ? 1 :
4170 ((mnt << 1 >> (expint - exp - 1) & 3) |
4171 ((uint32_t)(mnt << 2 << (
FP32_BITS + exp - expint)) != 0));
4174 x += (err == 3 || (err == 2 && (x & 1)));
4188 panic(
"Unrecognized FP rounding mode");
4216 uint64_t mnt, result;
4228 }
else if (exp >= expint) {
4233 uint64_t
x = expint - exp >=
FP64_BITS ? 0 : mnt >> (expint - exp);
4234 int err = exp < expint -
FP64_BITS ? 1 :
4235 ((mnt << 1 >> (expint - exp - 1) & 3) |
4236 ((uint64_t)(mnt << 2 << (
FP64_BITS + exp - expint)) != 0));
4239 x += (err == 3 || (err == 2 && (x & 1)));
4253 panic(
"Unrecognized FP rounding mode");
4367 static uint16_t coeff[2][8] = {
4401 static uint32_t coeff[2][8] = {
4435 static uint64_t coeff[2][8] = {
4437 0x3ff0000000000000
ULL,
4438 0xbfc5555555555543
ULL,
4439 0x3f8111111110f30c
ULL,
4440 0xbf2a01a019b92fc6
ULL,
4441 0x3ec71de351f3d22b
ULL,
4442 0xbe5ae5e2b60f7b91
ULL,
4443 0x3de5d8408868552f
ULL,
4444 0x0000000000000000ULL
4447 0x3ff0000000000000
ULL,
4448 0xbfe0000000000000
ULL,
4449 0x3fa5555555555536
ULL,
4450 0xbf56c16c16c13a0b
ULL,
4451 0x3efa01a019b1e8d8
ULL,
4452 0xbe927e4f7282f468
ULL,
4453 0x3e21ee96d2641b13
ULL,
4454 0xbda8f76380fbb401ULL
4474 uint16_t result =
fp16_mul(op1, op1, mode, &flags);
4477 fp16_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4494 uint32_t result =
fp32_mul(op1, op1, mode, &flags);
4497 fp32_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4513 uint64_t result =
fp64_mul(op1, op1, mode, &flags);
4516 fp64_unpack(&sgn, &exp, &mnt, result, mode, &flags);
4527 static constexpr uint16_t fpOne =
4531 return op1 ^ ((op2 >> 1) << (
FP16_BITS - 1));
4538 static constexpr uint32_t fpOne =
4542 return op1 ^ ((op2 >> 1) << (
FP32_BITS - 1));
4549 static constexpr uint64_t fpOne =
4553 return op1 ^ ((op2 >> 1) << (
FP64_BITS - 1));
4566 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4570 err = (exp > expmax - 2 ? 0 :
4576 x += (err == 3 || (err == 2 && (x & 1)));
4590 panic(
"Unrecognized FP rounding mode");
4593 if (u ? sgn && x : x > (1
ULL << (FP64_BITS - 1)) - !sgn) {
4595 return ((uint64_t)!u << (FP64_BITS - 1)) - !sgn;
4602 return sgn ? -
x :
x;
4609 uint64_t
x =
FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4612 (uint64_t)-x <= (uint64_t)1 << (
FP32_BITS - 1))) {
4614 x = ((uint32_t)!u << (
FP32_BITS - 1)) - !sgn;
4623 uint64_t
x =
FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
4626 (uint64_t)-x <= (uint64_t)1 << (
FP16_BITS - 1))) {
4628 x = ((uint16_t)!u << (
FP16_BITS - 1)) - !sgn;
4640 uint16_t mnt, result;
4655 u, rounding, &flags);
4687 u, rounding, &flags);
4701 uint32_t mnt, result;
4716 u, rounding, &flags);
4743 result =
FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
4758 uint32_t sgn =
bits(op, 63);
4759 int32_t exp =
bits(op, 62, 52);
4760 uint64_t mnt =
bits(op, 51, 0);
4772 }
else if (exp == 0x7ff) {
4784 }
else if (mnt_shft >= 0) {
4785 result =
lsl64(mnt, mnt_shft);
4786 }
else if (mnt_shft < 0) {
4788 result =
lsr64(mnt, abs(mnt_shft));
4790 uint64_t max_result = (1UL << (
FP32_BITS - 1)) -!sgn;
4791 if ((exp - FP64_EXP_BIAS) > 31 || result > max_result) {
4798 result = sgn ? -result : result;
4800 if (sgn == 1 && result == 0)
4840 u, rounding, &flags);
4870 u, rounding, &flags);
4884 uint64_t mnt, result;
4896 result =
FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
4909 uint64_t x_mnt = x_sgn ? -
a :
a;
4921 return fp16_round(x_sgn, x_exp, x_mnt, mode, flags);
4929 uint64_t x_mnt = x_sgn ? -
a :
a;
4941 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
4949 uint64_t x_mnt = x_sgn ? -
a :
a;
4958 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
4968 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
4980 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
4992 (
int)rounding | ((uint32_t)fpscr >> 22 & 12),
static uint32_t lsr32(uint32_t x, uint32_t shift)
#define panic(...)
This implements a cprintf based panic() function.
uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint16_t fplibExpA(uint16_t op)
static uint16_t fp16_FPThree(int sgn)
static uint32_t fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
static int fp32_is_signalling_NaN(int exp, uint32_t mnt)
static int fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
static int fp64_is_quiet_NaN(int exp, uint64_t mnt)
bool fplibCompareGT(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint16_t lsl16(uint16_t x, uint32_t shift)
uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr)
static int cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
uint16_t fplibDefaultNaN()
Floating-point value for default NaN.
static uint32_t fp32_normalise(uint32_t mnt, int *exp)
static void set_fpscr0(FPSCR &fpscr, int flags)
uint16_t fplibNeg(uint16_t op)
static uint64_t fp64_normalise(uint64_t mnt, int *exp)
static uint32_t fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static void add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
static void lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr)
uint32_t fplibFPToFixedJS(uint64_t op, FPSCR &fpscr, bool is64, uint8_t &nz)
Floating-point JS convert to a signed integer, with rounding to zero.
static int fp16_compare_un(uint16_t a, uint16_t b, int mode, int *flags)
static void mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
static void fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, int *flags)
static uint64_t fp64_process_NaN(uint64_t a, int mode, int *flags)
Floating-point library code, which will gradually replace vfp.hh.
uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_scale(uint64_t a, int64_t b, int mode, int *flags)
static uint64_t fp64_FPThree(int sgn)
static int fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
static uint32_t fp32_zero(int sgn)
static int fp16_is_signalling_NaN(int exp, uint16_t mnt)
bool fplibCompareEQ(uint16_t a, uint16_t b, FPSCR &fpscr)
static int fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
static int fp64_is_signalling_NaN(int exp, uint64_t mnt)
static uint32_t fp32_defaultNaN()
static uint64_t fp64_FPConvertNaN_16(uint16_t op)
static uint16_t fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
static uint32_t fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
void neg(sc_fxval &c, const sc_fxnum &a)
static uint32_t fp32_FPConvertNaN_64(uint64_t op)
static uint32_t fp32_FPOnePointFive(int sgn)
uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint16_t fp16_zero(int sgn)
static int fp16_compare_ge(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_scale(uint16_t a, int16_t b, int mode, int *flags)
static void fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
static uint32_t fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
static FPRounding FPCRRounding(FPSCR &fpscr)
static const uint8_t recip_sqrt_estimate[256]
uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr)
uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_defaultNaN()
uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint32_t fp32_process_NaN(uint32_t a, int mode, int *flags)
static uint16_t fp16_max_normal(int sgn)
static uint16_t fp16_process_NaNs3(uint16_t a, uint16_t b, uint16_t c, int mode, int *flags)
static uint32_t fp32_sqrt(uint32_t a, int mode, int *flags)
static uint64_t fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int modeConv(FPSCR fpscr)
static void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
static uint32_t fp32_FPTwo(int sgn)
static void fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr)
static int fp32_is_quiet_NaN(int exp, uint32_t mnt)
static uint32_t fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
static uint32_t fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, int mode, int *flags)
static uint32_t fp32_repack(int sgn, int exp, uint32_t mnt)
static uint64_t fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
static void sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
static uint64_t fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, int mode, int *flags)
uint16_t fplibInfinity(int sgn)
Floating-point value for +/- infinity.
static uint32_t fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
static int fp16_is_infinity(int exp, uint16_t mnt)
static uint32_t FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint16_t fp16_FPOnePointFive(int sgn)
static uint16_t fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
#define ULL(N)
uint64_t constant
uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static uint64_t fp64_repack(int sgn, int exp, uint64_t mnt)
static uint16_t fp16_normalise(uint16_t mnt, int *exp)
static uint64_t fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
static void fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, int *flags)
static void fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
static int fp64_compare_un(uint64_t a, uint64_t b, int mode, int *flags)
uint16_t fplibAbs(uint16_t op)
uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
Floating-point convert from fixed-point.
uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp16_compare_eq(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t fp16_sqrt(uint16_t a, int mode, int *flags)
static uint64_t FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint32_t fp32_FPConvertNaN_16(uint16_t op)
bool fplibCompareGE(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint16_t fp16_add(uint16_t a, uint16_t b, int neg, int mode, int *flags)
static int fp64_is_infinity(int exp, uint64_t mnt)
static void set_fpscr(FPSCR &fpscr, int flags)
static uint64_t fp64_FPConvertNaN_32(uint32_t op)
static uint64_t fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
static uint64_t fp64_zero(int sgn)
uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2, FPSCR &fpscr)
static void fp16_minmaxnum(uint16_t *op1, uint16_t *op2, int sgn)
static uint16_t fp16_muladd(uint16_t a, uint16_t b, uint16_t c, int scale, int mode, int *flags)
static uint32_t fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
static int fp64_is_NaN(int exp, uint64_t mnt)
static uint64_t fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
static uint64_t fp64_infinity(int sgn)
static uint16_t fp16_infinity(int sgn)
uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr)
int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr)
static uint32_t fp32_max_normal(int sgn)
bool fplibCompareUN(uint16_t a, uint16_t b, FPSCR &fpscr)
static uint16_t fp16_process_NaNs(uint16_t a, uint16_t b, int mode, int *flags)
static uint16_t FPToFixed_16(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, int *flags)
static uint16_t fp16_repack(int sgn, int exp, uint16_t mnt)
static uint16_t fp16_process_NaN(uint16_t a, int mode, int *flags)
static uint64_t fp64_FPOnePointFive(int sgn)
static uint32_t fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
static uint64_t fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
static void lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr)
static uint64_t fp64_sqrt(uint64_t a, int mode, int *flags)
static uint16_t fp16_FPTwo(int sgn)
static uint32_t fp32_scale(uint32_t a, int32_t b, int mode, int *flags)
static uint64_t fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
static int fp32_compare_un(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_round(int sgn, int exp, uint16_t mnt, int mode, int *flags)
static int fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
static int fp16_is_NaN(int exp, uint16_t mnt)
static uint32_t fp32_FPThree(int sgn)
uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp32_is_infinity(int exp, uint32_t mnt)
static int fp16_is_quiet_NaN(int exp, uint16_t mnt)
static uint64_t lsr64(uint64_t x, uint32_t shift)
uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2, FPSCR &fpscr)
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it...
static uint64_t fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
static uint16_t fp16_defaultNaN()
static uint16_t fp16_div(uint16_t a, uint16_t b, int mode, int *flags)
static uint64_t lsl64(uint64_t x, uint32_t shift)
static uint16_t fp16_mul(uint16_t a, uint16_t b, int mode, int *flags)
static int fp16_compare_gt(uint16_t a, uint16_t b, int mode, int *flags)
static uint64_t fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
static uint32_t fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
static uint32_t fp32_infinity(int sgn)
static uint64_t fp64_FPTwo(int sgn)
static int fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
static void fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, int *flags)
static uint16_t fp16_FPConvertNaN_64(uint64_t op)
uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr)
static int fp32_is_NaN(int exp, uint32_t mnt)
static int fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
static uint16_t fp16_FPConvertNaN_32(uint32_t op)
static uint32_t lsl32(uint32_t x, uint32_t shift)
static uint16_t lsr16(uint16_t x, uint32_t shift)
static uint64_t fp64_max_normal(int sgn)