43 using namespace ArmISA;
109 std::stringstream
ss;
122 std::stringstream
ss;
137 std::stringstream
ss;
152 std::stringstream
ss;
168 std::stringstream
ss;
185 int roundingMode = fegetround();
208 bool underflow =
false;
223 if ((exceptions &
FeInexact) && !(underflow && flush) &&
mask.ixc) {
229 template <
class fpType>
233 int fpClass = std::fpclassify(
val);
235 if (fpClass == FP_NAN) {
236 const bool single = (
sizeof(
val) ==
sizeof(
float));
237 const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
238 const bool nan = std::isnan(op1);
239 if (!nan || defaultNan) {
244 }
else if (fpClass == FP_SUBNORMAL && flush == 1) {
246 uint64_t bitMask = 0x1ULL << (
sizeof(fpType) * 8 - 1);
259 template <
class fpType>
261 fixDest(
bool flush,
bool defaultNan, fpType
val, fpType op1, fpType op2)
263 int fpClass = std::fpclassify(
val);
265 if (fpClass == FP_NAN) {
266 const bool single = (
sizeof(
val) ==
sizeof(
float));
267 const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
268 const bool nan1 = std::isnan(op1);
269 const bool nan2 = std::isnan(op2);
270 const bool signal1 = nan1 && ((
fpToBits(op1) & qnan) != qnan);
271 const bool signal2 = nan2 && ((
fpToBits(op2) & qnan) != qnan);
272 if ((!nan1 && !nan2) || defaultNan) {
274 }
else if (signal1) {
276 }
else if (signal2) {
283 }
else if (fpClass == FP_SUBNORMAL && flush) {
285 uint64_t bitMask = 0x1ULL << (
sizeof(fpType) * 8 - 1);
295 float val,
float op1,
float op2);
298 double val,
double op1,
double op2);
300 template <
class fpType>
304 fpType mid =
fixDest(flush, defaultNan,
val, op1, op2);
305 const bool single = (
sizeof(fpType) ==
sizeof(
float));
306 const fpType junk = 0.0;
307 if ((single && (
val ==
bitsToFp(0x00800000, junk) ||
309 (!single && (
val ==
bitsToFp(0x0010000000000000ULL, junk) ||
312 __asm__ __volatile__(
"" :
"=m" (op1) :
"m" (op1));
315 __asm__ __volatile__(
"" :
"=m" (temp) :
"m" (temp));
324 __asm__ __volatile__(
"" ::
"m" (temp));
331 float val,
float op1,
float op2);
334 double val,
double op1,
double op2);
339 const float junk = 0.0;
341 if (std::isnan(
val)) {
343 uint32_t op1Bits =
bits(valBits, 50, 29) |
345 (
bits(valBits, 63) << 31);
348 float mid =
fixDest(fpscr.fz, fpscr.dn, (
float)
val, op1);
353 if (mid ==
bitsToFp(0x00800000, junk) ||
354 mid ==
bitsToFp(0x80800000, junk)) {
355 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
358 __asm__ __volatile__(
"" :
"=m" (temp) :
"m" (temp));
367 __asm__ __volatile__(
"" ::
"m" (temp));
375 const double junk = 0.0;
377 if (std::isnan(
val)) {
379 uint64_t op1Bits = ((uint64_t)
bits(valBits, 21, 0) << 29) |
381 ((uint64_t)
bits(valBits, 31) << 63);
384 double mid =
fixDest(fpscr.fz, fpscr.dn, (
double)
val, op1);
385 if (mid ==
bitsToFp(0x0010000000000000ULL, junk) ||
386 mid ==
bitsToFp(0x8010000000000000ULL, junk)) {
387 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
390 __asm__ __volatile__(
"" :
"=m" (temp) :
"m" (temp));
399 __asm__ __volatile__(
"" ::
"m" (temp));
404 static inline uint16_t
406 uint32_t
rMode,
bool ahp, uint64_t opBits,
bool isDouble)
420 sBitPos = eWidth + mWidth;
421 eHalfRange = (1 << (eWidth-1)) - 1;
424 bool neg =
bits(opBits, sBitPos);
425 uint32_t exponent =
bits(opBits, sBitPos-1, mWidth);
426 uint64_t oldMantissa =
bits(opBits, mWidth-1, 0);
427 uint32_t mantissa = oldMantissa >> (mWidth - 10);
429 uint64_t extra = oldMantissa &
mask(mWidth - 10);
430 if (exponent ==
mask(eWidth)) {
431 if (oldMantissa != 0) {
433 if (
bits(mantissa, 9) == 0) {
441 }
else if (defaultNan) {
447 mantissa |= (1 << 9);
459 }
else if (exponent == 0 && oldMantissa == 0) {
464 bool inexact = (extra != 0);
472 if (inexact || fpscr.ufe)
481 (extra == (1 << 9) &&
bits(mantissa, 0))))) {
486 if (mantissa == (1 << 10)) {
495 bool topOne =
bits(extra, mWidth - 10 - 1);
496 bool restZeros =
bits(extra, mWidth - 10 - 2, 0) == 0;
498 if (exponent <= (eHalfRange - 15)) {
500 mantissa |= (1 << 10);
501 while (mantissa && exponent <= (eHalfRange - 15)) {
502 restZeros = restZeros && !topOne;
503 topOne =
bits(mantissa, 0);
504 mantissa = mantissa >> 1;
507 if (topOne || !restZeros)
512 exponent -= (eHalfRange - 15);
515 if (exponent == 0 && (inexact || fpscr.ufe)) {
522 bool nonZero = topOne || !restZeros;
526 (!restZeros ||
bits(mantissa, 0)))) {
531 if (mantissa == (1 << 10)) {
538 if (exponent >= 0x20) {
546 if (exponent >= 0x1f) {
569 uint32_t result =
bits(mantissa, 9, 0);
592 static inline uint64_t
607 sBitPos = eWidth + mWidth;
608 eHalfRange = (1 << (eWidth-1)) - 1;
612 uint32_t exponent =
bits(
op, 14, 10);
613 uint64_t mantissa =
bits(
op, 9, 0);
618 exponent = exponent + (eHalfRange - 15) + 1;
619 while (mantissa < (1 << 10)) {
620 mantissa = mantissa << 1;
624 mantissa = mantissa << (mWidth - 10);
625 }
else if (exponent == 0x1f && !
ahp) {
627 exponent =
mask(eWidth);
630 mantissa = mantissa << (mWidth - 10);
631 if (
bits(mantissa, mWidth-1) == 0) {
634 mantissa |= (((uint64_t) 1) << (mWidth-1));
637 mantissa &= ~
mask(mWidth-1);
642 exponent = exponent + (eHalfRange - 15);
643 mantissa = mantissa << (mWidth - 10);
646 uint64_t result =
bits(mantissa, mWidth-1, 0);
649 result |= (((uint64_t) 1) << sBitPos);
681 else if (
width == 32)
683 else if (
width != 64)
686 __asm__ __volatile__(
"" :
"=m" (
scale) :
"m" (
scale));
688 __asm__ __volatile__(
"" :
"=m" (
scale) :
"m" (
scale));
699 else if (
width == 32)
701 else if (
width != 64)
705 __asm__ __volatile__(
"" :
"=m" (
scale) :
"m" (
scale));
707 __asm__ __volatile__(
"" :
"=m" (
scale) :
"m" (
scale));
719 else if (
width == 32)
721 else if (
width != 64)
725 __asm__ __volatile__(
"" :
"=m" (
scale) :
"m" (
scale));
727 __asm__ __volatile__(
"" :
"=m" (
scale) :
"m" (
scale));
738 else if (
width == 32)
740 else if (
width != 64)
744 __asm__ __volatile__(
"" :
"=m" (
scale) :
"m" (
scale));
746 __asm__ __volatile__(
"" :
"=m" (
scale) :
"m" (
scale));
758 q0 = (int64_t)(
a * 512.0);
759 r = 1.0 / sqrt(((
double)q0 + 0.5) / 512.0);
761 q1 = (int64_t)(
a * 256.0);
762 r = 1.0 / sqrt(((
double)q1 + 0.5) / 256.0);
764 s = (int64_t)(256.0 *
r + 0.5);
765 return (
double)
s / 256.0;
773 const uint32_t qnan = 0x7fc00000;
775 int fpClass = std::fpclassify(
op);
776 if (fpClass == FP_NAN) {
780 }
else if (fpClass == FP_ZERO) {
784 (0xFF << 23) | (0 << 0), junk);
785 }
else if (std::signbit(
op)) {
789 }
else if (fpClass == FP_INFINITE) {
794 if (
bits(opBits, 23)) {
795 scaled =
bitsToFp((0 << 0) | (
bits(opBits, 22, 0) << 29) |
796 (0x3fdULL << 52) | (
bits(opBits, 31) << 63),
799 scaled =
bitsToFp((0 << 0) | (
bits(opBits, 22, 0) << 29) |
800 (0x3feULL << 52) | (
bits(opBits, 31) << 63),
803 uint64_t resultExp = (380 -
bits(opBits, 30, 23)) / 2;
808 (
bits(resultExp, 7, 0) << 23) |
809 (
bits(estimate, 51, 29) << 0), junk);
816 if (
bits(
op, 31, 30) == 0) {
823 (
bits((uint64_t)
op, 30, 0) << 21) |
824 (0 << 0), (
double)0.0);
828 (
bits((uint64_t)
op, 29, 0) << 22) |
829 (0 << 0), (
double)0.0);
832 return (1 << 31) |
bits(estimate, 51, 21);
844 q = (int64_t)(
a * 512.0);
845 r = 1.0 / (((double)
q + 0.5) / 512.0);
846 s = (int64_t)(256.0 *
r + 0.5);
847 return (
double)
s / 256.0;
855 const uint32_t qnan = 0x7fc00000;
857 int fpClass = std::fpclassify(
op);
858 if (fpClass == FP_NAN) {
862 }
else if (fpClass == FP_INFINITE) {
863 return bitsToFp(std::signbit(
op) << 31, junk);
864 }
else if (fpClass == FP_ZERO) {
868 (0xFF << 23) | (0 << 0), junk);
869 }
else if (fabs(
op) >= pow(2.0, 126)) {
871 return bitsToFp(std::signbit(
op) << 31, junk);
875 scaled =
bitsToFp((0 << 0) | (
bits(opBits, 22, 0) << 29) |
876 (0x3feULL << 52) | (0ULL << 63),
878 uint64_t resultExp = 253 -
bits(opBits, 30, 23);
883 (
bits(resultExp, 7, 0) << 23) |
884 (
bits(estimate, 51, 29) << 0), junk);
897 (
bits((uint64_t)
op, 30, 0) << 21) |
898 (0 << 0), (
double)0.0);
900 return (1 << 31) |
bits(estimate, 51, 21);
908 new_fpscr.ahp = fpscr.ahp;
911 new_fpscr.fz16 = fpscr.fz16;
915 template <
class fpType>
918 fpType op1, fpType op2)
const
923 const bool single = (
sizeof(fpType) ==
sizeof(
float));
924 const uint64_t qnan =
925 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
926 const bool nan1 = std::isnan(op1);
927 const bool nan2 = std::isnan(op2);
928 const bool signal1 = nan1 && ((
fpToBits(op1) & qnan) != qnan);
929 const bool signal2 = nan2 && ((
fpToBits(op2) & qnan) != qnan);
933 }
else if (signal1) {
935 }
else if (signal2) {
942 if (signal1 || signal2) {
953 float op1,
float op2)
const;
956 double op1,
double op2)
const;
959 template <
class fpType>
962 fpType (*func)(fpType, fpType, fpType),
963 bool flush,
bool defaultNan, uint32_t
rMode)
const
965 const bool single = (
sizeof(fpType) ==
sizeof(
float));
971 __asm__ __volatile__ (
"" :
"=m" (op1),
"=m" (op2),
"=m" (op3),
"=m" (state)
972 :
"m" (op1),
"m" (op2),
"m" (op3),
"m" (state));
973 fpType dest = func(op1, op2, op3);
974 __asm__ __volatile__ (
"" :
"=m" (dest) :
"m" (dest));
976 int fpClass = std::fpclassify(dest);
978 if (fpClass == FP_NAN) {
979 const uint64_t qnan =
980 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
981 const bool nan1 = std::isnan(op1);
982 const bool nan2 = std::isnan(op2);
983 const bool nan3 = std::isnan(op3);
984 const bool signal1 = nan1 && ((
fpToBits(op1) & qnan) != qnan);
985 const bool signal2 = nan2 && ((
fpToBits(op2) & qnan) != qnan);
986 const bool signal3 = nan3 && ((
fpToBits(op3) & qnan) != qnan);
987 if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) {
989 }
else if (signal1) {
991 }
else if (signal2) {
993 }
else if (signal3) {
1005 (single && (dest ==
bitsToFp(0x00800000, junk) ||
1006 dest ==
bitsToFp(0x80800000, junk))) ||
1008 (dest ==
bitsToFp(0x0010000000000000ULL, junk) ||
1009 dest ==
bitsToFp(0x8010000000000000ULL, junk)))
1016 __asm__ __volatile__ (
"" :
"=m" (op1),
"=m" (op2),
"=m" (op3)
1017 :
"m" (op1),
"m" (op2),
"m" (op3));
1018 fpType temp = func(op1, op2, op3); // round-to-zero re-evaluation must use the same three operands as the first func(op1, op2, op3) call
1019 __asm__ __volatile__ (
"" :
"=m" (temp) :
"m" (temp));
1030 float (*func)(
float,
float,
float),
1031 bool flush,
bool defaultNan, uint32_t
rMode)
const;
1033 double FpOp::ternaryOp(FPSCR &fpscr,
double op1,
double op2,
double op3,
1034 double (*func)(
double,
double,
double),
1035 bool flush,
bool defaultNan, uint32_t
rMode)
const;
1037 template <
class fpType>
1040 fpType (*func)(fpType, fpType),
1041 bool flush,
bool defaultNan, uint32_t
rMode)
const
1043 const bool single = (
sizeof(fpType) ==
sizeof(
float));
1049 __asm__ __volatile__ (
"" :
"=m" (op1),
"=m" (op2),
"=m" (state)
1050 :
"m" (op1),
"m" (op2),
"m" (state));
1051 fpType dest = func(op1, op2);
1052 __asm__ __volatile__ (
"" :
"=m" (dest) :
"m" (dest));
1055 if (std::isnan(dest)) {
1056 const uint64_t qnan =
1057 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1058 const bool nan1 = std::isnan(op1);
1059 const bool nan2 = std::isnan(op2);
1060 const bool signal1 = nan1 && ((
fpToBits(op1) & qnan) != qnan);
1061 const bool signal2 = nan2 && ((
fpToBits(op2) & qnan) != qnan);
1062 if ((!nan1 && !nan2) || (defaultNan == 1)) {
1064 }
else if (signal1) {
1066 }
else if (signal2) {
1076 (single && (dest ==
bitsToFp(0x00800000, junk) ||
1077 dest ==
bitsToFp(0x80800000, junk))) ||
1079 (dest ==
bitsToFp(0x0010000000000000ULL, junk) ||
1080 dest ==
bitsToFp(0x8010000000000000ULL, junk)))
1087 __asm__ __volatile__ (
"" :
"=m" (op1),
"=m" (op2)
1088 :
"m" (op1),
"m" (op2));
1089 fpType temp = func(op1, op2);
1090 __asm__ __volatile__ (
"" :
"=m" (temp) :
"m" (temp));
1101 float (*func)(
float,
float),
1102 bool flush,
bool defaultNan, uint32_t
rMode)
const;
1105 double (*func)(
double,
double),
1106 bool flush,
bool defaultNan, uint32_t
rMode)
const;
1108 template <
class fpType>
1111 bool flush, uint32_t
rMode)
const
1113 const bool single = (
sizeof(fpType) ==
sizeof(
float));
1119 __asm__ __volatile__ (
"" :
"=m" (op1),
"=m" (state)
1120 :
"m" (op1),
"m" (state));
1121 fpType dest = func(op1);
1122 __asm__ __volatile__ (
"" :
"=m" (dest) :
"m" (dest));
1125 if (std::isnan(dest)) {
1126 const uint64_t qnan =
1127 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1128 const bool nan = std::isnan(op1);
1129 if (!nan || fpscr.dn == 1) {
1137 (single && (dest ==
bitsToFp(0x00800000, junk) ||
1138 dest ==
bitsToFp(0x80800000, junk))) ||
1140 (dest ==
bitsToFp(0x0010000000000000ULL, junk) ||
1141 dest ==
bitsToFp(0x8010000000000000ULL, junk)))
1148 __asm__ __volatile__ (
"" :
"=m" (op1) :
"m" (op1));
1149 fpType temp = func(op1);
1150 __asm__ __volatile__ (
"" :
"=m" (temp) :
"m" (temp));
1160 float FpOp::unaryOp(FPSCR &fpscr,
float op1,
float (*func)(
float),
1161 bool flush, uint32_t
rMode)
const;
1163 double FpOp::unaryOp(FPSCR &fpscr,
double op1,
double (*func)(
double),
1164 bool flush, uint32_t
rMode)
const;
1172 unsigned offset = idx % 8;
1173 idx = (IntRegIndex)(idx -
offset);
1175 idx = (IntRegIndex)(idx + (
offset % 8));
1182 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1183 assert(!inScalarBank(dest));
1184 dest = addStride(dest,
stride);
1185 op1 = addStride(op1,
stride);
1186 if (!inScalarBank(op2)) {
1187 op2 = addStride(op2,
stride);
1194 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1195 assert(!inScalarBank(dest));
1196 dest = addStride(dest,
stride);
1197 if (!inScalarBank(op1)) {
1198 op1 = addStride(op1,
stride);
1205 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1206 assert(!inScalarBank(dest));
1207 dest = addStride(dest,
stride);