gem5 [DEVELOP-FOR-25.0]
Loading...
Searching...
No Matches
vfp.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2010-2013, 2019, 2024-2025 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "arch/arm/insts/vfp.hh"
39
40namespace gem5
41{
42
43using namespace ArmISA;
44
45/*
46 * The asm statements below are to keep gcc from reordering code. Otherwise
47 * the rounding mode might be set after the operation it was intended for, the
48 * exception bits read before it, etc.
49 */
50
51std::string
53 Addr pc, const loader::SymbolTable *symtab) const
54{
55 std::stringstream ss;
56 printMnemonic(ss, "", false);
58 ccprintf(ss, ", ");
60 ccprintf(ss, ", #%d", defCc);
61 ccprintf(ss, ", ");
63 return ss.str();
64}
65
66std::string
68 Addr pc, const loader::SymbolTable *symtab) const
69{
70 std::stringstream ss;
71 printMnemonic(ss, "", false);
73 ccprintf(ss, ", ");
75 ccprintf(ss, ", ");
77 ccprintf(ss, ", ");
79 return ss.str();
80}
81
82std::string
84 Addr pc, const loader::SymbolTable *symtab) const
85{
86 std::stringstream ss;
89 ss << ", ";
91 return ss.str();
92}
93
94std::string
96 Addr pc, const loader::SymbolTable *symtab) const
97{
98 std::stringstream ss;
101 ccprintf(ss, ", #%d", imm);
102 return ss.str();
103}
104
105std::string
107 Addr pc, const loader::SymbolTable *symtab) const
108{
109 std::stringstream ss;
112 ss << ", ";
114 ccprintf(ss, ", #%d", imm);
115 return ss.str();
116}
117
118std::string
120 Addr pc, const loader::SymbolTable *symtab) const
121{
122 std::stringstream ss;
125 ss << ", ";
127 ss << ", ";
129 return ss.str();
130}
131
132std::string
134 Addr pc, const loader::SymbolTable *symtab)
135 const
136{
137 std::stringstream ss;
138 printMnemonic(ss, "", /*withPred=*/false, /*withCond64=*/true, cond);
140 ss << ", ";
142 ss << ", ";
144 return ss.str();
145}
146
147std::string
149 Addr pc, const loader::SymbolTable *symtab) const
150{
151 std::stringstream ss;
154 ss << ", ";
156 ss << ", ";
158 ss << ", ";
160 return ss.str();
161}
162
163std::string
165 Addr pc, const loader::SymbolTable *symtab) const
166{
167 std::stringstream ss;
170 ss << ", ";
172 ss << ", ";
174 ccprintf(ss, ", #%d", imm);
175 return ss.str();
176}
177
178namespace ArmISA
179{
180
183{
184 int roundingMode = fegetround();
185 feclearexcept(FeAllExceptions);
186 switch (rMode) {
187 case VfpRoundNearest:
188 fesetround(FeRoundNearest);
189 break;
190 case VfpRoundUpward:
191 fesetround(FeRoundUpward);
192 break;
193 case VfpRoundDown:
194 fesetround(FeRoundDown);
195 break;
196 case VfpRoundZero:
197 fesetround(FeRoundZero);
198 break;
199 }
200 return roundingMode;
201}
202
203void
204finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
205{
206 int exceptions = fetestexcept(FeAllExceptions);
207 bool underflow = false;
208 if ((exceptions & FeInvalid) && mask.ioc) {
209 fpscr.ioc = 1;
210 }
211 if ((exceptions & FeDivByZero) && mask.dzc) {
212 fpscr.dzc = 1;
213 }
214 if ((exceptions & FeOverflow) && mask.ofc) {
215 fpscr.ofc = 1;
216 }
217 if (exceptions & FeUnderflow) {
218 underflow = true;
219 if (mask.ufc)
220 fpscr.ufc = 1;
221 }
222 if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) {
223 fpscr.ixc = 1;
224 }
225 fesetround(state);
226}
227
228template <class fpType>
229fpType
230fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
231{
232 int fpClass = std::fpclassify(val);
233 fpType junk = 0.0;
234 if (fpClass == FP_NAN) {
235 const bool single = (sizeof(val) == sizeof(float));
236 const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
237 const bool nan = std::isnan(op1);
238 if (!nan || defaultNan) {
239 val = bitsToFp(qnan, junk);
240 } else if (nan) {
241 val = bitsToFp(fpToBits(op1) | qnan, junk);
242 }
243 } else if (fpClass == FP_SUBNORMAL && flush == 1) {
244 // Turn val into a zero with the correct sign;
245 uint64_t bitMask = 0x1ULL << (sizeof(fpType) * 8 - 1);
246 val = bitsToFp(fpToBits(val) & bitMask, junk);
247 feclearexcept(FeInexact);
248 feraiseexcept(FeUnderflow);
249 }
250 return val;
251}
252
253template
254float fixDest<float>(bool flush, bool defaultNan, float val, float op1);
255template
256double fixDest<double>(bool flush, bool defaultNan, double val, double op1);
257
/*
 * Fix the result (val) of a two-operand x86 FP operation so it matches
 * ARM semantics:
 *  - A NaN result is rebuilt from the operands: the default NaN if
 *    neither operand is a NaN (or if defaultNan is set), otherwise the
 *    first signalling NaN (quieted), otherwise the first quiet NaN.
 *  - A subnormal result is flushed to a signed zero when flush is set,
 *    raising underflow and suppressing inexact.
 */
template <class fpType>
fpType
fixDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;  // carries the destination type into bitsToFp
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Quiet-NaN bit pattern for this operand width.
        const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A NaN whose quiet bit is clear is signalling.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        // Priority order matters: signalling NaNs win over quiet ones,
        // and op1 wins over op2 within each category.
        if ((!nan1 && !nan2) || defaultNan) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && flush) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = 0x1ULL << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        // Flushing counts as underflow, but must not also raise inexact.
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

template
float fixDest<float>(bool flush, bool defaultNan,
                     float val, float op1, float op2);
template
double fixDest<double>(bool flush, bool defaultNan,
                       double val, double op1, double op2);
298
/*
 * Like the two-operand fixDest, with an extra correction for division
 * results that landed exactly on the minimum normal magnitude. x86
 * detects underflow after rounding while ARM detects it before, so the
 * divide is redone with round-toward-zero to see whether the unrounded
 * quotient would have been subnormal; if so, underflow is raised (and,
 * in flush-to-zero mode, the flushed value is used).
 */
template <class fpType>
fpType
fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(flush, defaultNan, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    // 0x00800000 / 0x0010000000000000 are the minimum normal magnitudes
    // (either sign) for single and double precision respectively.
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(0x0010000000000000ULL, junk) ||
                     val == bitsToFp(0x8010000000000000ULL, junk)))
        ) {
        // The empty asm statements are barriers that keep the compiler
        // from moving the divide relative to the fesetround call.
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (flush) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

template
float fixDivDest<float>(bool flush, bool defaultNan,
                        float val, float op1, float op2);
template
double fixDivDest<double>(bool flush, bool defaultNan,
                          double val, double op1, double op2);
334
335float
336fixFpDFpSDest(FPSCR fpscr, double val)
337{
338 const float junk = 0.0;
339 float op1 = 0.0;
340 if (std::isnan(val)) {
341 uint64_t valBits = fpToBits(val);
342 uint32_t op1Bits = bits(valBits, 50, 29) |
343 (mask(9) << 22) |
344 (bits(valBits, 63) << 31);
345 op1 = bitsToFp(op1Bits, junk);
346 }
347 float mid = fixDest(fpscr.fz, fpscr.dn, (float)val, op1);
348 if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
350 feclearexcept(FeInexact);
351 }
352 if (mid == bitsToFp(0x00800000, junk) ||
353 mid == bitsToFp(0x80800000, junk)) {
354 __asm__ __volatile__("" : "=m" (val) : "m" (val));
355 fesetround(FeRoundZero);
356 float temp = 0.0;
357 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
358 temp = val;
359 if (flushToZero(temp)) {
360 feraiseexcept(FeUnderflow);
361 if (fpscr.fz) {
362 feclearexcept(FeInexact);
363 mid = temp;
364 }
365 }
366 __asm__ __volatile__("" :: "m" (temp));
367 }
368 return mid;
369}
370
/*
 * Widen a single-precision value to double precision with ARM semantics.
 * A NaN operand has its payload moved into the wider mantissa (op1 is
 * the pre-widened NaN handed to fixDest); results landing exactly on the
 * minimum normal double get the same round-toward-zero recheck as
 * fixFpDFpSDest, since ARM detects underflow before rounding and x86
 * after.
 */
double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        uint32_t valBits = fpToBits(val);
        // Rebuild the NaN in double format: payload shifted up, the
        // exponent/quiet-bit field forced to all ones, sign preserved.
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
                           (mask(12) << 51) |
                           ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr.fz, fpscr.dn, (double)val, op1);
    // 0x0010000000000000 is the minimum normal double magnitude.
    if (mid == bitsToFp(0x0010000000000000ULL, junk) ||
        mid == bitsToFp(0x8010000000000000ULL, junk)) {
        // Barriers keep the conversion ordered against fesetround.
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
402
/*
 * Narrow a single- or double-precision value (passed as raw bits in
 * opBits) to half precision, implementing ARM rounding, denormalization,
 * NaN and flush semantics in integer arithmetic. Exception flags are
 * accumulated directly into fpscr. When ahp is set, the alternative
 * half-precision format is used: no infinities or NaNs in the result,
 * out-of-range values saturate and set the invalid-operation flag.
 */
static inline uint16_t
vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan,
          uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
{
    uint32_t mWidth;
    uint32_t eWidth;
    uint32_t eHalfRange;
    uint32_t sBitPos;

    // Field widths of the source format.
    if (isDouble) {
        mWidth = 52;
        eWidth = 11;
    } else {
        mWidth = 23;
        eWidth = 8;
    }
    sBitPos = eWidth + mWidth;
    eHalfRange = (1 << (eWidth-1)) - 1;

    // Extract the operand.
    bool neg = bits(opBits, sBitPos);
    uint32_t exponent = bits(opBits, sBitPos-1, mWidth);
    uint64_t oldMantissa = bits(opBits, mWidth-1, 0);
    // Keep the top 10 mantissa bits; "extra" holds the dropped bits,
    // which drive rounding and the inexact flag.
    uint32_t mantissa = oldMantissa >> (mWidth - 10);
    // Do the conversion.
    uint64_t extra = oldMantissa & mask(mWidth - 10);
    if (exponent == mask(eWidth)) {
        if (oldMantissa != 0) {
            // Nans.
            if (bits(mantissa, 9) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
            }
            if (ahp) {
                // AHP format has no NaN encoding.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (defaultNan) {
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Propagate the NaN payload, quieted.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (ahp) {
                // AHP format has no infinity; saturate instead.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized.
            // If flush to zero is on, this shouldn't happen.
            assert(!flush);

            // Check for underflow
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding.
            unsigned mode = rMode;
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, mWidth - 10 - 1);
            bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0;

            if (exponent <= (eHalfRange - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (eHalfRange - 15)) {
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias.
                exponent -= (eHalfRange - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }

            // Handle rounding.
            unsigned mode = rMode;
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow
            if (ahp) {
                if (exponent >= 0x20) {
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    return result;
}
574
575uint16_t
576vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
577 uint32_t rMode, bool ahp, float op)
578{
579 uint64_t opBits = fpToBits(op);
580 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false);
581}
582
583uint16_t
584vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
585 uint32_t rMode, bool ahp, double op)
586{
587 uint64_t opBits = fpToBits(op);
588 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true);
589}
590
/*
 * Widen a half-precision value (op) to single or double precision,
 * returned as raw bits. Half-precision denormals are normalized into the
 * wider format, NaNs have their payload shifted up and are quieted (with
 * defaultNan optionally replacing the payload). When ahp is set, the
 * alternative half-precision format applies and exponent 0x1f is treated
 * as a normal number rather than infinity/NaN.
 */
static inline uint64_t
vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
{
    uint32_t mWidth;
    uint32_t eWidth;
    uint32_t eHalfRange;
    uint32_t sBitPos;

    // Field widths of the destination format.
    if (isDouble) {
        mWidth = 52;
        eWidth = 11;
    } else {
        mWidth = 23;
        eWidth = 8;
    }
    sBitPos = eWidth + mWidth;
    eHalfRange = (1 << (eWidth-1)) - 1;

    // Extract the bitfields.
    bool neg = bits(op, 15);
    uint32_t exponent = bits(op, 14, 10);
    uint64_t mantissa = bits(op, 9, 0);
    // Do the conversion.
    if (exponent == 0) {
        if (mantissa != 0) {
            // Normalize the value.
            exponent = exponent + (eHalfRange - 15) + 1;
            while (mantissa < (1 << 10)) {
                mantissa = mantissa << 1;
                exponent--;
            }
        }
        mantissa = mantissa << (mWidth - 10);
    } else if (exponent == 0x1f && !ahp) {
        // Infinities and nans.
        exponent = mask(eWidth);
        if (mantissa != 0) {
            // Nans.
            mantissa = mantissa << (mWidth - 10);
            if (bits(mantissa, mWidth-1) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
                // Quiet it in the result.
                mantissa |= (((uint64_t) 1) << (mWidth-1));
            }
            if (defaultNan) {
                // Clear the payload, keeping only the quiet bit.
                mantissa &= ~mask(mWidth-1);
                neg = false;
            }
        }
    } else {
        // Normal numbers: rebias the exponent, widen the mantissa.
        exponent = exponent + (eHalfRange - 15);
        mantissa = mantissa << (mWidth - 10);
    }
    // Reassemble the result.
    uint64_t result = bits(mantissa, mWidth-1, 0);
    replaceBits(result, sBitPos-1, mWidth, exponent);
    if (neg) {
        result |= (((uint64_t) 1) << sBitPos);
    }
    return result;
}
652
653double
654vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
655{
656 double junk = 0.0;
657 uint64_t result;
658
659 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true);
660 return bitsToFp(result, junk);
661}
662
663float
664vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
665{
666 float junk = 0.0;
667 uint64_t result;
668
669 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false);
670 return bitsToFp(result, junk);
671}
672
/*
 * Convert an unsigned fixed-point value (width-bit integer with imm
 * fractional bits) to single precision, rounding to nearest. The empty
 * asm statements are barriers keeping the compiler from reordering the
 * divide relative to the fenv calls; fixDivDest applies the ARM
 * underflow/NaN corrections.
 */
float
vfpUFixedToFpS(bool flush, bool defaultNan,
               uint64_t val, uint8_t width, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Truncate the operand to its declared width.
    if (width == 16)
        val = (uint16_t)val;
    else if (width == 32)
        val = (uint32_t)val;
    else if (width != 64)
        panic("Unsupported width %d", width);
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
}
690
/*
 * Convert a signed fixed-point value (width-bit integer with imm
 * fractional bits) to single precision, rounding to nearest. See
 * vfpUFixedToFpS for the barrier/fenv ordering notes.
 */
float
vfpSFixedToFpS(bool flush, bool defaultNan,
               int64_t val, uint8_t width, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Sign-extend the operand from its declared width.
    if (width == 16)
        val = szext<16>(val);
    else if (width == 32)
        val = szext<32>(val);
    else if (width != 64)
        panic("Unsupported width %d", width);

    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
}
709
710
/*
 * Convert an unsigned fixed-point value (width-bit integer with imm
 * fractional bits) to double precision, rounding to nearest. See
 * vfpUFixedToFpS for the barrier/fenv ordering notes.
 */
double
vfpUFixedToFpD(bool flush, bool defaultNan,
               uint64_t val, uint8_t width, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Truncate the operand to its declared width.
    if (width == 16)
        val = (uint16_t)val;
    else if (width == 32)
        val = (uint32_t)val;
    else if (width != 64)
        panic("Unsupported width %d", width);

    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
}
729
/*
 * Convert a signed fixed-point value (width-bit integer with imm
 * fractional bits) to double precision, rounding to nearest. See
 * vfpUFixedToFpS for the barrier/fenv ordering notes.
 */
double
vfpSFixedToFpD(bool flush, bool defaultNan,
               int64_t val, uint8_t width, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Sign-extend the operand from its declared width.
    if (width == 16)
        val = szext<16>(val);
    else if (width == 32)
        val = szext<32>(val);
    else if (width != 64)
        panic("Unsupported width %d", width);

    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
}
748
749// This function implements a magic formula taken from the architecture
750// reference manual. It was originally called recip_sqrt_estimate.
751static double
753{
754 int64_t q0, q1, s;
755 double r;
756 if (a < 0.5) {
757 q0 = (int64_t)(a * 512.0);
758 r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0);
759 } else {
760 q1 = (int64_t)(a * 256.0);
761 r = 1.0 / sqrt(((double)q1 + 0.5) / 256.0);
762 }
763 s = (int64_t)(256.0 * r + 0.5);
764 return (double)s / 256.0;
765}
766
// This function is only intended for use in Neon instructions because
// it ignores certain bits in the FPSCR.
//
// Single-precision reciprocal square root estimate: special cases are
// handled explicitly (NaN, zero, negative, infinity), then the operand's
// significand is rescaled into a double and fed through the
// architectural recip_sqrt_estimate formula.
float
fprSqrtEstimate(FPSCR &fpscr, float op)
{
    const uint32_t qnan = 0x7fc00000;
    float junk = 0.0;  // carries the float type into bitsToFp
    int fpClass = std::fpclassify(op);
    if (fpClass == FP_NAN) {
        // A NaN with the quiet bit clear is signalling.
        if ((fpToBits(op) & qnan) != qnan)
            fpscr.ioc = 1;
        return bitsToFp(qnan, junk);
    } else if (fpClass == FP_ZERO) {
        fpscr.dzc = 1;
        // Return infinity with the same sign as the operand.
        return bitsToFp((std::signbit(op) << 31) |
                       (0xFF << 23) | (0 << 0), junk);
    } else if (std::signbit(op)) {
        // Square root of a negative number.
        // Set invalid op bit.
        fpscr.ioc = 1;
        return bitsToFp(qnan, junk);
    } else if (fpClass == FP_INFINITE) {
        return 0.0;
    } else {
        // Rescale the significand into a double, picking the exponent by
        // the parity bit of the operand's exponent field (bit 23).
        uint64_t opBits = fpToBits(op);
        double scaled;
        if (bits(opBits, 23)) {
            scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
                              (0x3fdULL << 52) | (bits(opBits, 31) << 63),
                              (double)0.0);
        } else {
            scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
                              (0x3feULL << 52) | (bits(opBits, 31) << 63),
                              (double)0.0);
        }
        // Result exponent: halve the biased distance from the maximum.
        uint64_t resultExp = (380 - bits(opBits, 30, 23)) / 2;

        uint64_t estimate = fpToBits(recipSqrtEstimate(scaled));

        // Reassemble: estimate's sign, computed exponent, and the top
        // mantissa bits of the estimate.
        return bitsToFp((bits(estimate, 63) << 31) |
                        (bits(resultExp, 7, 0) << 23) |
                        (bits(estimate, 51, 29) << 0), junk);
    }
}
811
/*
 * Half-precision reciprocal square root estimate. Mirrors
 * fprSqrtEstimate but works on raw half-precision bits; FZ16 makes
 * subnormal operands behave like zero and flushes a denormal result.
 */
uint16_t
fprSqrtEstimateFpH(FPSCR &fpscr, uint16_t op)
{
    const uint16_t qnan = 0x7e00;
    bool sign = bits(op, 15);
    int fpClass = fpclassifyFpH(op);
    if (fpClass == FP_NAN) {
        // A NaN with the quiet bit clear is signalling.
        if ((op & qnan) != qnan)
            fpscr.ioc = 1;
        return qnan;
    } else if ((fpscr.fz16 && fpClass == FP_SUBNORMAL)
               || fpClass == FP_ZERO) {
        fpscr.dzc = 1;
        // Return infinity with the same sign as the operand.
        return (sign << 15) | (0x1F << 10) | (0 << 0);
    } else if (sign) {
        // Square root of a negative number.
        // Set invalid op bit.
        fpscr.ioc = 1;
        return qnan;
    } else if (fpClass == FP_INFINITE) {
        return 0;
    } else {
        uint64_t opBits = op;
        // Place the 10 fraction bits at the top of a 52-bit field.
        uint64_t fraction = bits(opBits, 9, 0) << 42;
        int16_t exp = bits(opBits, 14, 10);

        // Normalize a subnormal operand, adjusting the exponent.
        if (exp == 0) {
            while (bits(fraction, 51) == 0) {
                fraction = bits(fraction, 50, 0) << 1;
                exp = exp - 1;
            }
            fraction = bits(fraction, 50, 0) << 1;
        }

        // scaled input value to the range of [0.25, 0.5) and [0.5, 1).
        double scaled;
        if (bits(exp, 0)) {
            scaled = bitsToFp((0 << 0) | (bits(fraction, 51, 42) << 42) |
                              (0x3fdULL << 52) | (0ULL << 63),
                              (double)0.0);
        } else {
            scaled = bitsToFp((0 << 0) | (bits(fraction, 51, 41) << 41) |
                              (0x3feULL << 52) | (0ULL << 63),
                              (double)0.0);
        }
        // Result exponent: halve the biased distance from the maximum.
        uint64_t resultExp = (44 - exp) / 2;

        uint64_t estimate = fpToBits(recipSqrtEstimate(scaled));

        // if flush-to-zero, flush denormal.
        if (fpscr.fz16) {
            if (resultExp == 0) {
                return 0;
            }
        }

        return (bits(resultExp, 4, 0) << 10) | (bits(estimate, 51, 44) << 2);
    }
}
871
872uint32_t
874{
875 if (bits(op, 31, 30) == 0) {
876 return -1;
877 } else {
878 double dpOp;
879 if (bits(op, 31)) {
880 dpOp = bitsToFp((0ULL << 63) |
881 (0x3feULL << 52) |
882 (bits((uint64_t)op, 30, 0) << 21) |
883 (0 << 0), (double)0.0);
884 } else {
885 dpOp = bitsToFp((0ULL << 63) |
886 (0x3fdULL << 52) |
887 (bits((uint64_t)op, 29, 0) << 22) |
888 (0 << 0), (double)0.0);
889 }
890 uint64_t estimate = fpToBits(recipSqrtEstimate(dpOp));
891 return (1 << 31) | bits(estimate, 51, 21);
892 }
893}
894
895// This function implements a magic formula taken from the architecture
896// reference manual. It was originally called recip_estimate.
897
898static double
900{
901 int64_t q, s;
902 double r;
903 q = (int64_t)(a * 512.0);
904 r = 1.0 / (((double)q + 0.5) / 512.0);
905 s = (int64_t)(256.0 * r + 0.5);
906 return (double)s / 256.0;
907}
908
909// This function is only intended for use in Neon instructions because
910// it ignores certain bits in the FPSCR.
911float
912fpRecipEstimate(FPSCR &fpscr, float op)
913{
914 const uint32_t qnan = 0x7fc00000;
915 float junk = 0.0;
916 int fpClass = std::fpclassify(op);
917 if (fpClass == FP_NAN) {
918 if ((fpToBits(op) & qnan) != qnan)
919 fpscr.ioc = 1;
920 return bitsToFp(qnan, junk);
921 } else if (fpClass == FP_INFINITE) {
922 return bitsToFp(std::signbit(op) << 31, junk);
923 } else if (fpClass == FP_ZERO) {
924 fpscr.dzc = 1;
925 // Return infinity with the same sign as the operand.
926 return bitsToFp((std::signbit(op) << 31) |
927 (0xFF << 23) | (0 << 0), junk);
928 } else if (fabs(op) >= pow(2.0, 126)) {
929 fpscr.ufc = 1;
930 return bitsToFp(std::signbit(op) << 31, junk);
931 } else {
932 uint64_t opBits = fpToBits(op);
933 double scaled;
934 scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
935 (0x3feULL << 52) | (0ULL << 63),
936 (double)0.0);
937 uint64_t resultExp = 253 - bits(opBits, 30, 23);
938
939 uint64_t estimate = fpToBits(recipEstimate(scaled));
940
941 return bitsToFp((bits(opBits, 31) << 31) |
942 (bits(resultExp, 7, 0) << 23) |
943 (bits(estimate, 51, 29) << 0), junk);
944 }
945}
946
/*
 * Half-precision reciprocal estimate. Mirrors fpRecipEstimate but works
 * on raw half-precision bits; FZ16 makes subnormal operands behave like
 * zero and flushes a denormal result.
 */
uint16_t
fpRecipEstimateFpH(FPSCR &fpscr, uint16_t op)
{
    const uint16_t qnan = 0x7e00;
    bool sign = bits(op, 15);
    int fpClass = fpclassifyFpH(op);
    if (fpClass == FP_NAN) {
        // A NaN with the quiet bit clear is signalling.
        if ((op & qnan) != qnan)
            fpscr.ioc = 1;
        return qnan;
    } else if (fpClass == FP_INFINITE) {
        // Reciprocal of infinity is a signed zero.
        return sign << 15;
    } else if ((fpscr.fz16 && fpClass == FP_SUBNORMAL)
               || fpClass == FP_ZERO) {
        fpscr.dzc = 1;
        // Return infinity with the same sign as the operand.
        return (sign << 15) | (0x1F << 10) | (0 << 0);
    } else if (bits(op, 14, 8) == 0) {
        // Operand so small the reciprocal overflows: signed infinity.
        fpscr.ofc = 1;
        fpscr.ixc = 1;
        return (sign << 15) | (0x1F << 10) | (0 << 0);
    } else if (fpscr.fz16 && bits(op, 14, 10) >= 29) {
        // Flush-to-zero: the reciprocal would be denormal.
        fpscr.ufc = 1;
        return sign << 15;
    } else {
        uint64_t opBits = op;
        // Place the 10 fraction bits at the top of a 52-bit field.
        uint64_t fraction = bits(opBits, 9, 0) << 42;
        int16_t exp = bits(opBits, 14, 10);

        // Normalize a subnormal operand (at most one shift is needed
        // here given the overflow check above).
        if (exp == 0) {
            if (bits(fraction, 51) == 0) {
                exp = -1;
                fraction = bits(fraction, 49, 0) << 2;
            } else {
                fraction = bits(fraction, 50, 0) << 1;
            }
        }

        // scaled input value to the range of [0.5, 1)
        double scaled;
        scaled = bitsToFp((0 << 0) | (bits(fraction, 51, 44) << 44) |
                          (0x3feULL << 52) | (0ULL << 63),
                          (double)0.0);
        uint64_t resultExp = 29 - exp;

        uint64_t estimate = fpToBits(recipEstimate(scaled));
        fraction = bits(estimate, 51, 0);

        // A zero or negative result exponent means the result is
        // denormal: shift the hidden bit into the fraction. Note that
        // resultExp is unsigned, so "== -1" matches the wrapped value
        // produced by 29 - 30.
        if (resultExp == 0) {
            fraction = (1ULL << 51) | bits(fraction, 51, 1);
        } else if (resultExp == -1) {
            fraction = (1ULL << 50) | bits(fraction, 51, 2);
            resultExp = 0;
        }

        // if flush-to-zero, flush denormal.
        if (fpscr.fz16) {
            if (resultExp == 0) {
                return 0;
            }
        }

        return (sign << 15) | (bits(resultExp, 4, 0) << 10) |
               (bits(fraction, 51, 42) << 0);
    }
}
1013
1014uint32_t
1016{
1017 if (bits(op, 31) == 0) {
1018 return -1;
1019 } else {
1020 double dpOp;
1021 dpOp = bitsToFp((0ULL << 63) |
1022 (0x3feULL << 52) |
1023 (bits((uint64_t)op, 30, 0) << 21) |
1024 (0 << 0), (double)0.0);
1025 uint64_t estimate = fpToBits(recipEstimate(dpOp));
1026 return (1 << 31) | bits(estimate, 51, 21);
1027 }
1028}
1029
1030FPSCR
1031fpStandardFPSCRValue(const FPSCR &fpscr)
1032{
1033 FPSCR new_fpscr(0);
1034 new_fpscr.ahp = fpscr.ahp;
1035 new_fpscr.dn = 1;
1036 new_fpscr.fz = 1;
1037 new_fpscr.fz16 = fpscr.fz16;
1038 return new_fpscr;
1039};
1040
/*
 * Handle NaN operands up front for a two-operand FP instruction. If
 * either operand is a NaN, "done" is set and the ARM result NaN is
 * returned: the default NaN when defaultNan is set, otherwise the first
 * signalling NaN (quieted), otherwise the first quiet NaN; any
 * signalling NaN also sets the invalid-operation flag. If neither
 * operand is a NaN, "done" is cleared and the (unused) zero return value
 * should be ignored by the caller.
 */
template <class fpType>
fpType
FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                  fpType op1, fpType op2) const
{
    done = true;
    fpType junk = 0.0;  // carries the type into bitsToFp
    fpType dest = 0.0;
    const bool single = (sizeof(fpType) == sizeof(float));
    // Quiet-NaN bit pattern for this operand width.
    const uint64_t qnan =
        single ? 0x7fc00000 : 0x7ff8000000000000ULL;
    const bool nan1 = std::isnan(op1);
    const bool nan2 = std::isnan(op2);
    // A NaN whose quiet bit is clear is signalling.
    const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
    const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
    if (nan1 || nan2) {
        // Priority: default NaN, then signalling (quieted), then quiet;
        // op1 wins over op2 within each category.
        if (defaultNan) {
            dest = bitsToFp(qnan, junk);
        } else if (signal1) {
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            dest = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            dest = op1;
        } else if (nan2) {
            dest = op2;
        }
        if (signal1 || signal2) {
            fpscr.ioc = 1;
        }
    } else {
        done = false;
    }
    return dest;
}

template
float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                        float op1, float op2) const;
template
double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                         double op1, double op2) const;
1083
1084// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB
1085template <class fpType>
1086fpType
1087FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
1088 fpType (*func)(fpType, fpType, fpType),
1089 bool flush, bool defaultNan, uint32_t rMode) const
1090{
1091 const bool single = (sizeof(fpType) == sizeof(float));
1092 fpType junk = 0.0;
1093
1094 if (flush && (flushToZero(op1, op2) || flushToZero(op3)))
1095 fpscr.idc = 1;
1097 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state)
1098 : "m" (op1), "m" (op2), "m" (op3), "m" (state));
1099 fpType dest = func(op1, op2, op3);
1100 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1101
1102 int fpClass = std::fpclassify(dest);
1103 // Get NAN behavior right. This varies between x86 and ARM.
1104 if (fpClass == FP_NAN) {
1105 const uint64_t qnan =
1106 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1107 const bool nan1 = std::isnan(op1);
1108 const bool nan2 = std::isnan(op2);
1109 const bool nan3 = std::isnan(op3);
1110 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
1111 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
1112 const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan);
1113 if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) {
1114 dest = bitsToFp(qnan, junk);
1115 } else if (signal1) {
1116 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1117 } else if (signal2) {
1118 dest = bitsToFp(fpToBits(op2) | qnan, junk);
1119 } else if (signal3) {
1120 dest = bitsToFp(fpToBits(op3) | qnan, junk);
1121 } else if (nan1) {
1122 dest = op1;
1123 } else if (nan2) {
1124 dest = op2;
1125 } else if (nan3) {
1126 dest = op3;
1127 }
1128 } else if (flush && flushToZero(dest)) {
1129 feraiseexcept(FeUnderflow);
1130 } else if ((
1131 (single && (dest == bitsToFp(0x00800000, junk) ||
1132 dest == bitsToFp(0x80800000, junk))) ||
1133 (!single &&
1134 (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1135 dest == bitsToFp(0x8010000000000000ULL, junk)))
1136 ) && rMode != VfpRoundZero) {
1137 /*
1138 * Correct for the fact that underflow is detected -before- rounding
1139 * in ARM and -after- rounding in x86.
1140 */
1141 fesetround(FeRoundZero);
1142 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3)
1143 : "m" (op1), "m" (op2), "m" (op3));
1144 fpType temp = func(op1, op2, op2);
1145 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1146 if (flush && flushToZero(temp)) {
1147 dest = temp;
1148 }
1149 }
1150 finishVfp(fpscr, state, flush);
1151 return dest;
1152}
1153
1154template
1155float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3,
1156 float (*func)(float, float, float),
1157 bool flush, bool defaultNan, uint32_t rMode) const;
1158template
1159double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3,
1160 double (*func)(double, double, double),
1161 bool flush, bool defaultNan, uint32_t rMode) const;
1162
1163template <class fpType>
1164fpType
1165FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
1166 fpType (*func)(fpType, fpType),
1167 bool flush, bool defaultNan, uint32_t rMode) const
1168{
1169 const bool single = (sizeof(fpType) == sizeof(float));
1170 fpType junk = 0.0;
1171
1172 if (flush && flushToZero(op1, op2))
1173 fpscr.idc = 1;
1175 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
1176 : "m" (op1), "m" (op2), "m" (state));
1177 fpType dest = func(op1, op2);
1178 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1179
1180 // Get NAN behavior right. This varies between x86 and ARM.
1181 if (std::isnan(dest)) {
1182 const uint64_t qnan =
1183 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1184 const bool nan1 = std::isnan(op1);
1185 const bool nan2 = std::isnan(op2);
1186 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
1187 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
1188 if ((!nan1 && !nan2) || (defaultNan == 1)) {
1189 dest = bitsToFp(qnan, junk);
1190 } else if (signal1) {
1191 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1192 } else if (signal2) {
1193 dest = bitsToFp(fpToBits(op2) | qnan, junk);
1194 } else if (nan1) {
1195 dest = op1;
1196 } else if (nan2) {
1197 dest = op2;
1198 }
1199 } else if (flush && flushToZero(dest)) {
1200 feraiseexcept(FeUnderflow);
1201 } else if ((
1202 (single && (dest == bitsToFp(0x00800000, junk) ||
1203 dest == bitsToFp(0x80800000, junk))) ||
1204 (!single &&
1205 (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1206 dest == bitsToFp(0x8010000000000000ULL, junk)))
1207 ) && rMode != VfpRoundZero) {
1208 /*
1209 * Correct for the fact that underflow is detected -before- rounding
1210 * in ARM and -after- rounding in x86.
1211 */
1212 fesetround(FeRoundZero);
1213 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
1214 : "m" (op1), "m" (op2));
1215 fpType temp = func(op1, op2);
1216 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1217 if (flush && flushToZero(temp)) {
1218 dest = temp;
1219 }
1220 }
1221 finishVfp(fpscr, state, flush);
1222 return dest;
1223}
1224
1225template
1226float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
1227 float (*func)(float, float),
1228 bool flush, bool defaultNan, uint32_t rMode) const;
1229template
1230double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
1231 double (*func)(double, double),
1232 bool flush, bool defaultNan, uint32_t rMode) const;
1233
1234template <class fpType>
1235fpType
1236FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
1237 bool flush, uint32_t rMode) const
1238{
1239 const bool single = (sizeof(fpType) == sizeof(float));
1240 fpType junk = 0.0;
1241
1242 if (flush && flushToZero(op1))
1243 fpscr.idc = 1;
1245 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
1246 : "m" (op1), "m" (state));
1247 fpType dest = func(op1);
1248 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1249
1250 // Get NAN behavior right. This varies between x86 and ARM.
1251 if (std::isnan(dest)) {
1252 const uint64_t qnan =
1253 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1254 const bool nan = std::isnan(op1);
1255 if (!nan || fpscr.dn == 1) {
1256 dest = bitsToFp(qnan, junk);
1257 } else if (nan) {
1258 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1259 }
1260 } else if (flush && flushToZero(dest)) {
1261 feraiseexcept(FeUnderflow);
1262 } else if ((
1263 (single && (dest == bitsToFp(0x00800000, junk) ||
1264 dest == bitsToFp(0x80800000, junk))) ||
1265 (!single &&
1266 (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1267 dest == bitsToFp(0x8010000000000000ULL, junk)))
1268 ) && rMode != VfpRoundZero) {
1269 /*
1270 * Correct for the fact that underflow is detected -before- rounding
1271 * in ARM and -after- rounding in x86.
1272 */
1273 fesetround(FeRoundZero);
1274 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
1275 fpType temp = func(op1);
1276 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1277 if (flush && flushToZero(temp)) {
1278 dest = temp;
1279 }
1280 }
1281 finishVfp(fpscr, state, flush);
1282 return dest;
1283}
1284
1285template
1286float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
1287 bool flush, uint32_t rMode) const;
1288template
1289double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
1290 bool flush, uint32_t rMode) const;
1291
1294{
1295 if (wide) {
1296 stride *= 2;
1297 }
1298 unsigned offset = idx % 8;
1299 idx = (RegIndex)(idx - offset);
1300 offset += stride;
1301 idx = (RegIndex)(idx + (offset % 8));
1302 return idx;
1303}
1304
1305void
1307{
1308 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1309 assert(!inScalarBank(dest));
1310 dest = addStride(dest, stride);
1311 op1 = addStride(op1, stride);
1312 if (!inScalarBank(op2)) {
1313 op2 = addStride(op2, stride);
1314 }
1315}
1316
1317void
1319{
1320 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1321 assert(!inScalarBank(dest));
1322 dest = addStride(dest, stride);
1323 if (!inScalarBank(op1)) {
1324 op1 = addStride(op1, stride);
1325 }
1326}
1327
1328void
1330{
1331 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1332 assert(!inScalarBank(dest));
1333 dest = addStride(dest, stride);
1334}
1335
1336FPSCR
1337fpVASimdFPSCRValue(const FPSCR &fpscr)
1338{
1339 FPSCR new_fpscr(0);
1340 new_fpscr.ahp = 0; // bit 26
1341 new_fpscr.dn = 1; // bit 25
1342 new_fpscr.fz = 1; // bit 24
1343 new_fpscr.rMode = VfpRoundNearest; // bit 23:22
1344 new_fpscr.fz16 = fpscr.fz16; // bit 19
1345 return new_fpscr;
1346}
1347
1348FPSCR
1349fpVASimdCvtFPSCRValue(const FPSCR &fpscr)
1350{
1351 FPSCR new_fpscr(0);
1352 new_fpscr.ahp = fpscr.ahp; // bit 26
1353 new_fpscr.dn = 1; // bit 25
1354 new_fpscr.fz = 1; // bit 24
1355 new_fpscr.rMode = VfpRoundNearest; // bit 23:22
1356 new_fpscr.fz16 = fpscr.fz16; // bit 19
1357 return new_fpscr;
1358}
1359
1360FPSCR
1361fpRestoreFPSCRValue(const FPSCR fpscr_exec, const FPSCR &fpscr)
1362{
1363 FPSCR new_fpscr(fpscr_exec);
1364 new_fpscr.idc = fpscr_exec.idc | fpscr.idc; // bit 7
1365 new_fpscr.ixc = fpscr_exec.ixc | fpscr.ixc; // bit 4
1366 new_fpscr.ufc = fpscr_exec.ufc | fpscr.ufc; // bit 3
1367 new_fpscr.ofc = fpscr_exec.ofc | fpscr.ofc; // bit 2
1368 new_fpscr.dzc = fpscr_exec.dzc | fpscr.dzc; // bit 1
1369 new_fpscr.ioc = fpscr_exec.ioc | fpscr.ioc; // bit 0
1370 return new_fpscr;
1371}
1372
1373} // namespace ArmISA
1374} // namespace gem5
void printCondition(std::ostream &os, unsigned code, bool noImplicit=false) const
void printMnemonic(std::ostream &os, const std::string &suffix="", bool withPred=true, bool withCond64=false, ConditionCode cond64=COND_UC) const
void printIntReg(std::ostream &os, RegIndex reg_idx, uint8_t opWidth=0) const
Print a register name for disassembly given the unique dependence tag number (FP or int).
void printFloatReg(std::ostream &os, RegIndex reg_idx) const
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:52
ConditionCode condCode
Definition vfp.hh:1083
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:67
fpType unaryOp(FPSCR &fpscr, fpType op1, fpType(*func)(fpType), bool flush, uint32_t rMode) const
Definition vfp.cc:1236
fpType processNans(FPSCR &fpscr, bool &done, bool defaultNan, fpType op1, fpType op2) const
Definition vfp.cc:1043
fpType ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, fpType(*func)(fpType, fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
Definition vfp.cc:1087
fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType(*func)(fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
Definition vfp.cc:1165
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:95
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:106
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:83
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:133
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:164
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:119
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:148
static bool inScalarBank(RegIndex idx)
Definition vfp.hh:621
void nextIdxs(RegIndex &dest, RegIndex &op1, RegIndex &op2)
Definition vfp.cc:1306
RegIndex addStride(RegIndex idx, unsigned stride)
Definition vfp.cc:1293
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr uint64_t szext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
Definition bitfield.hh:161
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition bitfield.hh:216
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
uint32_t unsignedRecipEstimate(uint32_t op)
Definition vfp.cc:1015
@ FeRoundZero
Definition vfp.hh:102
@ FeRoundNearest
Definition vfp.hh:101
@ FeRoundUpward
Definition vfp.hh:103
@ FeRoundDown
Definition vfp.hh:100
static uint32_t fpToBits(float)
Definition vfp.hh:203
fpType fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
Definition vfp.cc:301
Bitfield< 3, 0 > mask
Definition pcstate.hh:63
static uint16_t vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
Definition vfp.cc:404
uint16_t fpRecipEstimateFpH(FPSCR &fpscr, uint16_t op)
Definition vfp.cc:948
double vfpSFixedToFpD(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:731
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 4 > width
Definition misc_types.hh:72
Bitfield< 7, 0 > imm
Definition types.hh:132
Bitfield< 4 > s
Bitfield< 27 > q
Definition misc_types.hh:55
double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
Definition vfp.cc:654
static uint64_t vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
Definition vfp.cc:592
Bitfield< 23, 0 > offset
Definition types.hh:144
uint16_t fprSqrtEstimateFpH(FPSCR &fpscr, uint16_t op)
Definition vfp.cc:813
float vfpUFixedToFpS(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:674
Bitfield< 23, 22 > rMode
uint32_t unsignedRSqrtEstimate(uint32_t op)
Definition vfp.cc:873
VfpSavedState prepFpState(uint32_t rMode)
Definition vfp.cc:182
float fixFpDFpSDest(FPSCR fpscr, double val)
Definition vfp.cc:336
int VfpSavedState
Definition vfp.hh:260
void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
Definition vfp.cc:204
double fixFpSFpDDest(FPSCR fpscr, float val)
Definition vfp.cc:372
@ FeUnderflow
Definition vfp.hh:94
@ FeDivByZero
Definition vfp.hh:90
@ FeInvalid
Definition vfp.hh:92
@ FeOverflow
Definition vfp.hh:93
@ FeAllExceptions
Definition vfp.hh:95
@ FeInexact
Definition vfp.hh:91
Bitfield< 26 > ahp
float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
Definition vfp.cc:664
float vfpSFixedToFpS(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:692
fpType fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
Definition vfp.cc:230
Bitfield< 8 > a
Definition misc_types.hh:66
double vfpUFixedToFpD(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:712
Bitfield< 21, 20 > stride
static float bitsToFp(uint64_t, float)
Definition vfp.hh:227
constexpr int fpclassifyFpH(uint16_t __x)
Definition vfp.hh:121
static double recipEstimate(double a)
Definition vfp.cc:899
FPSCR fpVASimdCvtFPSCRValue(const FPSCR &fpscr)
Definition vfp.cc:1349
uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, double op)
Definition vfp.cc:584
FPSCR fpStandardFPSCRValue(const FPSCR &fpscr)
Definition vfp.cc:1031
uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op)
Definition vfp.cc:576
float fprSqrtEstimate(FPSCR &fpscr, float op)
Definition vfp.cc:770
static double recipSqrtEstimate(double a)
Definition vfp.cc:752
float fpRecipEstimate(FPSCR &fpscr, float op)
Definition vfp.cc:912
@ VfpRoundNearest
Definition vfp.hh:108
@ VfpRoundZero
Definition vfp.hh:111
@ VfpRoundUpward
Definition vfp.hh:109
@ VfpRoundDown
Definition vfp.hh:110
FPSCR fpRestoreFPSCRValue(const FPSCR fpscr_exec, const FPSCR &fpscr)
Definition vfp.cc:1361
Bitfield< 21 > ss
Definition misc_types.hh:60
FPSCR fpVASimdFPSCRValue(const FPSCR &fpscr)
Definition vfp.cc:1337
Bitfield< 4 > pc
Bitfield< 4 > op
Definition types.hh:83
Bitfield< 63 > val
Definition misc.hh:804
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
uint16_t RegIndex
Definition types.hh:176
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
void ccprintf(cp::Print &print)
Definition cprintf.hh:130
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)
Definition fp16_e5m10.hh:83

Generated on Mon May 26 2025 09:18:58 for gem5 by doxygen 1.13.2