gem5 v24.1.0.1
Loading...
Searching...
No Matches
vfp.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2010-2013, 2019, 2024 Arm Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#ifndef __ARCH_ARM_INSTS_VFP_HH__
39#define __ARCH_ARM_INSTS_VFP_HH__
40
41#include <fenv.h>
42
43#include <cmath>
44
46#include "arch/arm/pcstate.hh"
47#include "arch/arm/regs/misc.hh"
48#include "cpu/thread_context.hh"
49
50namespace gem5
51{
52
53namespace ArmISA
54{
55
63
64template<class T>
65static inline void
67{
68 switch (mode) {
69 case VfpMicroop:
70 flags[StaticInst::IsMicroop] = true;
71 break;
72 case VfpFirstMicroop:
73 flags[StaticInst::IsMicroop] =
74 flags[StaticInst::IsFirstMicroop] = true;
75 break;
76 case VfpLastMicroop:
77 flags[StaticInst::IsMicroop] =
78 flags[StaticInst::IsLastMicroop] = true;
79 break;
80 case VfpNotAMicroop:
81 break;
82 }
83 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
84 flags[StaticInst::IsDelayedCommit] = true;
85 }
86}
87
89{
90 FeDivByZero = FE_DIVBYZERO,
91 FeInexact = FE_INEXACT,
92 FeInvalid = FE_INVALID,
93 FeOverflow = FE_OVERFLOW,
94 FeUnderflow = FE_UNDERFLOW,
95 FeAllExceptions = FE_ALL_EXCEPT
96};
97
99{
100 FeRoundDown = FE_DOWNWARD,
101 FeRoundNearest = FE_TONEAREST,
102 FeRoundZero = FE_TOWARDZERO,
103 FeRoundUpward = FE_UPWARD
105
114
115static inline float bitsToFp(uint64_t, float);
116static inline double bitsToFp(uint64_t, double);
117static inline uint32_t fpToBits(float);
118static inline uint64_t fpToBits(double);
119
120template <class fpType>
121static inline bool
123{
124 fpType junk = 0.0;
125 if (std::fpclassify(op) == FP_SUBNORMAL) {
126 uint64_t bitMask = 0x1ULL << (sizeof(fpType) * 8 - 1);
127 op = bitsToFp(fpToBits(op) & bitMask, junk);
128 return true;
129 }
130 return false;
131}
132
/**
 * Flush a pair of operands to zero.
 *
 * Each operand is handed to the single-operand flushToZero(), which
 * replaces a subnormal value with a zero of the same sign.
 *
 * @param op1 First operand, modified in place.
 * @param op2 Second operand, modified in place.
 * @return true if at least one of the operands was flushed.
 */
template <class fpType>
static inline bool
flushToZero(fpType &op1, fpType &op2)
{
    // Both operands must be processed unconditionally; folding the calls
    // into a short-circuit expression would skip op2 when op1 flushed.
    bool anyFlushed = flushToZero(op1);
    anyFlushed |= flushToZero(op2);
    return anyFlushed;
}
141
142template <class fpType>
143static inline void
144vfpFlushToZero(FPSCR &fpscr, fpType &op)
145{
146 if (fpscr.fz == 1 && flushToZero(op)) {
147 fpscr.idc = 1;
148 }
149}
150
151template <class fpType>
152static inline void
153vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
154{
155 vfpFlushToZero(fpscr, op1);
156 vfpFlushToZero(fpscr, op2);
157}
158
159static inline uint32_t
161{
162 union
163 {
164 float fp;
165 uint32_t bits;
166 } val;
167 val.fp = fp;
168 return val.bits;
169}
170
171static inline uint64_t
173{
174 union
175 {
176 double fp;
177 uint64_t bits;
178 } val;
179 val.fp = fp;
180 return val.bits;
181}
182
/**
 * Reinterpret the low 32 bits of an integer as a single precision value.
 *
 * @param bits Raw encoding; only the low 32 bits are used.
 * @param junk Unused; its type selects this overload.
 * @return The float whose bit pattern equals the low 32 bits of bits.
 */
static inline float
bitsToFp(uint64_t bits, float junk)
{
    union
    {
        uint32_t raw;
        float value;
    } pun;
    // Narrowing drops the upper half of the 64 bit argument.
    pun.raw = static_cast<uint32_t>(bits);
    return pun.value;
}
194
/**
 * Reinterpret a 64 bit integer as a double precision value.
 *
 * @param bits Raw encoding of the value.
 * @param junk Unused; its type selects this overload.
 * @return The double whose bit pattern equals bits.
 */
static inline double
bitsToFp(uint64_t bits, double junk)
{
    union
    {
        uint64_t raw;
        double value;
    } pun;
    pun.raw = bits;
    return pun.value;
}
206
/**
 * Tell whether a value is a signalling NaN.
 *
 * A NaN is treated as signalling when the bits of the quiet NaN pattern
 * (0x7fc00000 for float sized types, 0x7ff8000000000000 otherwise) are
 * not all set in its encoding.
 *
 * @param val Value to inspect.
 * @return true if val is a NaN that is not quiet.
 */
template <class fpType>
static inline bool
isSnan(fpType val)
{
    if (!std::isnan(val))
        return false;

    const uint64_t quietPattern = (sizeof(fpType) == sizeof(float)) ?
        0x7fc00000 : 0x7ff8000000000000ULL;
    return (fpToBits(val) & quietPattern) != quietPattern;
}
216
217typedef int VfpSavedState;
218
220void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask);
221
222template <class fpType>
223fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
224
225template <class fpType>
226fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
227
228template <class fpType>
229fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
230
231float fixFpDFpSDest(FPSCR fpscr, double val);
232double fixFpSFpDDest(FPSCR fpscr, float val);
233
234uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
235 uint32_t rMode, bool ahp, float op);
236uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
237 uint32_t rMode, bool ahp, double op);
238
239float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
240double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
241
242static inline double
243makeDouble(uint32_t low, uint32_t high)
244{
245 double junk = 0.0;
246 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
247}
248
249static inline uint32_t
251{
252 return fpToBits(val);
253}
254
255static inline uint32_t
257{
258 return fpToBits(val) >> 32;
259}
260
261static inline void
262setFPExceptions(int exceptions) {
263 feclearexcept(FeAllExceptions);
264 feraiseexcept(exceptions);
265}
266
267template <typename T>
268uint64_t
270vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
271 useRmode = true, VfpRoundingMode roundMode = VfpRoundZero,
272 bool aarch64 = false)
273{
274 int rmode;
275 bool roundAwayFix = false;
276
277 if (!useRmode) {
278 rmode = fegetround();
279 } else {
280 switch (roundMode)
281 {
282 case VfpRoundNearest:
283 rmode = FeRoundNearest;
284 break;
285 case VfpRoundUpward:
286 rmode = FeRoundUpward;
287 break;
288 case VfpRoundDown:
289 rmode = FeRoundDown;
290 break;
291 case VfpRoundZero:
292 rmode = FeRoundZero;
293 break;
294 case VfpRoundAway:
295 // There is no equivalent rounding mode, use round down and we'll
296 // fix it later
297 rmode = FeRoundDown;
298 roundAwayFix = true;
299 break;
300 default:
301 panic("Unsupported roundMode %d\n", roundMode);
302 }
303 }
304 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
305 fesetround(FeRoundNearest);
306 val = val * pow(2.0, imm);
307 __asm__ __volatile__("" : "=m" (val) : "m" (val));
308 fesetround(rmode);
309 feclearexcept(FeAllExceptions);
310 __asm__ __volatile__("" : "=m" (val) : "m" (val));
311 T origVal = val;
312 val = rint(val);
313 __asm__ __volatile__("" : "=m" (val) : "m" (val));
314
315 int exceptions = fetestexcept(FeAllExceptions);
316
317 int fpType = std::fpclassify(val);
318 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
319 if (fpType == FP_NAN) {
320 exceptions |= FeInvalid;
321 }
322 val = 0.0;
323 } else if (origVal != val) {
324 switch (rmode) {
325 case FeRoundNearest:
326 if (origVal - val > 0.5)
327 val += 1.0;
328 else if (val - origVal > 0.5)
329 val -= 1.0;
330 break;
331 case FeRoundDown:
332 if (roundAwayFix) {
333 // The ordering on the subtraction looks a bit odd in that we
334 // don't do the obvious origVal - val, instead we do
335 // -(val - origVal). This is required to get the correct bit
336 // exact behaviour when very close to the 0.5 threshold.
337 volatile T error = val;
338 error -= origVal;
339 error = -error;
340 if ( (error > 0.5) ||
341 ((error == 0.5) && (val >= 0)) )
342 val += 1.0;
343 } else {
344 if (origVal < val)
345 val -= 1.0;
346 }
347 break;
348 case FeRoundUpward:
349 if (origVal > val)
350 val += 1.0;
351 break;
352 }
353 exceptions |= FeInexact;
354 }
355
356 __asm__ __volatile__("" : "=m" (val) : "m" (val));
357
358 if (isSigned) {
359 bool outOfRange = false;
360 int64_t result = (int64_t) val;
361 uint64_t finalVal;
362
363 if (!aarch64) {
364 if (width == 16) {
365 finalVal = (int16_t)val;
366 } else if (width == 32) {
367 finalVal =(int32_t)val;
368 } else if (width == 64) {
369 finalVal = result;
370 } else {
371 panic("Unsupported width %d\n", width);
372 }
373
374 // check if value is in range
375 int64_t minVal = ~mask(width-1);
376 if ((double)val < minVal) {
377 outOfRange = true;
378 finalVal = minVal;
379 }
380 int64_t maxVal = mask(width-1);
381 if ((double)val > maxVal) {
382 outOfRange = true;
383 finalVal = maxVal;
384 }
385 } else {
386 bool isNeg = val < 0;
387 finalVal = result & mask(width);
388 // If the result is supposed to be less than 64 bits check that the
389 // upper bits that got thrown away are just sign extension bits
390 if (width != 64) {
391 outOfRange = ((uint64_t) result >> (width - 1)) !=
392 (isNeg ? mask(64-width+1) : 0);
393 }
394 // Check if the original floating point value doesn't matches the
395 // integer version we are also out of range. So create a saturated
396 // result.
397 if (isNeg) {
398 outOfRange |= val < result;
399 if (outOfRange) {
400 finalVal = 1LL << (width-1);
401 }
402 } else {
403 outOfRange |= val > result;
404 if (outOfRange) {
405 finalVal = mask(width-1);
406 }
407 }
408 }
409
410 // Raise an exception if the value was out of range
411 if (outOfRange) {
412 exceptions |= FeInvalid;
413 exceptions &= ~FeInexact;
414 }
415 setFPExceptions(exceptions);
416 return finalVal;
417 } else {
418 if ((double)val < 0) {
419 exceptions |= FeInvalid;
420 exceptions &= ~FeInexact;
421 setFPExceptions(exceptions);
422 return 0;
423 }
424
425 uint64_t result = ((uint64_t) val) & mask(width);
426 if (val > result) {
427 exceptions |= FeInvalid;
428 exceptions &= ~FeInexact;
429 setFPExceptions(exceptions);
430 return mask(width);
431 }
432
433 setFPExceptions(exceptions);
434 return result;
435 }
436};
437
438
439template <typename T>
440T
442vfpFpRint(T val, bool exact, bool defaultNan, bool useRmode = true,
443 VfpRoundingMode roundMode = VfpRoundZero)
444{
445 int rmode;
446 bool roundAwayFix = false;
447
448 if (!useRmode) {
449 rmode = fegetround();
450 } else {
451 switch (roundMode)
452 {
453 case VfpRoundNearest:
454 rmode = FeRoundNearest;
455 break;
456 case VfpRoundUpward:
457 rmode = FeRoundUpward;
458 break;
459 case VfpRoundDown:
460 rmode = FeRoundDown;
461 break;
462 case VfpRoundZero:
463 rmode = FeRoundZero;
464 break;
465 case VfpRoundAway:
466 // There is no equivalent rounding mode, use round down and we'll
467 // fix it later
468 rmode = FeRoundDown;
469 roundAwayFix = true;
470 break;
471 default:
472 panic("Unsupported roundMode %d\n", roundMode);
473 }
474 }
475 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
476 __asm__ __volatile__("" : "=m" (val) : "m" (val));
477 fesetround(rmode);
478 feclearexcept(FeAllExceptions);
479 __asm__ __volatile__("" : "=m" (val) : "m" (val));
480 T origVal = val;
481 val = rint(val);
482 __asm__ __volatile__("" : "=m" (val) : "m" (val));
483
484 int exceptions = fetestexcept(FeAllExceptions);
485 if (!exact) {
486 exceptions &= ~FeInexact;
487 }
488
489 int fpType = std::fpclassify(val);
490 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
491 if (fpType == FP_NAN) {
492 if (isSnan(val)) {
493 exceptions |= FeInvalid;
494 }
495 if (defaultNan || !isSnan(val)) {
496 bool single = (sizeof(T) == sizeof(float));
497 uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
498 val = bitsToFp(qnan, (T)0.0);
499 }
500 } else {
501 val = 0.0;
502 }
503 } else if (origVal != val) {
504 switch (rmode) {
505 case FeRoundNearest:
506 if (origVal - val > 0.5)
507 val += 1.0;
508 else if (val - origVal > 0.5)
509 val -= 1.0;
510 break;
511 case FeRoundDown:
512 if (roundAwayFix) {
513 // The ordering on the subtraction looks a bit odd in that we
514 // don't do the obvious origVal - val, instead we do
515 // -(val - origVal). This is required to get the correct bit
516 // exact behaviour when very close to the 0.5 threshold.
517 volatile T error = val;
518 error -= origVal;
519 error = -error;
520 if ( (error > 0.5) ||
521 ((error == 0.5) && (val >= 0)) )
522 val += 1.0;
523 } else {
524 if (origVal < val)
525 val -= 1.0;
526 }
527 break;
528 case FeRoundUpward:
529 if (origVal > val)
530 val += 1.0;
531 break;
532 }
533 if (exact) {
534 exceptions |= FeInexact;
535 }
536 }
537 // Fix the sign of zero.
538 fpType = std::fpclassify(val);
539 if (fpType == FP_ZERO) {
540 bool single = (sizeof(T) == sizeof(float));
541 uint64_t mask = single ? 0x80000000 : 0x8000000000000000ULL;
542 val = bitsToFp((fpToBits(val) & (~mask)) | (fpToBits(origVal) & mask),
543 (T)0.0);
544 }
545
546 // __asm__ __volatile__("" : "=m" (val) : "m" (val));
547 setFPExceptions(exceptions);
548
549 return val;
550};
551
552
553float vfpUFixedToFpS(bool flush, bool defaultNan,
554 uint64_t val, uint8_t width, uint8_t imm);
555float vfpSFixedToFpS(bool flush, bool defaultNan,
556 int64_t val, uint8_t width, uint8_t imm);
557
558double vfpUFixedToFpD(bool flush, bool defaultNan,
559 uint64_t val, uint8_t width, uint8_t imm);
560double vfpSFixedToFpD(bool flush, bool defaultNan,
561 int64_t val, uint8_t width, uint8_t imm);
562
563float fprSqrtEstimate(FPSCR &fpscr, float op);
564uint32_t unsignedRSqrtEstimate(uint32_t op);
565
566float fpRecipEstimate(FPSCR &fpscr, float op);
567uint32_t unsignedRecipEstimate(uint32_t op);
568
569FPSCR
570fpStandardFPSCRValue(const FPSCR &fpscr);
571
573{
574 public:
575 static bool
577 {
578 return (idx % 32) < 8;
579 }
580
581 protected:
582 bool wide;
583
584 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
585 OpClass __opClass, bool _wide) :
586 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
587 {}
588
589 RegIndex addStride(RegIndex idx, unsigned stride);
590 void nextIdxs(RegIndex &dest, RegIndex &op1, RegIndex &op2);
591 void nextIdxs(RegIndex &dest, RegIndex &op1);
592 void nextIdxs(RegIndex &dest);
593};
594
/**
 * Add two values of the same floating point type.
 *
 * @return a + b.
 */
template <typename T>
static inline T
fpAdd(T a, T b)
{
    const T sum = a + b;
    return sum;
}
601
/**
 * Subtract two values of the same floating point type.
 *
 * @return a - b.
 */
template <typename T>
static inline T
fpSub(T a, T b)
{
    const T difference = a - b;
    return difference;
}
608
/**
 * Single precision addition.
 *
 * @return a + b.
 */
static inline float
fpAddS(float a, float b)
{
    const float sum = a + b;
    return sum;
}
614
/**
 * Double precision addition.
 *
 * @return a + b.
 */
static inline double
fpAddD(double a, double b)
{
    const double sum = a + b;
    return sum;
}
620
/**
 * Single precision subtraction.
 *
 * @return a - b.
 */
static inline float
fpSubS(float a, float b)
{
    const float difference = a - b;
    return difference;
}
626
/**
 * Double precision subtraction.
 *
 * @return a - b.
 */
static inline double
fpSubD(double a, double b)
{
    const double difference = a - b;
    return difference;
}
632
/**
 * Single precision division.
 *
 * @return a / b.
 */
static inline float
fpDivS(float a, float b)
{
    const float quotient = a / b;
    return quotient;
}
638
/**
 * Double precision division.
 *
 * @return a / b.
 */
static inline double
fpDivD(double a, double b)
{
    const double quotient = a / b;
    return quotient;
}
644
/**
 * Divide two values of the same floating point type.
 *
 * @return a / b.
 */
template <typename T>
static inline T
fpDiv(T a, T b)
{
    const T quotient = a / b;
    return quotient;
}
651
/**
 * Multiply two values, with a special result for infinity times zero.
 *
 * When one operand is an infinity and the other a zero, the result is
 * 2.0 with a sign equal to the exclusive-or of the operand signs, i.e.
 * -2.0 if exactly one operand is negative. Every other combination,
 * including NaN operands, yields the ordinary product a * b.
 *
 * @param a First operand.
 * @param b Second operand.
 * @return +/-2.0 for infinity times zero, otherwise a * b.
 */
template <typename T>
static inline T
fpMulX(T a, T b)
{
    const int classA = std::fpclassify(a);
    const int classB = std::fpclassify(b);
    const bool infTimesZero =
        (classA == FP_INFINITE && classB == FP_ZERO) ||
        (classA == FP_ZERO && classB == FP_INFINITE);

    if (!infTimesZero)
        return a * b;

    // std::signbit also distinguishes -0.0 from +0.0, so it reads the
    // same sign information as inspecting the top bit of the encoding.
    const bool negative = std::signbit(a) != std::signbit(b);
    return negative ? static_cast<T>(-2.0) : static_cast<T>(2.0);
}
684
685
/**
 * Multiply two values of the same floating point type.
 *
 * @return a * b.
 */
template <typename T>
static inline T
fpMul(T a, T b)
{
    const T product = a * b;
    return product;
}
692
/**
 * Single precision multiplication.
 *
 * @return a * b.
 */
static inline float
fpMulS(float a, float b)
{
    const float product = a * b;
    return product;
}
698
/**
 * Double precision multiplication.
 *
 * @return a * b.
 */
static inline double
fpMulD(double a, double b)
{
    const double product = a * b;
    return product;
}
704
/**
 * Fused multiply-add: op1 * op2 + addend.
 *
 * fmaf() is used when T has the size of a float, fma() otherwise. If the
 * result is a NaN although none of the three inputs is a NaN, the sign
 * bit of the result is cleared.
 *
 * @todo remove this when all calls to it have been replaced with the new
 *       fplib implementation
 *
 * @param op1 First multiplicand.
 * @param op2 Second multiplicand.
 * @param addend Value added to the product.
 * @return The fused result, with the NaN sign fix-up applied.
 */
template <typename T>
static inline T
fpMulAdd(T op1, T op2, T addend)
{
    T fused = (sizeof(T) == sizeof(float)) ?
        fmaf(op1, op2, addend) : fma(op1, op2, addend);

    // ARM doesn't generate signed NaNs from this operation, so strip the
    // sign from a NaN that the operation itself produced.
    const bool inputsAreNumbers =
        !std::isnan(op1) && !std::isnan(op2) && !std::isnan(addend);
    if (inputsAreNumbers && std::isnan(fused)) {
        const uint64_t signBit = 0x1ULL << ((sizeof(T) * 8) - 1);
        // op1 is only passed to select the bitsToFp() overload.
        fused = bitsToFp(fpToBits(fused) & ~signBit, op1);
    }
    return fused;
}
726
727template <typename T>
728static inline T
729fpRIntX(T a, FPSCR &fpscr)
730{
731 T rVal;
732
733 rVal = rint(a);
734 if (rVal != a && !std::isnan(a))
735 fpscr.ixc = 1;
736 return (rVal);
737};
738
739template <typename T>
740static inline T
742{
743 const bool single = (sizeof(T) == sizeof(float));
744 const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
745
746 if (std::isnan(a))
747 return ((fpToBits(a) & qnan) == qnan) ? b : a;
748 if (std::isnan(b))
749 return ((fpToBits(b) & qnan) == qnan) ? a : b;
750 // Handle comparisons of +0 and -0.
751 if (!std::signbit(a) && std::signbit(b))
752 return a;
753 return fmax(a, b);
754};
755
756template <typename T>
757static inline T
758fpMax(T a, T b)
759{
760 if (std::isnan(a))
761 return a;
762 if (std::isnan(b))
763 return b;
764 return fpMaxNum<T>(a, b);
765};
766
767template <typename T>
768static inline T
770{
771 const bool single = (sizeof(T) == sizeof(float));
772 const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
773
774 if (std::isnan(a))
775 return ((fpToBits(a) & qnan) == qnan) ? b : a;
776 if (std::isnan(b))
777 return ((fpToBits(b) & qnan) == qnan) ? a : b;
778 // Handle comparisons of +0 and -0.
779 if (std::signbit(a) && !std::signbit(b))
780 return a;
781 return fmin(a, b);
782};
783
784template <typename T>
785static inline T
786fpMin(T a, T b)
787{
788 if (std::isnan(a))
789 return a;
790 if (std::isnan(b))
791 return b;
792 return fpMinNum<T>(a, b);
793};
794
795template <typename T>
796static inline T
798{
799 int fpClassA = std::fpclassify(a);
800 int fpClassB = std::fpclassify(b);
801 T aXb;
802 int fpClassAxB;
803
804 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
805 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
806 return 1.5;
807 }
808 aXb = a*b;
809 fpClassAxB = std::fpclassify(aXb);
810 if (fpClassAxB == FP_SUBNORMAL) {
811 feraiseexcept(FeUnderflow);
812 return 1.5;
813 }
814 return (3.0 - (a * b)) / 2.0;
815};
816
817template <typename T>
818static inline T
820{
821 int fpClassA = std::fpclassify(a);
822 int fpClassB = std::fpclassify(b);
823 T aXb;
824 int fpClassAxB;
825
826 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
827 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
828 return 2.0;
829 }
830 aXb = a*b;
831 fpClassAxB = std::fpclassify(aXb);
832 if (fpClassAxB == FP_SUBNORMAL) {
833 feraiseexcept(FeUnderflow);
834 return 2.0;
835 }
836 return 2.0 - (a * b);
837};
838
839
840static inline float
841fpRSqrtsS(float a, float b)
842{
843 int fpClassA = std::fpclassify(a);
844 int fpClassB = std::fpclassify(b);
845 float aXb;
846 int fpClassAxB;
847
848 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
849 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
850 return 1.5;
851 }
852 aXb = a*b;
853 fpClassAxB = std::fpclassify(aXb);
854 if (fpClassAxB == FP_SUBNORMAL) {
855 feraiseexcept(FeUnderflow);
856 return 1.5;
857 }
858 return (3.0 - (a * b)) / 2.0;
859}
860
861static inline float
862fpRecpsS(float a, float b)
863{
864 int fpClassA = std::fpclassify(a);
865 int fpClassB = std::fpclassify(b);
866 float aXb;
867 int fpClassAxB;
868
869 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
870 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
871 return 2.0;
872 }
873 aXb = a*b;
874 fpClassAxB = std::fpclassify(aXb);
875 if (fpClassAxB == FP_SUBNORMAL) {
876 feraiseexcept(FeUnderflow);
877 return 2.0;
878 }
879 return 2.0 - (a * b);
880}
881
882template <typename T>
883static inline T
885 T val;
886
887 val = round(a);
888 if (a - val == 0.5) {
889 if ( (((int) a) & 1) == 0 ) val += 1.0;
890 }
891 else if (a - val == -0.5) {
892 if ( (((int) a) & 1) == 0 ) val -= 1.0;
893 }
894 return val;
895}
896
897
898
899class FpOp : public PredOp
900{
901 protected:
902 FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
903 PredOp(mnem, _machInst, __opClass)
904 {}
905
906 virtual float
907 doOp(float op1, float op2) const
908 {
909 panic("Unimplemented version of doOp called.\n");
910 }
911
912 virtual float
913 doOp(float op1) const
914 {
915 panic("Unimplemented version of doOp called.\n");
916 }
917
918 virtual double
919 doOp(double op1, double op2) const
920 {
921 panic("Unimplemented version of doOp called.\n");
922 }
923
924 virtual double
925 doOp(double op1) const
926 {
927 panic("Unimplemented version of doOp called.\n");
928 }
929
930 double
931 dbl(uint32_t low, uint32_t high) const
932 {
933 double junk = 0.0;
934 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
935 }
936
937 uint32_t
938 dblLow(double val) const
939 {
940 return fpToBits(val);
941 }
942
943 uint32_t
944 dblHi(double val) const
945 {
946 return fpToBits(val) >> 32;
947 }
948
949 template <class fpType>
950 fpType
951 processNans(FPSCR &fpscr, bool &done, bool defaultNan,
952 fpType op1, fpType op2) const;
953
954 template <class fpType>
955 fpType
956 ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
957 fpType (*func)(fpType, fpType, fpType),
958 bool flush, bool defaultNan, uint32_t rMode) const;
959
960 template <class fpType>
961 fpType
962 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
963 fpType (*func)(fpType, fpType),
964 bool flush, bool defaultNan, uint32_t rMode) const;
965
966 template <class fpType>
967 fpType
968 unaryOp(FPSCR &fpscr, fpType op1,
969 fpType (*func)(fpType),
970 bool flush, uint32_t rMode) const;
971
972 void
973 advancePC(PCStateBase &pcState) const override
974 {
975 auto &apc = pcState.as<PCState>();
976 if (flags[IsLastMicroop]) {
977 apc.uEnd();
978 } else if (flags[IsMicroop]) {
979 apc.uAdvance();
980 } else {
981 apc.advance();
982 }
983 }
984
985 void
986 advancePC(ThreadContext *tc) const override
987 {
988 PCState pc = tc->pcState().as<PCState>();
989 if (flags[IsLastMicroop]) {
990 pc.uEnd();
991 } else if (flags[IsMicroop]) {
992 pc.uAdvance();
993 } else {
994 pc.advance();
995 }
996 tc->pcState(pc);
997 }
998
999 float
1000 fpSqrt (FPSCR fpscr,float x) const
1001 {
1002
1003 return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode);
1004
1005 }
1006
1007 double
1008 fpSqrt (FPSCR fpscr,double x) const
1009 {
1010
1011 return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode);
1012
1013 }
1014};
1015
1016class FpCondCompRegOp : public FpOp
1017{
1018 protected:
1021 uint8_t defCc;
1022
1023 FpCondCompRegOp(const char *mnem, ExtMachInst _machInst,
1024 OpClass __opClass, RegIndex _op1, RegIndex _op2,
1025 ConditionCode _condCode, uint8_t _defCc) :
1026 FpOp(mnem, _machInst, __opClass),
1027 op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
1028 {}
1029
1030 std::string generateDisassembly(
1031 Addr pc, const loader::SymbolTable *symtab) const override;
1032};
1033
1034class FpCondSelOp : public FpOp
1035{
1036 protected:
1039
1040 FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1041 RegIndex _dest, RegIndex _op1, RegIndex _op2,
1042 ConditionCode _condCode) :
1043 FpOp(mnem, _machInst, __opClass),
1044 dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
1045 {}
1046
1047 std::string generateDisassembly(
1048 Addr pc, const loader::SymbolTable *symtab) const override;
1049};
1050
1051class FpRegRegOp : public FpOp
1052{
1053 protected:
1056
1057 FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1058 RegIndex _dest, RegIndex _op1,
1060 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
1061 {
1063 }
1064
1065 std::string generateDisassembly(
1066 Addr pc, const loader::SymbolTable *symtab) const override;
1067};
1068
1069class FpRegImmOp : public FpOp
1070{
1071 protected:
1073 uint64_t imm;
1074
1075 FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1076 RegIndex _dest, uint64_t _imm,
1078 FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
1079 {
1081 }
1082
1083 std::string generateDisassembly(
1084 Addr pc, const loader::SymbolTable *symtab) const override;
1085};
1086
1087class FpRegRegImmOp : public FpOp
1088{
1089 protected:
1092 uint64_t imm;
1093
1094 FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1095 RegIndex _dest, RegIndex _op1,
1096 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
1097 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
1098 {
1100 }
1101
1102 std::string generateDisassembly(
1103 Addr pc, const loader::SymbolTable *symtab) const override;
1104};
1105
1106class FpRegRegRegOp : public FpOp
1107{
1108 protected:
1112
1113 FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1114 RegIndex _dest, RegIndex _op1, RegIndex _op2,
1116 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
1117 {
1119 }
1120
1121 std::string generateDisassembly(
1122 Addr pc, const loader::SymbolTable *symtab) const override;
1123};
1124
1126{
1127 protected:
1132
1133 FpRegRegRegCondOp(const char *mnem, ExtMachInst _machInst,
1134 OpClass __opClass, RegIndex _dest, RegIndex _op1,
1135 RegIndex _op2, ConditionCode _cond,
1137 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
1138 cond(_cond)
1139 {
1141 }
1142
1143 std::string generateDisassembly(
1144 Addr pc, const loader::SymbolTable *symtab) const override;
1145};
1146
1148{
1149 protected:
1154
1155 FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
1156 RegIndex _dest, RegIndex _op1, RegIndex _op2,
1158 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
1159 op3(_op3)
1160 {
1162 }
1163
1164 std::string generateDisassembly(
1165 Addr pc, const loader::SymbolTable *symtab) const override;
1166};
1167
1169{
1170 protected:
1174 uint64_t imm;
1175
1176 FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
1177 OpClass __opClass, RegIndex _dest,
1178 RegIndex _op1, RegIndex _op2,
1179 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
1180 FpOp(mnem, _machInst, __opClass),
1181 dest(_dest), op1(_op1), op2(_op2), imm(_imm)
1182 {
1184 }
1185
1186 std::string generateDisassembly(
1187 Addr pc, const loader::SymbolTable *symtab) const override;
1188};
1189
1190} // namespace ArmISA
1191} // namespace gem5
1192
1193#endif //__ARCH_ARM_INSTS_VFP_HH__
std::string error
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:52
FpCondCompRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _op1, RegIndex _op2, ConditionCode _condCode, uint8_t _defCc)
Definition vfp.hh:1023
FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, ConditionCode _condCode)
Definition vfp.hh:1040
ConditionCode condCode
Definition vfp.hh:1038
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:67
void advancePC(ThreadContext *tc) const override
Definition vfp.hh:986
void advancePC(PCStateBase &pcState) const override
Definition vfp.hh:973
float fpSqrt(FPSCR fpscr, float x) const
Definition vfp.hh:1000
uint32_t dblLow(double val) const
Definition vfp.hh:938
fpType unaryOp(FPSCR &fpscr, fpType op1, fpType(*func)(fpType), bool flush, uint32_t rMode) const
Definition vfp.cc:1109
fpType processNans(FPSCR &fpscr, bool &done, bool defaultNan, fpType op1, fpType op2) const
Definition vfp.cc:916
uint32_t dblHi(double val) const
Definition vfp.hh:944
virtual double doOp(double op1) const
Definition vfp.hh:925
fpType ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, fpType(*func)(fpType, fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
Definition vfp.cc:960
virtual float doOp(float op1) const
Definition vfp.hh:913
fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType(*func)(fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
Definition vfp.cc:1038
virtual double doOp(double op1, double op2) const
Definition vfp.hh:919
double fpSqrt(FPSCR fpscr, double x) const
Definition vfp.hh:1008
FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
Definition vfp.hh:902
virtual float doOp(float op1, float op2) const
Definition vfp.hh:907
double dbl(uint32_t low, uint32_t high) const
Definition vfp.hh:931
FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, uint64_t _imm, VfpMicroMode mode=VfpNotAMicroop)
Definition vfp.hh:1075
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:95
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:106
FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, uint64_t _imm, VfpMicroMode mode=VfpNotAMicroop)
Definition vfp.hh:1094
FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, VfpMicroMode mode=VfpNotAMicroop)
Definition vfp.hh:1057
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:83
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:133
FpRegRegRegCondOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, ConditionCode _cond, VfpMicroMode mode=VfpNotAMicroop)
Definition vfp.hh:1133
FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, uint64_t _imm, VfpMicroMode mode=VfpNotAMicroop)
Definition vfp.hh:1176
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:164
FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, VfpMicroMode mode=VfpNotAMicroop)
Definition vfp.hh:1113
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:119
FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, RegIndex _op3, VfpMicroMode mode=VfpNotAMicroop)
Definition vfp.hh:1155
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:148
Base class for predicated macro-operations.
Definition pred_inst.hh:343
Base class for predicated integer operations.
Definition pred_inst.hh:217
static bool inScalarBank(RegIndex idx)
Definition vfp.hh:576
void nextIdxs(RegIndex &dest, RegIndex &op1, RegIndex &op2)
Definition vfp.cc:1179
RegIndex addStride(RegIndex idx, unsigned stride)
Definition vfp.cc:1166
VfpMacroOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, bool _wide)
Definition vfp.hh:584
Target & as()
Definition pcstate.hh:73
std::bitset< Num_Flags > flags
Flag values for this instruction.
ThreadContext is the external interface to all thread state for anything outside of the CPU.
virtual const PCStateBase & pcState() const =0
#define GEM5_NO_OPTIMIZE
Definition compiler.hh:141
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
#define panic(...)
This implements a cprintf-based panic() function.
Definition logging.hh:188
atomic_var_t state
Definition helpers.cc:211
uint8_t flags
Definition helpers.cc:87
uint32_t unsignedRecipEstimate(uint32_t op)
Definition vfp.cc:888
FeRoundingMode
Definition vfp.hh:99
@ FeRoundZero
Definition vfp.hh:102
@ FeRoundNearest
Definition vfp.hh:101
@ FeRoundUpward
Definition vfp.hh:103
@ FeRoundDown
Definition vfp.hh:100
static uint32_t fpToBits(float)
Definition vfp.hh:160
fpType fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
Definition vfp.cc:301
static uint32_t highFromDouble(double val)
Definition vfp.hh:256
static double fpMulD(double a, double b)
Definition vfp.hh:700
static T fpMin(T a, T b)
Definition vfp.hh:786
Bitfield< 3, 0 > mask
Definition pcstate.hh:63
static T fpMax(T a, T b)
Definition vfp.hh:758
double vfpSFixedToFpD(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:731
T GEM5_NO_OPTIMIZE vfpFpRint(T val, bool exact, bool defaultNan, bool useRmode=true, VfpRoundingMode roundMode=VfpRoundZero)
Definition vfp.hh:442
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 4 > width
Definition misc_types.hh:72
static T fpRIntX(T a, FPSCR &fpscr)
Definition vfp.hh:729
static T fpSub(T a, T b)
Definition vfp.hh:604
Bitfield< 7, 0 > imm
Definition types.hh:132
Bitfield< 7 > b
static const uint32_t FpscrExcMask
Definition misc.hh:3004
static float fpRSqrtsS(float a, float b)
Definition vfp.hh:841
static float fpDivS(float a, float b)
Definition vfp.hh:634
double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
Definition vfp.cc:654
static T fpMulX(T a, T b)
Definition vfp.hh:654
static float fpRecpsS(float a, float b)
Definition vfp.hh:862
float vfpUFixedToFpS(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:674
Bitfield< 23, 22 > rMode
uint32_t unsignedRSqrtEstimate(uint32_t op)
Definition vfp.cc:813
VfpSavedState prepFpState(uint32_t rMode)
Definition vfp.cc:182
float fixFpDFpSDest(FPSCR fpscr, double val)
Definition vfp.cc:336
int VfpSavedState
Definition vfp.hh:217
static T fpMul(T a, T b)
Definition vfp.hh:688
static float fpMulS(float a, float b)
Definition vfp.hh:694
static T fpDiv(T a, T b)
Definition vfp.hh:647
void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
Definition vfp.cc:204
double fixFpSFpDDest(FPSCR fpscr, float val)
Definition vfp.cc:372
FeExceptionBit
Definition vfp.hh:89
@ FeUnderflow
Definition vfp.hh:94
@ FeDivByZero
Definition vfp.hh:90
@ FeInvalid
Definition vfp.hh:92
@ FeOverflow
Definition vfp.hh:93
@ FeAllExceptions
Definition vfp.hh:95
@ FeInexact
Definition vfp.hh:91
Bitfield< 26 > ahp
float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
Definition vfp.cc:664
static T roundNEven(T a)
Definition vfp.hh:884
@ VfpNotAMicroop
Definition vfp.hh:58
@ VfpMicroop
Definition vfp.hh:59
@ VfpFirstMicroop
Definition vfp.hh:60
@ VfpLastMicroop
Definition vfp.hh:61
static T fpMinNum(T a, T b)
Definition vfp.hh:769
ConditionCode
Definition cc.hh:104
float vfpSFixedToFpS(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:692
static uint32_t lowFromDouble(double val)
Definition vfp.hh:250
fpType fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
Definition vfp.cc:230
Bitfield< 8 > a
Definition misc_types.hh:66
double vfpUFixedToFpD(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:712
Bitfield< 21, 20 > stride
static float bitsToFp(uint64_t, float)
Definition vfp.hh:184
static T fpMulAdd(T op1, T op2, T addend)
Definition vfp.hh:708
static double fpAddD(double a, double b)
Definition vfp.hh:616
static float fpSubS(float a, float b)
Definition vfp.hh:622
uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, double op)
Definition vfp.cc:584
static double fpSubD(double a, double b)
Definition vfp.hh:628
uint64_t GEM5_NO_OPTIMIZE vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool useRmode=true, VfpRoundingMode roundMode=VfpRoundZero, bool aarch64=false)
Definition vfp.hh:270
FPSCR fpStandardFPSCRValue(const FPSCR &fpscr)
Definition vfp.cc:904
static T fpRSqrts(T a, T b)
Definition vfp.hh:797
static bool isSnan(fpType val)
Definition vfp.hh:209
uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op)
Definition vfp.cc:576
static void setFPExceptions(int exceptions)
Definition vfp.hh:262
static T fpAdd(T a, T b)
Definition vfp.hh:597
static float fpAddS(float a, float b)
Definition vfp.hh:610
static double makeDouble(uint32_t low, uint32_t high)
Definition vfp.hh:243
static void vfpFlushToZero(FPSCR &fpscr, fpType &op)
Definition vfp.hh:144
Bitfield< 34 > aarch64
Definition types.hh:81
static double fpDivD(double a, double b)
Definition vfp.hh:640
static void setVfpMicroFlags(VfpMicroMode mode, T &flags)
Definition vfp.hh:66
Bitfield< 19, 16 > fp
static T fpRecps(T a, T b)
Definition vfp.hh:819
float fprSqrtEstimate(FPSCR &fpscr, float op)
Definition vfp.cc:770
float fpRecipEstimate(FPSCR &fpscr, float op)
Definition vfp.cc:852
VfpRoundingMode
Definition vfp.hh:107
@ VfpRoundNearest
Definition vfp.hh:108
@ VfpRoundZero
Definition vfp.hh:111
@ VfpRoundAway
Definition vfp.hh:112
@ VfpRoundUpward
Definition vfp.hh:109
@ VfpRoundDown
Definition vfp.hh:110
static T fpMaxNum(T a, T b)
Definition vfp.hh:741
Bitfield< 4 > pc
FloatType fmin(FloatType a, FloatType b)
Definition utility.hh:377
Bitfield< 3 > x
Definition pagetable.hh:74
FloatType fmax(FloatType a, FloatType b)
Definition utility.hh:389
Bitfield< 4 > op
Definition types.hh:83
Bitfield< 63 > val
Definition misc.hh:804
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
uint16_t RegIndex
Definition types.hh:176
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)
Definition fp16_e5m10.hh:83

Generated on Mon Jan 13 2025 04:28:19 for gem5 by doxygen 1.9.8