38#ifndef __ARCH_ARM_INSTS_VFP_HH__
39#define __ARCH_ARM_INSTS_VFP_HH__
70 flags[StaticInst::IsMicroop] =
true;
73 flags[StaticInst::IsMicroop] =
74 flags[StaticInst::IsFirstMicroop] =
true;
77 flags[StaticInst::IsMicroop] =
78 flags[StaticInst::IsLastMicroop] =
true;
84 flags[StaticInst::IsDelayedCommit] =
true;
115static inline float bitsToFp(uint64_t,
float);
116static inline double bitsToFp(uint64_t,
double);
117static inline uint32_t
fpToBits(
float);
118static inline uint64_t
fpToBits(
double);
120template <
class fpType>
125 if (std::fpclassify(
op) == FP_SUBNORMAL) {
126 uint64_t bitMask = 0x1ULL << (
sizeof(fpType) * 8 - 1);
133template <
class fpType>
139 return flush1 || flush2;
142template <
class fpType>
151template <
class fpType>
159static inline uint32_t
171static inline uint64_t
207template <
class fpType>
211 const bool single = (
sizeof(fpType) ==
sizeof(
float));
212 const uint64_t qnan =
213 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
222template <
class fpType>
225template <
class fpType>
226fpType
fixDest(FPSCR fpscr, fpType
val, fpType op1, fpType op2);
228template <
class fpType>
234uint16_t
vcvtFpSFpH(FPSCR &fpscr,
bool flush,
bool defaultNan,
236uint16_t
vcvtFpDFpH(FPSCR &fpscr,
bool flush,
bool defaultNan,
240double vcvtFpHFpD(FPSCR &fpscr,
bool defaultNan,
bool ahp, uint16_t
op);
246 return bitsToFp((uint64_t)low | ((uint64_t)
high << 32), junk);
249static inline uint32_t
255static inline uint32_t
264 feraiseexcept(exceptions);
275 bool roundAwayFix =
false;
278 rmode = fegetround();
301 panic(
"Unsupported roundMode %d\n", roundMode);
304 __asm__ __volatile__(
"" :
"=m" (rmode) :
"m" (rmode));
307 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
310 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
313 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
317 int fpType = std::fpclassify(
val);
318 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
319 if (fpType == FP_NAN) {
323 }
else if (origVal !=
val) {
326 if (origVal -
val > 0.5)
328 else if (
val - origVal > 0.5)
340 if ( (
error > 0.5) ||
356 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
359 bool outOfRange =
false;
360 int64_t result = (int64_t)
val;
365 finalVal = (int16_t)
val;
366 }
else if (
width == 32) {
367 finalVal =(int32_t)
val;
368 }
else if (
width == 64) {
375 int64_t minVal = ~mask(
width-1);
376 if ((
double)
val < minVal) {
381 if ((
double)
val > maxVal) {
386 bool isNeg =
val < 0;
391 outOfRange = ((uint64_t) result >> (
width - 1)) !=
398 outOfRange |=
val < result;
400 finalVal = 1LL << (
width-1);
403 outOfRange |=
val > result;
413 exceptions &= ~FeInexact;
418 if ((
double)
val < 0) {
420 exceptions &= ~FeInexact;
428 exceptions &= ~FeInexact;
446 bool roundAwayFix =
false;
449 rmode = fegetround();
472 panic(
"Unsupported roundMode %d\n", roundMode);
475 __asm__ __volatile__(
"" :
"=m" (rmode) :
"m" (rmode));
476 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
479 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
482 __asm__ __volatile__(
"" :
"=m" (
val) :
"m" (
val));
486 exceptions &= ~FeInexact;
489 int fpType = std::fpclassify(
val);
490 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
491 if (fpType == FP_NAN) {
496 bool single = (
sizeof(T) ==
sizeof(
float));
497 uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
503 }
else if (origVal !=
val) {
506 if (origVal -
val > 0.5)
508 else if (
val - origVal > 0.5)
520 if ( (
error > 0.5) ||
538 fpType = std::fpclassify(
val);
539 if (fpType == FP_ZERO) {
540 bool single = (
sizeof(T) ==
sizeof(
float));
541 uint64_t
mask = single ? 0x80000000 : 0x8000000000000000ULL;
578 return (idx % 32) < 8;
585 OpClass __opClass,
bool _wide) :
659 const bool single = (
sizeof(T) ==
sizeof(
float));
671 bool inf1 = (std::fpclassify(
a) == FP_INFINITE);
672 bool inf2 = (std::fpclassify(
b) == FP_INFINITE);
673 bool zero1 = (std::fpclassify(
a) == FP_ZERO);
674 bool zero2 = (std::fpclassify(
b) == FP_ZERO);
675 if ((inf1 && zero2) || (zero1 && inf2)) {
712 if (
sizeof(T) ==
sizeof(
float))
713 result = fmaf(op1, op2, addend);
715 result = fma(op1, op2, addend);
721 uint64_t bitMask = 0x1ULL << ((
sizeof(T) * 8) - 1);
743 const bool single = (
sizeof(T) ==
sizeof(
float));
744 const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
751 if (!std::signbit(
a) && std::signbit(
b))
764 return fpMaxNum<T>(
a,
b);
771 const bool single = (
sizeof(T) ==
sizeof(
float));
772 const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
779 if (std::signbit(
a) && !std::signbit(
b))
792 return fpMinNum<T>(
a,
b);
799 int fpClassA = std::fpclassify(
a);
800 int fpClassB = std::fpclassify(
b);
804 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
805 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
809 fpClassAxB = std::fpclassify(aXb);
810 if (fpClassAxB == FP_SUBNORMAL) {
814 return (3.0 - (
a *
b)) / 2.0;
821 int fpClassA = std::fpclassify(
a);
822 int fpClassB = std::fpclassify(
b);
826 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
827 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
831 fpClassAxB = std::fpclassify(aXb);
832 if (fpClassAxB == FP_SUBNORMAL) {
836 return 2.0 - (
a *
b);
843 int fpClassA = std::fpclassify(
a);
844 int fpClassB = std::fpclassify(
b);
848 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
849 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
853 fpClassAxB = std::fpclassify(aXb);
854 if (fpClassAxB == FP_SUBNORMAL) {
858 return (3.0 - (
a *
b)) / 2.0;
864 int fpClassA = std::fpclassify(
a);
865 int fpClassB = std::fpclassify(
b);
869 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
870 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
874 fpClassAxB = std::fpclassify(aXb);
875 if (fpClassAxB == FP_SUBNORMAL) {
879 return 2.0 - (
a *
b);
888 if (
a -
val == 0.5) {
889 if ( (((
int)
a) & 1) == 0 )
val += 1.0;
891 else if (
a -
val == -0.5) {
892 if ( (((
int)
a) & 1) == 0 )
val -= 1.0;
903 PredOp(mnem, _machInst, __opClass)
907 doOp(
float op1,
float op2)
const
909 panic(
"Unimplemented version of doOp called.\n");
915 panic(
"Unimplemented version of doOp called.\n");
919 doOp(
double op1,
double op2)
const
921 panic(
"Unimplemented version of doOp called.\n");
927 panic(
"Unimplemented version of doOp called.\n");
934 return bitsToFp((uint64_t)low | ((uint64_t)
high << 32), junk);
949 template <
class fpType>
951 processNans(FPSCR &fpscr,
bool &done,
bool defaultNan,
952 fpType op1, fpType op2)
const;
954 template <
class fpType>
956 ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
957 fpType (*func)(fpType, fpType, fpType),
958 bool flush,
bool defaultNan, uint32_t
rMode)
const;
960 template <
class fpType>
962 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
963 fpType (*func)(fpType, fpType),
964 bool flush,
bool defaultNan, uint32_t
rMode)
const;
966 template <
class fpType>
968 unaryOp(FPSCR &fpscr, fpType op1,
969 fpType (*func)(fpType),
970 bool flush, uint32_t
rMode)
const;
976 if (
flags[IsLastMicroop]) {
978 }
else if (
flags[IsMicroop]) {
989 if (
flags[IsLastMicroop]) {
991 }
else if (
flags[IsMicroop]) {
1003 return unaryOp(fpscr,
x,sqrtf,fpscr.fz,fpscr.rMode);
1011 return unaryOp(fpscr,
x,sqrt,fpscr.fz,fpscr.rMode);
1026 FpOp(mnem, _machInst, __opClass),
1043 FpOp(mnem, _machInst, __opClass),
1060 FpOp(mnem, _machInst, __opClass),
dest(_dest),
op1(_op1)
1078 FpOp(mnem, _machInst, __opClass),
dest(_dest),
imm(_imm)
1097 FpOp(mnem, _machInst, __opClass),
dest(_dest),
op1(_op1),
imm(_imm)
1116 FpOp(mnem, _machInst, __opClass),
dest(_dest),
op1(_op1),
op2(_op2)
1137 FpOp(mnem, _machInst, __opClass),
dest(_dest),
op1(_op1),
op2(_op2),
1158 FpOp(mnem, _machInst, __opClass),
dest(_dest),
op1(_op1),
op2(_op2),
1180 FpOp(mnem, _machInst, __opClass),
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
FpCondCompRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _op1, RegIndex _op2, ConditionCode _condCode, uint8_t _defCc)
FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, ConditionCode _condCode)
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
void advancePC(ThreadContext *tc) const override
void advancePC(PCStateBase &pcState) const override
float fpSqrt(FPSCR fpscr, float x) const
uint32_t dblLow(double val) const
fpType unaryOp(FPSCR &fpscr, fpType op1, fpType(*func)(fpType), bool flush, uint32_t rMode) const
fpType processNans(FPSCR &fpscr, bool &done, bool defaultNan, fpType op1, fpType op2) const
uint32_t dblHi(double val) const
virtual double doOp(double op1) const
fpType ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, fpType(*func)(fpType, fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
virtual float doOp(float op1) const
fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType(*func)(fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
virtual double doOp(double op1, double op2) const
double fpSqrt(FPSCR fpscr, double x) const
FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
virtual float doOp(float op1, float op2) const
double dbl(uint32_t low, uint32_t high) const
FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, uint64_t _imm, VfpMicroMode mode=VfpNotAMicroop)
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, uint64_t _imm, VfpMicroMode mode=VfpNotAMicroop)
FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, VfpMicroMode mode=VfpNotAMicroop)
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
FpRegRegRegCondOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, ConditionCode _cond, VfpMicroMode mode=VfpNotAMicroop)
FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, uint64_t _imm, VfpMicroMode mode=VfpNotAMicroop)
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, VfpMicroMode mode=VfpNotAMicroop)
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, RegIndex _dest, RegIndex _op1, RegIndex _op2, RegIndex _op3, VfpMicroMode mode=VfpNotAMicroop)
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Base class for predicated macro-operations.
Base class for predicated integer operations.
static bool inScalarBank(RegIndex idx)
void nextIdxs(RegIndex &dest, RegIndex &op1, RegIndex &op2)
RegIndex addStride(RegIndex idx, unsigned stride)
VfpMacroOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, bool _wide)
std::bitset< Num_Flags > flags
Flag values for this instruction.
ThreadContext is the external interface to all thread state for anything outside of the CPU.
virtual const PCStateBase & pcState() const =0
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic(...)
This implements a cprintf based panic() function.
uint32_t unsignedRecipEstimate(uint32_t op)
static uint32_t fpToBits(float)
fpType fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
static uint32_t highFromDouble(double val)
static double fpMulD(double a, double b)
double vfpSFixedToFpD(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
T GEM5_NO_OPTIMIZE vfpFpRint(T val, bool exact, bool defaultNan, bool useRmode=true, VfpRoundingMode roundMode=VfpRoundZero)
static T fpRIntX(T a, FPSCR &fpscr)
static const uint32_t FpscrExcMask
static float fpRSqrtsS(float a, float b)
static float fpDivS(float a, float b)
double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
static T fpMulX(T a, T b)
static float fpRecpsS(float a, float b)
float vfpUFixedToFpS(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
uint32_t unsignedRSqrtEstimate(uint32_t op)
VfpSavedState prepFpState(uint32_t rMode)
float fixFpDFpSDest(FPSCR fpscr, double val)
static float fpMulS(float a, float b)
void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
double fixFpSFpDDest(FPSCR fpscr, float val)
float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
static T fpMinNum(T a, T b)
float vfpSFixedToFpS(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
static uint32_t lowFromDouble(double val)
fpType fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
double vfpUFixedToFpD(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Bitfield< 21, 20 > stride
static float bitsToFp(uint64_t, float)
static T fpMulAdd(T op1, T op2, T addend)
static double fpAddD(double a, double b)
static float fpSubS(float a, float b)
uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, double op)
static double fpSubD(double a, double b)
uint64_t GEM5_NO_OPTIMIZE vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool useRmode=true, VfpRoundingMode roundMode=VfpRoundZero, bool aarch64=false)
FPSCR fpStandardFPSCRValue(const FPSCR &fpscr)
static T fpRSqrts(T a, T b)
static bool isSnan(fpType val)
uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op)
static void setFPExceptions(int exceptions)
static float fpAddS(float a, float b)
static double makeDouble(uint32_t low, uint32_t high)
static void vfpFlushToZero(FPSCR &fpscr, fpType &op)
static double fpDivD(double a, double b)
static void setVfpMicroFlags(VfpMicroMode mode, T &flags)
static T fpRecps(T a, T b)
float fprSqrtEstimate(FPSCR &fpscr, float op)
float fpRecipEstimate(FPSCR &fpscr, float op)
static T fpMaxNum(T a, T b)
FloatType fmin(FloatType a, FloatType b)
FloatType fmax(FloatType a, FloatType b)
Copyright (c) 2024 Arm Limited All rights reserved.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)