89 DPRINTF(VEGA,
"Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
90 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
91 "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
110 vdst[lane] = src_dpp[lane];
116 vdst[lane] = src[lane];
160 sdst = src[src_lane];
196 std::frexp(src[lane],&exp);
199 }
else if (
std::isinf(src[lane]) || exp > 30) {
200 if (std::signbit(src[lane])) {
201 vdst[lane] = INT_MIN;
203 vdst[lane] = INT_MAX;
350 std::frexp(src[lane],&exp);
354 if (std::signbit(src[lane])) {
357 vdst[lane] = UINT_MAX;
359 }
else if (exp > 31) {
360 vdst[lane] = UINT_MAX;
401 std::frexp(src[lane],&exp);
404 }
else if (
std::isinf(src[lane]) || exp > 30) {
405 if (std::signbit(src[lane])) {
406 vdst[lane] = INT_MIN;
408 vdst[lane] = INT_MAX;
470 float tmp = src[lane];
473 vdst[lane] = (out.
data >> 16);
510 vdst[lane] = float(tmp);
546 vdst[lane] = (
VecElemI32)std::floor(src[lane] + 0.5);
582 vdst[lane] = (
VecElemI32)std::floor(src[lane]);
851 std::frexp(src[lane],&exp);
855 if (std::signbit(src[lane])) {
858 vdst[lane] = UINT_MAX;
860 }
else if (exp > 31) {
861 vdst[lane] = UINT_MAX;
934 vdst[lane] = std::trunc(src[lane]);
970 vdst[lane] = std::ceil(src[lane]);
1030 Wavefront *wf = gpuDynInst->wavefront();
1041 vdst[lane] = std::floor(src[lane]);
1065 Wavefront *wf = gpuDynInst->wavefront();
1077 vdst[lane] = std::modf(src[lane], &int_part);
1101 Wavefront *wf = gpuDynInst->wavefront();
1112 vdst[lane] = std::trunc(src[lane]);
1137 Wavefront *wf = gpuDynInst->wavefront();
1148 vdst[lane] = std::ceil(src[lane]);
1172 Wavefront *wf = gpuDynInst->wavefront();
1208 Wavefront *wf = gpuDynInst->wavefront();
1219 vdst[lane] = std::floor(src[lane]);
1243 Wavefront *wf = gpuDynInst->wavefront();
1254 vdst[lane] = std::pow(2.0, src[lane]);
1278 Wavefront *wf = gpuDynInst->wavefront();
1289 vdst[lane] = std::log2(src[lane]);
1313 Wavefront *wf = gpuDynInst->wavefront();
1324 vdst[lane] = 1.0 / src[lane];
1350 Wavefront *wf = gpuDynInst->wavefront();
1361 vdst[lane] = 1.0 / src[lane];
1385 Wavefront *wf = gpuDynInst->wavefront();
1396 vdst[lane] = 1.0 / std::sqrt(src[lane]);
1420 Wavefront *wf = gpuDynInst->wavefront();
1431 if (std::fpclassify(src[lane]) == FP_ZERO) {
1432 vdst[lane] = +INFINITY;
1436 if (std::signbit(src[lane])) {
1442 vdst[lane] = 1.0 / src[lane];
1467 Wavefront *wf = gpuDynInst->wavefront();
1478 if (std::fpclassify(src[lane]) == FP_ZERO) {
1479 vdst[lane] = +INFINITY;
1483 && !std::signbit(src[lane])) {
1485 }
else if (std::signbit(src[lane])) {
1488 vdst[lane] = 1.0 / std::sqrt(src[lane]);
1513 Wavefront *wf = gpuDynInst->wavefront();
1524 vdst[lane] = std::sqrt(src[lane]);
1548 Wavefront *wf = gpuDynInst->wavefront();
1559 vdst[lane] = std::sqrt(src[lane]);
1585 Wavefront *wf = gpuDynInst->wavefront();
1598 if (src[lane] < -256.0 || src[lane] > 256.0) {
1601 vdst[lane] = std::sin(src[lane] * 2.0 * pi.
rawData());
1628 Wavefront *wf = gpuDynInst->wavefront();
1641 if (src[lane] < -256.0 || src[lane] > 256.0) {
1644 vdst[lane] = std::cos(src[lane] * 2.0 * pi.
rawData());
1669 Wavefront *wf = gpuDynInst->wavefront();
1680 vdst[lane] = ~src[lane];
1704 Wavefront *wf = gpuDynInst->wavefront();
1739 Wavefront *wf = gpuDynInst->wavefront();
1774 Wavefront *wf = gpuDynInst->wavefront();
1809 Wavefront *wf = gpuDynInst->wavefront();
1830 :
Inst_VOP1(iFmt,
"v_frexp_exp_i32_f64")
1845 Wavefront *wf = gpuDynInst->wavefront();
1860 std::frexp(src[lane], &exp);
1886 Wavefront *wf = gpuDynInst->wavefront();
1898 vdst[lane] = src[lane];
1901 vdst[lane] = std::frexp(src[lane], &exp);
1926 Wavefront *wf = gpuDynInst->wavefront();
1938 vdst[lane] = std::modf(src[lane], &int_part);
1948 :
Inst_VOP1(iFmt,
"v_frexp_exp_i32_f32")
1967 Wavefront *wf = gpuDynInst->wavefront();
1982 std::frexp(src[lane], &exp);
2013 Wavefront *wf = gpuDynInst->wavefront();
2025 vdst[lane] = src[lane];
2028 vdst[lane] = std::frexp(src[lane], &exp);
2072 Wavefront *wf = gpuDynInst->wavefront();
2083 vdst[lane] = src[lane];
2319 :
Inst_VOP1(iFmt,
"v_frexp_exp_i16_f16")
2505 Wavefront *wf = gpuDynInst->wavefront();
2516 vdst[lane] = std::pow(2.0, src[lane]);
2540 Wavefront *wf = gpuDynInst->wavefront();
2551 vdst[lane] = std::log2(src[lane]);
2573 Wavefront *wf = gpuDynInst->wavefront();
2586 vdst[lane] = src[lane];
const std::string _opcode
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_ACCVGPR_MOV_B32()
Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1 *)
~Inst_VOP1__V_BFREV_B32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *)
Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CLREXCP(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_COS_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_COS_F32(InFmt_VOP1 *)
Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F16_F32()
Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F16_I16()
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F16_U16()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F32_F16()
Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F32_F64()
Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F32_I32()
Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F32_U32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F32_UBYTE0()
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F32_UBYTE1()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *)
Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F32_UBYTE2()
~Inst_VOP1__V_CVT_F32_UBYTE3()
Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F64_F32()
Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F64_I32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F64_U32()
Inst_VOP1__V_CVT_FLR_I32_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_FLR_I32_F32()
~Inst_VOP1__V_CVT_I16_F16()
Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_I32_F32()
Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_I32_F64()
Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *)
Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_OFF_F32_I4()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_RPI_I32_F32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_RPI_I32_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_U16_F16()
Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *)
Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_U32_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_U32_F64()
Inst_VOP1__V_EXP_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_EXP_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *)
~Inst_VOP1__V_EXP_LEGACY_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FLOOR_F16()
Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FLOOR_F32()
Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *)
Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *)
~Inst_VOP1__V_FLOOR_F64()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FRACT_F16()
Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FRACT_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *)
~Inst_VOP1__V_FRACT_F64()
Inst_VOP1__V_FREXP_EXP_I16_F16(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_EXP_I16_F16()
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FREXP_EXP_I32_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FREXP_EXP_I32_F32(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_EXP_I32_F64()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FREXP_EXP_I32_F64(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_MANT_F16()
Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_MANT_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_MANT_F64()
Inst_VOP1__V_LOG_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_LOG_F32(InFmt_VOP1 *)
Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *)
~Inst_VOP1__V_LOG_LEGACY_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_MOV_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_MOV_B64(InFmt_VOP1 *)
~Inst_VOP1__V_MOV_FED_B32()
Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_NOP(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_NOT_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RCP_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RCP_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RCP_F64(InFmt_VOP1 *)
~Inst_VOP1__V_RCP_IFLAG_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_READFIRSTLANE_B32(InFmt_VOP1 *)
~Inst_VOP1__V_READFIRSTLANE_B32()
~Inst_VOP1__V_RNDNE_F16()
Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_RNDNE_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *)
Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_RNDNE_F64()
Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_SIN_F16(InFmt_VOP1 *)
Inst_VOP1__V_SIN_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *)
Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *)
~Inst_VOP1__V_TRUNC_F16()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *)
~Inst_VOP1__V_TRUNC_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *)
~Inst_VOP1__V_TRUNC_F64()
void execute(GPUDynInstPtr) override
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
void panicUnimplemented() const
void read() override
read from the vrf.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
void write() override
write to the vrf.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr int findLsbSet(uint64_t val)
Returns the bit position of the LSB that is set in the input That function will either use a builtin ...
std::enable_if_t< std::is_integral_v< T >, T > reverseBits(T val, size_t size=sizeof(T))
Takes a value and returns the bit reversed version.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
constexpr unsigned NumVecElemPerVecReg
ScalarRegI32 firstOppositeSignBit(ScalarRegI32 val)
ScalarRegI32 findFirstOne(T val)
ScalarRegI32 findFirstOneMsb(T val)
T roundNearestEven(T val)
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, T &src0)
processDPP is a helper function for implementing Data Parallel Primitive instructions.
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)