87 DPRINTF(VEGA,
"Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
88 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
89 "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
108 vdst[lane] = src_dpp[lane];
114 vdst[lane] = src[lane];
155 sdst = src[src_lane];
188 std::frexp(src[lane],&exp);
191 }
else if (
std::isinf(src[lane]) || exp > 30) {
192 if (std::signbit(src[lane])) {
193 vdst[lane] = INT_MIN;
195 vdst[lane] = INT_MAX;
330 std::frexp(src[lane],&exp);
334 if (std::signbit(src[lane])) {
337 vdst[lane] = UINT_MAX;
339 }
else if (exp > 31) {
340 vdst[lane] = UINT_MAX;
378 std::frexp(src[lane],&exp);
381 }
else if (
std::isinf(src[lane]) || exp > 30) {
382 if (std::signbit(src[lane])) {
383 vdst[lane] = INT_MIN;
385 vdst[lane] = INT_MAX;
444 float tmp = src[lane];
447 vdst[lane] = (out.
data >> 16);
481 vdst[lane] = float(tmp);
514 vdst[lane] = (
VecElemI32)std::floor(src[lane] + 0.5);
547 vdst[lane] = (
VecElemI32)std::floor(src[lane]);
795 std::frexp(src[lane],&exp);
799 if (std::signbit(src[lane])) {
802 vdst[lane] = UINT_MAX;
804 }
else if (exp > 31) {
805 vdst[lane] = UINT_MAX;
872 vdst[lane] = std::trunc(src[lane]);
905 vdst[lane] = std::ceil(src[lane]);
970 vdst[lane] = std::floor(src[lane]);
1003 vdst[lane] = std::modf(src[lane], &int_part);
1027 Wavefront *wf = gpuDynInst->wavefront();
1035 vdst[lane] = std::trunc(src[lane]);
1060 Wavefront *wf = gpuDynInst->wavefront();
1068 vdst[lane] = std::ceil(src[lane]);
1092 Wavefront *wf = gpuDynInst->wavefront();
1125 Wavefront *wf = gpuDynInst->wavefront();
1133 vdst[lane] = std::floor(src[lane]);
1157 Wavefront *wf = gpuDynInst->wavefront();
1165 vdst[lane] = std::pow(2.0, src[lane]);
1189 Wavefront *wf = gpuDynInst->wavefront();
1197 vdst[lane] = std::log2(src[lane]);
1221 Wavefront *wf = gpuDynInst->wavefront();
1229 vdst[lane] = 1.0 / src[lane];
1255 Wavefront *wf = gpuDynInst->wavefront();
1263 vdst[lane] = 1.0 / src[lane];
1287 Wavefront *wf = gpuDynInst->wavefront();
1295 vdst[lane] = 1.0 / std::sqrt(src[lane]);
1319 Wavefront *wf = gpuDynInst->wavefront();
1327 if (std::fpclassify(src[lane]) == FP_ZERO) {
1328 vdst[lane] = +INFINITY;
1332 if (std::signbit(src[lane])) {
1338 vdst[lane] = 1.0 / src[lane];
1363 Wavefront *wf = gpuDynInst->wavefront();
1371 if (std::fpclassify(src[lane]) == FP_ZERO) {
1372 vdst[lane] = +INFINITY;
1376 && !std::signbit(src[lane])) {
1378 }
else if (std::signbit(src[lane])) {
1381 vdst[lane] = 1.0 / std::sqrt(src[lane]);
1406 Wavefront *wf = gpuDynInst->wavefront();
1414 vdst[lane] = std::sqrt(src[lane]);
1438 Wavefront *wf = gpuDynInst->wavefront();
1446 vdst[lane] = std::sqrt(src[lane]);
1472 Wavefront *wf = gpuDynInst->wavefront();
1482 if (src[lane] < -256.0 || src[lane] > 256.0) {
1485 vdst[lane] = std::sin(src[lane] * 2.0 * pi.
rawData());
1512 Wavefront *wf = gpuDynInst->wavefront();
1522 if (src[lane] < -256.0 || src[lane] > 256.0) {
1525 vdst[lane] = std::cos(src[lane] * 2.0 * pi.
rawData());
1550 Wavefront *wf = gpuDynInst->wavefront();
1558 vdst[lane] = ~src[lane];
1582 Wavefront *wf = gpuDynInst->wavefront();
1614 Wavefront *wf = gpuDynInst->wavefront();
1646 Wavefront *wf = gpuDynInst->wavefront();
1678 Wavefront *wf = gpuDynInst->wavefront();
1696 :
Inst_VOP1(iFmt,
"v_frexp_exp_i32_f64")
1711 Wavefront *wf = gpuDynInst->wavefront();
1723 std::frexp(src[lane], &exp);
1749 Wavefront *wf = gpuDynInst->wavefront();
1758 vdst[lane] = src[lane];
1761 vdst[lane] = std::frexp(src[lane], &exp);
1786 Wavefront *wf = gpuDynInst->wavefront();
1795 vdst[lane] = std::modf(src[lane], &int_part);
1805 :
Inst_VOP1(iFmt,
"v_frexp_exp_i32_f32")
1824 Wavefront *wf = gpuDynInst->wavefront();
1836 std::frexp(src[lane], &exp);
1867 Wavefront *wf = gpuDynInst->wavefront();
1876 vdst[lane] = src[lane];
1879 vdst[lane] = std::frexp(src[lane], &exp);
1923 Wavefront *wf = gpuDynInst->wavefront();
1934 vdst[lane] = src[lane];
2170 :
Inst_VOP1(iFmt,
"v_frexp_exp_i16_f16")
2356 Wavefront *wf = gpuDynInst->wavefront();
2364 vdst[lane] = std::pow(2.0, src[lane]);
2388 Wavefront *wf = gpuDynInst->wavefront();
2396 vdst[lane] = std::log2(src[lane]);
2418 Wavefront *wf = gpuDynInst->wavefront();
2428 vdst[lane] = src[lane];
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_ACCVGPR_MOV_B32()
Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1 *)
~Inst_VOP1__V_BFREV_B32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *)
Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CLREXCP(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_COS_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_COS_F32(InFmt_VOP1 *)
Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F16_F32()
Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F16_I16()
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F16_U16()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F32_F16()
Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F32_F64()
Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F32_I32()
Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F32_U32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F32_UBYTE0()
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F32_UBYTE1()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *)
Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_F32_UBYTE2()
~Inst_VOP1__V_CVT_F32_UBYTE3()
Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F64_F32()
Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F64_I32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_F64_U32()
Inst_VOP1__V_CVT_FLR_I32_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_FLR_I32_F32()
~Inst_VOP1__V_CVT_I16_F16()
Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_I32_F32()
Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_I32_F64()
Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *)
Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_OFF_F32_I4()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_RPI_I32_F32(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_RPI_I32_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_U16_F16()
Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *)
Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_CVT_U32_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *)
~Inst_VOP1__V_CVT_U32_F64()
Inst_VOP1__V_EXP_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_EXP_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *)
~Inst_VOP1__V_EXP_LEGACY_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FLOOR_F16()
Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FLOOR_F32()
Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *)
Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *)
~Inst_VOP1__V_FLOOR_F64()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FRACT_F16()
Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FRACT_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *)
~Inst_VOP1__V_FRACT_F64()
Inst_VOP1__V_FREXP_EXP_I16_F16(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_EXP_I16_F16()
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_FREXP_EXP_I32_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FREXP_EXP_I32_F32(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_EXP_I32_F64()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FREXP_EXP_I32_F64(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_MANT_F16()
Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_MANT_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *)
~Inst_VOP1__V_FREXP_MANT_F64()
Inst_VOP1__V_LOG_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_LOG_F32(InFmt_VOP1 *)
Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *)
~Inst_VOP1__V_LOG_LEGACY_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_MOV_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_MOV_B64(InFmt_VOP1 *)
~Inst_VOP1__V_MOV_FED_B32()
Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_NOP(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_NOT_B32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RCP_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RCP_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RCP_F64(InFmt_VOP1 *)
~Inst_VOP1__V_RCP_IFLAG_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_READFIRSTLANE_B32(InFmt_VOP1 *)
~Inst_VOP1__V_READFIRSTLANE_B32()
~Inst_VOP1__V_RNDNE_F16()
Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_RNDNE_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *)
Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
~Inst_VOP1__V_RNDNE_F64()
Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_SIN_F16(InFmt_VOP1 *)
Inst_VOP1__V_SIN_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *)
Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *)
void execute(GPUDynInstPtr) override
Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *)
~Inst_VOP1__V_TRUNC_F16()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *)
~Inst_VOP1__V_TRUNC_F32()
void execute(GPUDynInstPtr) override
Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *)
~Inst_VOP1__V_TRUNC_F64()
void execute(GPUDynInstPtr) override
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
void panicUnimplemented() const
void read() override
read from the vrf.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
void write() override
write to the vrf.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr int findLsbSet(uint64_t val)
Returns the bit position of the LSB that is set in the input That function will either use a builtin ...
std::enable_if_t< std::is_integral_v< T >, T > reverseBits(T val, size_t size=sizeof(T))
Takes a value and returns the bit reversed version.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
constexpr unsigned NumVecElemPerVecReg
ScalarRegI32 firstOppositeSignBit(ScalarRegI32 val)
ScalarRegI32 findFirstOne(T val)
ScalarRegI32 findFirstOneMsb(T val)
T roundNearestEven(T val)
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, T &src0)
processDPP is a helper function for implementing Data Parallel Primitive instructions.
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)