#include "debug/VEGA.hh"
// Per-lane select on this lane's VCC bit (the V_CNDMASK_B32 select).
vdst[lane] = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
DPRINTF(VEGA,
        "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
        "BANK_MASK: %d, ROW_MASK: %d\n",
        extData.iFmt_VOP_DPP.SRC0,
        extData.iFmt_VOP_DPP.DPP_CTRL,
        extData.iFmt_VOP_DPP.SRC0_ABS,
        extData.iFmt_VOP_DPP.SRC0_NEG,
        extData.iFmt_VOP_DPP.SRC1_ABS,
        extData.iFmt_VOP_DPP.SRC1_NEG,
        extData.iFmt_VOP_DPP.BC,
        extData.iFmt_VOP_DPP.BANK_MASK,
        extData.iFmt_VOP_DPP.ROW_MASK);

// DPP path: src0 has already been permuted across lanes.
vdst[lane] = src0_dpp[lane] + src1[lane];

// Plain VOP2 path.
vdst[lane] = src0[lane] + src1[lane];
vdst[lane] = src0[lane] - src1[lane];

vdst[lane] = src1[lane] - src0[lane];

vdst[lane] = src0[lane] * src1[lane];
// Zero/denormal and infinite operands get explicit, sign-aware results;
// only the final else falls through to a plain per-lane multiply.
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
            std::fpclassify(src0[lane]) == FP_ZERO) &&
           !std::signbit(src0[lane])) {
    if (std::isinf(src1[lane])) {
        vdst[lane] = NAN;
    } else if (!std::signbit(src1[lane])) {
        vdst[lane] = +0.0;
    } else {
        vdst[lane] = -0.0;
    }
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
            std::fpclassify(src0[lane]) == FP_ZERO) &&
           std::signbit(src0[lane])) {
    if (std::isinf(src1[lane])) {
        vdst[lane] = NAN;
    } else if (std::signbit(src1[lane])) {
        vdst[lane] = +0.0;
    } else {
        vdst[lane] = -0.0;
    }
} else if (std::isinf(src0[lane]) &&
           !std::signbit(src0[lane])) {
    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
        std::fpclassify(src1[lane]) == FP_ZERO) {
        vdst[lane] = NAN;
    } else if (!std::signbit(src1[lane])) {
        vdst[lane] = +INFINITY;
    } else {
        vdst[lane] = -INFINITY;
    }
} else if (std::isinf(src0[lane]) &&
           std::signbit(src0[lane])) {
    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
        std::fpclassify(src1[lane]) == FP_ZERO) {
        vdst[lane] = NAN;
    } else if (std::signbit(src1[lane])) {
        vdst[lane] = +INFINITY;
    } else {
        vdst[lane] = -INFINITY;
    }
} else {
    vdst[lane] = src0[lane] * src1[lane];
}
// V_MUL_HI_I32_I24: keep the upper 32 bits of the widened 24-bit product.
vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);

if (wf->execMask(lane)) {
    // V_MUL_U32_U24: multiply only the low 24 bits of each operand.
    vdst[lane] = bits(src0[lane], 23, 0) *
                 bits(src1[lane], 23, 0);
}

// V_MUL_HI_U32_U24: unsigned variant of the high-half multiply.
vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
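The *_I24/*_U24 forms only use the low 24 bits of each operand, and the *_HI forms keep the upper 32 bits of the widened product. A minimal standalone sketch of the same arithmetic (plain C++ with made-up values; not gem5 code):

#include <cassert>
#include <cstdint>

int main()
{
    uint32_t src0 = 0x01ffffff;          // only bits 23..0 participate
    uint32_t src1 = 0x00000003;

    // V_MUL_U32_U24-style: mask to 24 bits, then multiply.
    uint32_t lo = (src0 & 0xffffff) * (src1 & 0xffffff);
    assert(lo == 0x02fffffd);

    // V_MUL_HI_U32_U24-style: widen to 64 bits and keep the upper half.
    uint64_t prod = uint64_t(src0 & 0xffffff) * uint64_t(src1 & 0xffffff);
    uint32_t hi = uint32_t(prod >> 32);
    assert(hi == 0);                     // small product, so the high half is zero
    return 0;
}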
vdst[lane] = std::fmin(src0[lane], src1[lane]);

vdst[lane] = std::fmax(src0[lane], src1[lane]);

vdst[lane] = std::min(src0[lane], src1[lane]);

vdst[lane] = std::max(src0[lane], src1[lane]);

vdst[lane] = std::min(src0[lane], src1[lane]);

vdst[lane] = std::max(src0[lane], src1[lane]);

// Reversed shifts: the shift amount is the low 5 bits of src0.
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);

vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
origSrc0_sdwa.read();

DPRINTF(VEGA,
        "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
        "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
        "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
        "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
        extData.iFmt_VOP_SDWA.SRC0,
        extData.iFmt_VOP_SDWA.DST_SEL,
        extData.iFmt_VOP_SDWA.DST_U,
        extData.iFmt_VOP_SDWA.CLMP,
        extData.iFmt_VOP_SDWA.SRC0_SEL,
        extData.iFmt_VOP_SDWA.SRC0_SEXT,
        extData.iFmt_VOP_SDWA.SRC0_NEG,
        extData.iFmt_VOP_SDWA.SRC0_ABS,
        extData.iFmt_VOP_SDWA.SRC1_SEL,
        extData.iFmt_VOP_SDWA.SRC1_SEXT,
        extData.iFmt_VOP_SDWA.SRC1_NEG,
        extData.iFmt_VOP_SDWA.SRC1_ABS);

// SDWA path: the shift amount comes from the sub-dword-selected src0.
vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
origVdst[lane] = vdst[lane];

// Plain VOP2 path.
vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
DPRINTF(VEGA,
        "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
        "BANK_MASK: %d, ROW_MASK: %d\n",
        extData.iFmt_VOP_DPP.SRC0,
        extData.iFmt_VOP_DPP.DPP_CTRL,
        extData.iFmt_VOP_DPP.SRC0_ABS,
        extData.iFmt_VOP_DPP.SRC0_NEG,
        extData.iFmt_VOP_DPP.SRC1_ABS,
        extData.iFmt_VOP_DPP.SRC1_NEG,
        extData.iFmt_VOP_DPP.BC,
        extData.iFmt_VOP_DPP.BANK_MASK,
        extData.iFmt_VOP_DPP.ROW_MASK);

// DPP path.
vdst[lane] = src0_dpp[lane] & src1[lane];

// Plain VOP2 path.
vdst[lane] = src0[lane] & src1[lane];
origSrc0_sdwa.read();

DPRINTF(VEGA,
        "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
        extData.iFmt_VOP_SDWA.SRC0,
        extData.iFmt_VOP_SDWA.DST_SEL,
        extData.iFmt_VOP_SDWA.DST_U,
        extData.iFmt_VOP_SDWA.CLMP,
        extData.iFmt_VOP_SDWA.SRC0_SEL,
        extData.iFmt_VOP_SDWA.SRC0_SEXT,
        extData.iFmt_VOP_SDWA.SRC0_NEG,
        extData.iFmt_VOP_SDWA.SRC0_ABS,
        extData.iFmt_VOP_SDWA.SRC1_SEL,
        extData.iFmt_VOP_SDWA.SRC1_SEXT,
        extData.iFmt_VOP_SDWA.SRC1_NEG,
        extData.iFmt_VOP_SDWA.SRC1_ABS);

// SDWA path.
vdst[lane] = src0_sdwa[lane] | src1[lane];
origVdst[lane] = vdst[lane];

// Plain VOP2 path.
vdst[lane] = src0[lane] | src1[lane];
Wavefront *wf = gpuDynInst->wavefront();

vdst[lane] = src0[lane] ^ src1[lane];
Wavefront *wf = gpuDynInst->wavefront();

DPRINTF(VEGA,
        "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
        "BANK_MASK: %d, ROW_MASK: %d\n",
        extData.iFmt_VOP_DPP.SRC0,
        extData.iFmt_VOP_DPP.DPP_CTRL,
        extData.iFmt_VOP_DPP.SRC0_ABS,
        extData.iFmt_VOP_DPP.SRC0_NEG,
        extData.iFmt_VOP_DPP.SRC1_ABS,
        extData.iFmt_VOP_DPP.SRC1_NEG,
        extData.iFmt_VOP_DPP.BC,
        extData.iFmt_VOP_DPP.BANK_MASK,
        extData.iFmt_VOP_DPP.ROW_MASK);

// Multiply-accumulate: vdst is both the accumulator input and the result.
vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
                      vdst[lane]);

vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
Wavefront *wf = gpuDynInst->wavefront();

// Mad with a literal multiplier: vdst = src0 * k + src1.
vdst[lane] = std::fma(src0[lane], k, src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();

// Mad with a literal addend: vdst = src0 * src1 + k.
vdst[lane] = std::fma(src0[lane], src1[lane], k);
Wavefront *wf = gpuDynInst->wavefront();

origSrc0_sdwa.read();

DPRINTF(VEGA,
        "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
        "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
        extData.iFmt_VOP_SDWA.SRC0,
        extData.iFmt_VOP_SDWA.DST_SEL,
        extData.iFmt_VOP_SDWA.DST_U,
        extData.iFmt_VOP_SDWA.CLMP,
        extData.iFmt_VOP_SDWA.SRC0_SEL,
        extData.iFmt_VOP_SDWA.SRC0_SEXT,
        extData.iFmt_VOP_SDWA.SRC0_NEG,
        extData.iFmt_VOP_SDWA.SRC0_ABS,
        extData.iFmt_VOP_SDWA.SRC1_SEL,
        extData.iFmt_VOP_SDWA.SRC1_SEXT,
        extData.iFmt_VOP_SDWA.SRC1_NEG,
        extData.iFmt_VOP_SDWA.SRC1_ABS);

// SDWA path: add, then record the unsigned carry-out in VCC by widening
// both operands to 64 bits.
vdst[lane] = src0_sdwa[lane] + src1[lane];
origVdst[lane] = vdst[lane];
vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
    + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);

// Plain VOP2 path.
vdst[lane] = src0[lane] + src1[lane];
vcc.setBit(lane, ((VecElemU64)src0[lane]
    + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
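The carry-out test works because the exact sum of two 32-bit unsigned values always fits in 64 bits, so comparing the widened sum against 2^32 recovers the carry. A standalone sketch of that check (plain C++, made-up values, independent of gem5's operand classes):

#include <cassert>
#include <cstdint>

int main()
{
    uint32_t a = 0xffffffffu, b = 2u;

    // Widen before adding so the carry is not lost to 32-bit wrap-around.
    uint64_t wide = uint64_t(a) + uint64_t(b);
    uint32_t sum  = uint32_t(wide);           // what the VGPR lane would hold
    int carry     = (wide >= 0x100000000ULL) ? 1 : 0;

    assert(sum == 1u && carry == 1);
    return 0;
}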
Wavefront *wf = gpuDynInst->wavefront();

// Subtract; VCC records the borrow-out for this lane.
vdst[lane] = src0[lane] - src1[lane];
vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);

Wavefront *wf = gpuDynInst->wavefront();

// Reverse-operand subtract with borrow-out.
vdst[lane] = src1[lane] - src0[lane];
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
Wavefront *wf = gpuDynInst->wavefront();

// Add with the carry-in taken from this lane's VCC bit; the widened sum is
// compared against 2^32 to produce the carry-out.
vdst[lane] = src0[lane] + src1[lane]
    + bits(vcc.rawData(), lane);
vcc.setBit(lane, ((VecElemU64)src0[lane]
    + (VecElemU64)src1[lane]
    + (VecElemU64)bits(vcc.rawData(), lane))
    >= 0x100000000 ? 1 : 0);

Wavefront *wf = gpuDynInst->wavefront();

// Subtract with borrow-in from VCC; borrow-out goes back to VCC.
vdst[lane]
    = src0[lane] - src1[lane] -
    bits(vcc.rawData(), lane);
vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
    > src0[lane] ? 1 : 0);

Wavefront *wf = gpuDynInst->wavefront();

// Reverse-operand subtract with borrow.
vdst[lane]
    = src1[lane] - src0[lane] -
    bits(vcc.rawData(), lane);
vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
    > src1[lane] ? 1 : 0);
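The borrow logic follows the same idea: the lane result wraps modulo 2^32, and the borrow-out is set whenever the amount being subtracted exceeds the minuend. A quick standalone sanity check (not gem5 code):

#include <cassert>
#include <cstdint>

int main()
{
    uint32_t src0 = 5, src1 = 7, borrow_in = 1;

    // src0 - src1 - borrow_in with wrap-around, as a VGPR lane would hold it.
    uint32_t diff = src0 - src1 - borrow_in;

    // Borrow-out is set when the amount subtracted exceeds src0.
    int borrow_out = (src1 + borrow_in > src0) ? 1 : 0;

    assert(diff == 0xfffffffdu && borrow_out == 1);
    return 0;
}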
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] + src1[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] - src1[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] - src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] * src1[lane];

Wavefront *wf = gpuDynInst->wavefront();
// 16-bit reversed shift: the shift amount is the low 4 bits of src0.
vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] >> src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] >> src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::max(src0[lane], src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::max(src0[lane], src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::min(src0[lane], src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::min(src0[lane], src1[lane]);
Wavefront *wf = gpuDynInst->wavefront();

origSrc0_sdwa.read();

DPRINTF(VEGA,
        "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
        extData.iFmt_VOP_SDWA.SRC0,
        extData.iFmt_VOP_SDWA.DST_SEL,
        extData.iFmt_VOP_SDWA.DST_U,
        extData.iFmt_VOP_SDWA.CLMP,
        extData.iFmt_VOP_SDWA.SRC0_SEL,
        extData.iFmt_VOP_SDWA.SRC0_SEXT,
        extData.iFmt_VOP_SDWA.SRC0_NEG,
        extData.iFmt_VOP_SDWA.SRC0_ABS,
        extData.iFmt_VOP_SDWA.SRC1_SEL,
        extData.iFmt_VOP_SDWA.SRC1_SEXT,
        extData.iFmt_VOP_SDWA.SRC1_NEG,
        extData.iFmt_VOP_SDWA.SRC1_ABS);

// SDWA path.
vdst[lane] = src0_sdwa[lane] + src1[lane];
origVdst[lane] = vdst[lane];

// Plain VOP2 path.
vdst[lane] = src0[lane] + src1[lane];
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] - src1[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] - src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);

Wavefront *wf = gpuDynInst->wavefront();
// Bitwise XNOR.
vdst[lane] = ~(src0[lane] ^ src1[lane]);
Referenced VOP2 instruction classes (each declares a constructor taking an
InFmt_VOP2 *, a destructor, and an execute(GPUDynInstPtr) override):
Inst_VOP2__V_ADDC_CO_U32, Inst_VOP2__V_ADD_CO_U32, Inst_VOP2__V_ADD_F16,
Inst_VOP2__V_ADD_F32, Inst_VOP2__V_ADD_U16, Inst_VOP2__V_ADD_U32,
Inst_VOP2__V_AND_B32, Inst_VOP2__V_ASHRREV_I16, Inst_VOP2__V_ASHRREV_I32,
Inst_VOP2__V_CNDMASK_B32, Inst_VOP2__V_FMAC_F32, Inst_VOP2__V_FMAC_F64,
Inst_VOP2__V_LDEXP_F16, Inst_VOP2__V_LSHLREV_B16, Inst_VOP2__V_LSHLREV_B32,
Inst_VOP2__V_LSHRREV_B16, Inst_VOP2__V_LSHRREV_B32, Inst_VOP2__V_MAC_F16,
Inst_VOP2__V_MAC_F32, Inst_VOP2__V_MADAK_F16, Inst_VOP2__V_MADAK_F32,
Inst_VOP2__V_MADMK_F16, Inst_VOP2__V_MADMK_F32, Inst_VOP2__V_MAX_F16,
Inst_VOP2__V_MAX_F32, Inst_VOP2__V_MAX_I16, Inst_VOP2__V_MAX_I32,
Inst_VOP2__V_MAX_U16, Inst_VOP2__V_MAX_U32, Inst_VOP2__V_MIN_F16,
Inst_VOP2__V_MIN_F32, Inst_VOP2__V_MIN_I16, Inst_VOP2__V_MIN_I32,
Inst_VOP2__V_MIN_U16, Inst_VOP2__V_MIN_U32, Inst_VOP2__V_MUL_F16,
Inst_VOP2__V_MUL_F32, Inst_VOP2__V_MUL_HI_I32_I24, Inst_VOP2__V_MUL_HI_U32_U24,
Inst_VOP2__V_MUL_I32_I24, Inst_VOP2__V_MUL_LEGACY_F32, Inst_VOP2__V_MUL_LO_U16,
Inst_VOP2__V_MUL_U32_U24, Inst_VOP2__V_OR_B32, Inst_VOP2__V_SUBBREV_CO_U32,
Inst_VOP2__V_SUBB_CO_U32, Inst_VOP2__V_SUBREV_CO_U32, Inst_VOP2__V_SUBREV_F16,
Inst_VOP2__V_SUBREV_F32, Inst_VOP2__V_SUBREV_U16, Inst_VOP2__V_SUBREV_U32,
Inst_VOP2__V_SUB_CO_U32, Inst_VOP2__V_SUB_F16, Inst_VOP2__V_SUB_F32,
Inst_VOP2__V_SUB_U16, Inst_VOP2__V_SUB_U32, Inst_VOP2__V_XNOR_B32,
Inst_VOP2__V_XOR_B32.

They share the Inst_VOP2 base class, whose referenced members include the
opcode string _opcode, the constructor
Inst_VOP2(InFmt_VOP2 *, const std::string &opcode), and the templated per-lane
helper vop2Helper(GPUDynInstPtr gpuDynInst, void (*fOpImpl)(T &, T &, T &, Wavefront *)).
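The vop2Helper signature suggests the common structure of these execute() methods: loop over the wavefront's lanes and apply one scalar operation wherever the execution mask is set. A toy analog of that pattern (all names and the lane loop are invented for illustration; this is not gem5's implementation):

#include <array>
#include <bitset>
#include <cstdint>
#include <iostream>

constexpr int kLanes = 64;                       // one wavefront

// Toy stand-in for a per-lane VOP2 body: dst = op(src0, src1).
template <typename T, typename Op>
void
vop2LikeHelper(const std::bitset<kLanes> &execMask,
               std::array<T, kLanes> &dst,
               const std::array<T, kLanes> &src0,
               const std::array<T, kLanes> &src1,
               Op op)
{
    for (int lane = 0; lane < kLanes; ++lane) {
        if (execMask[lane])                      // skip inactive lanes
            dst[lane] = op(src0[lane], src1[lane]);
    }
}

int main()
{
    std::array<uint32_t, kLanes> a{}, b{}, d{};
    a.fill(3);
    b.fill(4);
    std::bitset<kLanes> exec;
    exec.set();                                  // all lanes active

    vop2LikeHelper(exec, d, a, b,
                   [](uint32_t x, uint32_t y) { return x + y; });
    std::cout << d[0] << "\n";                   // prints 7
    return 0;
}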
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
bit access to scalar data.
void panicUnimplemented() const
void read() override
read from the vrf.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
void write() override
write to the vrf.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
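As a worked example of that convention, where 'first' is the high bit and 'last' the low bit, a standalone re-implementation (for illustration only, not gem5's bits()):

#include <cassert>
#include <cstdint>

// Same convention: 'first' is the high bit, 'last' the low bit, both inclusive.
constexpr uint64_t
bitsDemo(uint64_t val, unsigned first, unsigned last)
{
    return (val >> last) & ((1ULL << (first - last + 1)) - 1);
}

int main()
{
    assert(bitsDemo(0xABCD, 7, 0) == 0xCD);   // low byte
    assert(bitsDemo(0xABCD, 15, 8) == 0xAB);  // high byte
    assert(bitsDemo(0x3, 4, 0) == 0x3);       // the shift-amount idiom above
    return 0;
}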
constexpr uint64_t sext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
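This is what the *_I24 multiplies rely on for their 24-bit operands. A standalone stand-in for the same operation (illustrative, not gem5's sext()):

#include <cassert>
#include <cstdint>

// Sign-extend an N-bit value to 64 bits using the two's-complement trick.
template <int N>
constexpr uint64_t
sextDemo(uint64_t val)
{
    uint64_t sign = 1ULL << (N - 1);
    return (val ^ sign) - sign;
}

int main()
{
    // The 24-bit value 0x800000 has its sign bit set, so it extends negatively.
    assert(sextDemo<24>(0x800000) == 0xffffffffff800000ULL);
    // A positive 24-bit value is unchanged.
    assert(sextDemo<24>(0x7fffff) == 0x7fffffULL);
    return 0;
}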
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
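Usage follows the usual printf-style pattern; a small illustrative example (the condition and message are made up, and the include assumes gem5's source tree):

#include "base/logging.hh"   // provides panic_if

void
checkLane(int lane)
{
    // Aborts the simulation (with file and line context) only when the
    // condition is true.
    panic_if(lane < 0 || lane >= 64,
             "lane index %d out of bounds\n", lane);
}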
classes that represent vector/scalar operands in the VEGA ISA.
ScalarOperand< ScalarRegU64, false > ScalarOperandU64
VecOperand< VecElemF32, true > ConstVecOperandF32
VecOperand< VecElemU32, false > VecOperandU32
VecOperand< VecElemF64, true > ConstVecOperandF64
VecOperand< VecElemI16, false, 1 > VecOperandI16
VecOperand< VecElemI32, true > ConstVecOperandI32
VecOperand< VecElemU32, true > ConstVecOperandU32
void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T &src0, T &origSrc0)
processSDWA_src is a helper function for implementing sub d-word addressing instructions for the src ...
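Conceptually, the src-side pass replaces each 32-bit source element with a selected sub-dword (byte or word) before the main ALU operation runs. A simplified single-value sketch of that selection (sign-extension, negation, abs, and the operand classes are omitted; not gem5 code):

#include <cassert>
#include <cstdint>

// Simplified SDWA-style source select: pick a byte or word out of a dword.
enum class SdwaSel { Byte0, Byte1, Byte2, Byte3, Word0, Word1, Dword };

uint32_t
selectSubDword(uint32_t src, SdwaSel sel)
{
    switch (sel) {
      case SdwaSel::Byte0: return (src >>  0) & 0xff;
      case SdwaSel::Byte1: return (src >>  8) & 0xff;
      case SdwaSel::Byte2: return (src >> 16) & 0xff;
      case SdwaSel::Byte3: return (src >> 24) & 0xff;
      case SdwaSel::Word0: return (src >>  0) & 0xffff;
      case SdwaSel::Word1: return (src >> 16) & 0xffff;
      default:             return src;          // full dword, unchanged
    }
}

int main()
{
    uint32_t src0 = 0xdeadbeef;
    assert(selectSubDword(src0, SdwaSel::Word1) == 0xdead);
    assert(selectSubDword(src0, SdwaSel::Byte0) == 0xef);
    return 0;
}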
VecOperand< VecElemU16, false, 1 > VecOperandU16
ScalarOperand< ScalarRegU64, true > ConstScalarOperandU64
VecOperand< VecElemU16, true, 1 > ConstVecOperandU16
void processSDWA_dst(InFmt_VOP_SDWA sdwaInst, T &dst, T &origDst)
processSDWA_dst is a helper function for implementing sub d-word addressing instructions for the dst ...
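The dst-side pass then folds the computed value back into the selected sub-dword of the original destination, with the remaining bits preserved or cleared depending on the unused-bits setting. A simplified single-value sketch (the policy is reduced to preserve-or-zero; not gem5 code):

#include <cassert>
#include <cstdint>

// Merge an 8-bit result into byte 'byteSel' of the previous destination value,
// either preserving the other bytes or zeroing them.
uint32_t
mergeByte(uint32_t origDst, uint32_t result, int byteSel, bool preserveRest)
{
    uint32_t shift = byteSel * 8;
    uint32_t mask  = 0xffu << shift;
    uint32_t base  = preserveRest ? (origDst & ~mask) : 0;
    return base | ((result & 0xffu) << shift);
}

int main()
{
    // The new byte 0xAB goes into byte 2; the other bytes of 0x11223344 survive.
    assert(mergeByte(0x11223344, 0xAB, 2, true)  == 0x11AB3344);
    assert(mergeByte(0x11223344, 0xAB, 2, false) == 0x00AB0000);
    return 0;
}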
const int NumVecElemPerVecReg(64)
VecOperand< VecElemI32, false > VecOperandI32
VecOperand< VecElemI16, true, 1 > ConstVecOperandI16
VecOperand< VecElemF64, false > VecOperandF64
VecOperand< VecElemF32, false > VecOperandF32
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, T &src0)
processDPP is a helper function for implementing Data Parallel Primitive instructions.
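DPP permutes src0 across lanes before the ALU operation; DPP_CTRL selects the pattern (row shifts and rotates, mirroring, broadcasts), while ROW_MASK, BANK_MASK, and BC decide what lanes without a valid source receive. A toy sketch of one cross-lane pattern, each lane reading its right-hand neighbour within a 16-lane row (simplified bound-control rule; not a specific DPP_CTRL encoding):

#include <array>
#include <cassert>
#include <cstdint>

constexpr int kLanes = 64, kRowSize = 16;

// Each lane reads lane+1 within its 16-lane row; lanes with no valid source
// simply keep their own value here (a simplified stand-in for bound control).
std::array<uint32_t, kLanes>
readRightNeighbour(const std::array<uint32_t, kLanes> &src)
{
    std::array<uint32_t, kLanes> out{};
    for (int lane = 0; lane < kLanes; ++lane) {
        int from = lane + 1;
        bool sameRow = (from / kRowSize) == (lane / kRowSize);
        out[lane] = (from < kLanes && sameRow) ? src[from] : src[lane];
    }
    return out;
}

int main()
{
    std::array<uint32_t, kLanes> src{};
    for (int i = 0; i < kLanes; ++i)
        src[i] = i;

    auto permuted = readRightNeighbour(src);
    assert(permuted[0] == 1);      // lane 0 took lane 1's value
    assert(permuted[15] == 15);    // last lane of the row has no source
    return 0;
}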
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)