34#include "debug/VEGA.hh"
76 =
bits(vcc.
rawData(), lane) ? src1[lane] : src0[lane];
114 DPRINTF(VEGA,
"Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
115 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
116 "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
117 "BANK_MASK: %d, ROW_MASK: %d\n",
extData.iFmt_VOP_DPP.SRC0,
124 extData.iFmt_VOP_DPP.BANK_MASK,
125 extData.iFmt_VOP_DPP.ROW_MASK);
131 vdst[lane] = src0_dpp[lane] + src1[lane];
137 vdst[lane] = src0[lane] + src1[lane];
176 vdst[lane] = src0[lane] - src1[lane];
214 vdst[lane] = src1[lane] - src0[lane];
251 vdst[lane] = src0[lane] * src1[lane];
291 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
292 std::fpclassify(src0[lane]) == FP_ZERO) &&
293 !std::signbit(src0[lane])) {
296 }
else if (!std::signbit(src1[lane])) {
301 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
302 std::fpclassify(src0[lane]) == FP_ZERO) &&
303 std::signbit(src0[lane])) {
306 }
else if (std::signbit(src1[lane])) {
312 !std::signbit(src0[lane])) {
313 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
314 std::fpclassify(src1[lane]) == FP_ZERO) {
316 }
else if (!std::signbit(src1[lane])) {
317 vdst[lane] = +INFINITY;
319 vdst[lane] = -INFINITY;
322 std::signbit(src0[lane])) {
323 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
324 std::fpclassify(src1[lane]) == FP_ZERO) {
326 }
else if (std::signbit(src1[lane])) {
327 vdst[lane] = +INFINITY;
329 vdst[lane] = -INFINITY;
332 vdst[lane] = src0[lane] * src1[lane];
411 vdst[lane] = (
VecElemI32)((tmp_src0 * tmp_src1) >> 32);
437 if (wf->execMask(lane)) {
438 vdst[lane] =
bits(src0[lane], 23, 0) *
439 bits(src1[lane], 23, 0);
478 vdst[lane] = (
VecElemU32)((tmp_src0 * tmp_src1) >> 32);
515 vdst[lane] = std::fmin(src0[lane], src1[lane]);
552 vdst[lane] = std::fmax(src0[lane], src1[lane]);
588 vdst[lane] = std::min(src0[lane], src1[lane]);
624 vdst[lane] = std::max(src0[lane], src1[lane]);
660 vdst[lane] = std::min(src0[lane], src1[lane]);
696 vdst[lane] = std::max(src0[lane], src1[lane]);
724 if (wf->execMask(lane)) {
725 vdst[lane] = src1[lane] >>
bits(src0[lane], 4, 0);
764 vdst[lane] = src1[lane] >>
bits(src0[lane], 4, 0);
807 origSrc0_sdwa.
read();
810 DPRINTF(VEGA,
"Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
811 "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
812 "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
813 "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
817 extData.iFmt_VOP_SDWA.SRC0_SEL,
818 extData.iFmt_VOP_SDWA.SRC0_SEXT,
819 extData.iFmt_VOP_SDWA.SRC0_NEG,
820 extData.iFmt_VOP_SDWA.SRC0_ABS,
821 extData.iFmt_VOP_SDWA.SRC1_SEL,
822 extData.iFmt_VOP_SDWA.SRC1_SEXT,
823 extData.iFmt_VOP_SDWA.SRC1_NEG,
824 extData.iFmt_VOP_SDWA.SRC1_ABS);
831 vdst[lane] = src1[lane] <<
bits(src0_sdwa[lane], 4, 0);
832 origVdst[lane] = vdst[lane];
840 vdst[lane] = src1[lane] <<
bits(src0[lane], 4, 0);
868 if (wf->execMask(lane)) {
869 vdst[lane] = src0[lane] & src1[lane];
913 origSrc0_sdwa.
read();
916 DPRINTF(VEGA,
"Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
917 "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
918 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
919 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
923 extData.iFmt_VOP_SDWA.SRC0_SEL,
924 extData.iFmt_VOP_SDWA.SRC0_SEXT,
925 extData.iFmt_VOP_SDWA.SRC0_NEG,
926 extData.iFmt_VOP_SDWA.SRC0_ABS,
927 extData.iFmt_VOP_SDWA.SRC1_SEL,
928 extData.iFmt_VOP_SDWA.SRC1_SEXT,
929 extData.iFmt_VOP_SDWA.SRC1_NEG,
930 extData.iFmt_VOP_SDWA.SRC1_ABS);
937 vdst[lane] = src0_sdwa[lane] | src1[lane];
938 origVdst[lane] = vdst[lane];
946 vdst[lane] = src0[lane] | src1[lane];
984 vdst[lane] = src0[lane] ^ src1[lane];
1005 Wavefront *wf = gpuDynInst->wavefront();
1023 src0d[lane] = src0_dpp[lane];
1027 src0d[lane] = src0[lane];
1034 a1.data = uint16_t(
bits(src0d[lane], 15, 0));
1035 a2.
data = uint16_t(
bits(src0d[lane], 31, 16));
1036 b1.data = uint16_t(
bits(src1[lane], 15, 0));
1037 b2.data = uint16_t(
bits(src1[lane], 31, 16));
1039 vdst[lane] += float(
a1) * float(
b1);
1040 vdst[lane] += float(a2) * float(
b2);
1066 Wavefront *wf = gpuDynInst->wavefront();
1081 DPRINTF(VEGA,
"Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
1082 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
1083 "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
1084 "BANK_MASK: %d, ROW_MASK: %d\n",
extData.iFmt_VOP_DPP.SRC0,
1085 extData.iFmt_VOP_DPP.DPP_CTRL,
1086 extData.iFmt_VOP_DPP.SRC0_ABS,
1087 extData.iFmt_VOP_DPP.SRC0_NEG,
1088 extData.iFmt_VOP_DPP.SRC1_ABS,
1089 extData.iFmt_VOP_DPP.SRC1_NEG,
1091 extData.iFmt_VOP_DPP.BANK_MASK,
1092 extData.iFmt_VOP_DPP.ROW_MASK);
1098 vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
1105 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
1134 Wavefront *wf = gpuDynInst->wavefront();
1148 vdst[lane] = std::fma(src0[lane],
k, src1[lane]);
1176 Wavefront *wf = gpuDynInst->wavefront();
1190 vdst[lane] = std::fma(src0[lane], src1[lane],
k);
1217 Wavefront *wf = gpuDynInst->wavefront();
1237 origSrc0_sdwa.
read();
1240 DPRINTF(VEGA,
"Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
1241 "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
1242 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
1243 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
1247 extData.iFmt_VOP_SDWA.SRC0_SEL,
1248 extData.iFmt_VOP_SDWA.SRC0_SEXT,
1249 extData.iFmt_VOP_SDWA.SRC0_NEG,
1250 extData.iFmt_VOP_SDWA.SRC0_ABS,
1251 extData.iFmt_VOP_SDWA.SRC1_SEL,
1252 extData.iFmt_VOP_SDWA.SRC1_SEXT,
1253 extData.iFmt_VOP_SDWA.SRC1_NEG,
1254 extData.iFmt_VOP_SDWA.SRC1_ABS);
1261 vdst[lane] = src0_sdwa[lane] + src1[lane];
1262 origVdst[lane] = vdst[lane];
1264 + (
VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
1272 vdst[lane] = src0[lane] + src1[lane];
1274 + (
VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
1303 Wavefront *wf = gpuDynInst->wavefront();
1317 vdst[lane] = src0[lane] - src1[lane];
1318 vcc.
setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
1346 Wavefront *wf = gpuDynInst->wavefront();
1360 vdst[lane] = src1[lane] - src0[lane];
1361 vcc.
setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
1391 Wavefront *wf = gpuDynInst->wavefront();
1406 vdst[lane] = src0[lane] + src1[lane]
1411 >= 0x100000000 ? 1 : 0);
1441 Wavefront *wf = gpuDynInst->wavefront();
1457 = src0[lane] - src1[lane] -
bits(vcc.
rawData(), lane);
1459 > src0[lane] ? 1 : 0);
1490 Wavefront *wf = gpuDynInst->wavefront();
1506 = src1[lane] - src0[lane] -
bits(vcc.
rawData(), lane);
1508 > src1[lane] ? 1 : 0);
1692 Wavefront *wf = gpuDynInst->wavefront();
1705 vdst[lane] = src0[lane] + src1[lane];
1729 Wavefront *wf = gpuDynInst->wavefront();
1742 vdst[lane] = src0[lane] - src1[lane];
1767 Wavefront *wf = gpuDynInst->wavefront();
1780 vdst[lane] = src1[lane] - src0[lane];
1804 Wavefront *wf = gpuDynInst->wavefront();
1817 vdst[lane] = src0[lane] * src1[lane];
1844 if (wf->execMask(lane)) {
1845 vdst[lane] = src1[lane] <<
bits(src0[lane], 3, 0);
1871 Wavefront *wf = gpuDynInst->wavefront();
1884 vdst[lane] = src1[lane] >> src0[lane];
1909 Wavefront *wf = gpuDynInst->wavefront();
1922 vdst[lane] = src1[lane] >> src0[lane];
1989 Wavefront *wf = gpuDynInst->wavefront();
2002 vdst[lane] = std::max(src0[lane], src1[lane]);
2025 Wavefront *wf = gpuDynInst->wavefront();
2038 vdst[lane] = std::max(src0[lane], src1[lane]);
2061 Wavefront *wf = gpuDynInst->wavefront();
2074 vdst[lane] = std::min(src0[lane], src1[lane]);
2097 Wavefront *wf = gpuDynInst->wavefront();
2110 vdst[lane] = std::min(src0[lane], src1[lane]);
2153 Wavefront *wf = gpuDynInst->wavefront();
2172 origSrc0_sdwa.
read();
2175 DPRINTF(VEGA,
"Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
2176 "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
2177 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
2178 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
2182 extData.iFmt_VOP_SDWA.SRC0_SEL,
2183 extData.iFmt_VOP_SDWA.SRC0_SEXT,
2184 extData.iFmt_VOP_SDWA.SRC0_NEG,
2185 extData.iFmt_VOP_SDWA.SRC0_ABS,
2186 extData.iFmt_VOP_SDWA.SRC1_SEL,
2187 extData.iFmt_VOP_SDWA.SRC1_SEXT,
2188 extData.iFmt_VOP_SDWA.SRC1_NEG,
2189 extData.iFmt_VOP_SDWA.SRC1_ABS);
2196 vdst[lane] = src0_sdwa[lane] + src1[lane];
2197 origVdst[lane] = vdst[lane];
2205 vdst[lane] = src0[lane] + src1[lane];
2229 Wavefront *wf = gpuDynInst->wavefront();
2242 vdst[lane] = src0[lane] - src1[lane];
2265 Wavefront *wf = gpuDynInst->wavefront();
2278 vdst[lane] = src1[lane] - src0[lane];
2301 Wavefront *wf = gpuDynInst->wavefront();
2315 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
2338 Wavefront *wf = gpuDynInst->wavefront();
2352 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
2375 Wavefront *wf = gpuDynInst->wavefront();
2389 vdst[lane] = ~(src0[lane] ^ src1[lane]);
const std::string _opcode
Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *)
~Inst_VOP2__V_ADDC_CO_U32()
void execute(GPUDynInstPtr) override
Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *)
~Inst_VOP2__V_ADD_CO_U32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP2__V_ADD_F16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_ADD_F32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_ADD_U16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_ADD_U32(InFmt_VOP2 *)
Inst_VOP2__V_AND_B32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_ASHRREV_I16()
Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *)
~Inst_VOP2__V_ASHRREV_I32()
void execute(GPUDynInstPtr) override
Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_CNDMASK_B32()
Inst_VOP2__V_DOT2C_F32_BF16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_DOT2C_F32_BF16()
void execute(GPUDynInstPtr) override
Inst_VOP2__V_FMAC_F32(InFmt_VOP2 *)
Inst_VOP2__V_FMAC_F64(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_LDEXP_F16()
void execute(GPUDynInstPtr) override
Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *)
~Inst_VOP2__V_LSHLREV_B16()
void execute(GPUDynInstPtr) override
Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *)
~Inst_VOP2__V_LSHLREV_B32()
Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_LSHRREV_B16()
Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_LSHRREV_B32()
Inst_VOP2__V_MAC_F16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MAC_F32(InFmt_VOP2 *)
Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_MADAK_F16()
Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *)
~Inst_VOP2__V_MADAK_F32()
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_MADMK_F16()
Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_MADMK_F32()
Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MAX_F16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MAX_F32(InFmt_VOP2 *)
Inst_VOP2__V_MAX_I16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MAX_I32(InFmt_VOP2 *)
Inst_VOP2__V_MAX_U16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MAX_U32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MIN_F16(InFmt_VOP2 *)
Inst_VOP2__V_MIN_F32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MIN_I16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MIN_I32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MIN_U16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MIN_U32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MUL_F16(InFmt_VOP2 *)
Inst_VOP2__V_MUL_F32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_MUL_HI_I32_I24()
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_MUL_HI_U32_U24()
Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *)
~Inst_VOP2__V_MUL_I32_I24()
~Inst_VOP2__V_MUL_LEGACY_F32()
void execute(GPUDynInstPtr) override
Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *)
Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_MUL_LO_U16()
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_MUL_U32_U24()
Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_OR_B32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_SUBBREV_CO_U32()
Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *)
~Inst_VOP2__V_SUBB_CO_U32()
Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_SUBREV_CO_U32()
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_SUBREV_F16()
Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *)
~Inst_VOP2__V_SUBREV_F32()
Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *)
~Inst_VOP2__V_SUBREV_U16()
Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
~Inst_VOP2__V_SUBREV_U32()
void execute(GPUDynInstPtr) override
Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *)
~Inst_VOP2__V_SUB_CO_U32()
Inst_VOP2__V_SUB_F16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_SUB_F32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP2__V_SUB_U16(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_SUB_U32(InFmt_VOP2 *)
Inst_VOP2__V_XNOR_B32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
Inst_VOP2__V_XOR_B32(InFmt_VOP2 *)
void execute(GPUDynInstPtr) override
void vop2Helper(GPUDynInstPtr gpuDynInst, void(*fOpImpl)(T &, T &, T &, Wavefront *))
Inst_VOP2(InFmt_VOP2 *, const std::string &opcode)
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array; however, if we need the full operand data we use this method to ...
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
bit access to scalar data.
void panicUnimplemented() const
void read() override
read from the vrf.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
void write() override
write to the vrf.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr uint64_t sext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
mxfp< fp16_e8m7_info > mxbfloat16
classes that represent vector/scalar operands in VEGA ISA.
ScalarOperand< ScalarRegU64, false > ScalarOperandU64
VecOperand< VecElemF32, true > ConstVecOperandF32
VecOperand< VecElemU32, false > VecOperandU32
VecOperand< VecElemF64, true > ConstVecOperandF64
VecOperand< VecElemI16, false, 1 > VecOperandI16
VecOperand< VecElemI32, true > ConstVecOperandI32
VecOperand< VecElemU32, true > ConstVecOperandU32
void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T &src0, T &origSrc0)
processSDWA_src is a helper function for implementing sub d-word addressing instructions for the src ...
VecOperand< VecElemU16, false, 1 > VecOperandU16
ScalarOperand< ScalarRegU64, true > ConstScalarOperandU64
VecOperand< VecElemU16, true, 1 > ConstVecOperandU16
void processSDWA_dst(InFmt_VOP_SDWA sdwaInst, T &dst, T &origDst)
processSDWA_dst is a helper function for implementing sub d-word addressing instructions for the dst ...
const int NumVecElemPerVecReg(64)
VecOperand< VecElemI32, false > VecOperandI32
VecOperand< VecElemI16, true, 1 > ConstVecOperandI16
VecOperand< VecElemF64, false > VecOperandF64
VecOperand< VecElemF32, false > VecOperandF32
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, T &src0)
processDPP is a helper function for implementing Data Parallel Primitive instructions.
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)