#include "debug/VEGA.hh"
vdst[lane] = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
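
The line above is a per-lane select driven by a mask bit (the V_CNDMASK_B32 pattern): lane N takes src1 when bit N of VCC is set, otherwise src0. A minimal standalone sketch of the same selection in plain C++, using an ordinary uint64_t in place of gem5's VCC operand class (all names here are illustrative, not gem5 API):

#include <cstdint>
#include <cstdio>

int main()
{
    // Lane N takes src1 when bit N of the mask is set, otherwise src0.
    const int numLanes = 4;               // real wavefronts have 64 lanes
    uint64_t vccMask = 0x5;               // lanes 0 and 2 take src1
    uint32_t src0[numLanes] = {10, 11, 12, 13};
    uint32_t src1[numLanes] = {20, 21, 22, 23};
    uint32_t vdst[numLanes];

    for (int lane = 0; lane < numLanes; ++lane)
        vdst[lane] = ((vccMask >> lane) & 1) ? src1[lane] : src0[lane];

    std::printf("%u %u %u %u\n", vdst[0], vdst[1], vdst[2], vdst[3]);  // 20 11 22 13
    return 0;
}
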
DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " /* ... */);

vdst[lane] = src0_dpp[lane] + src1[lane];

vdst[lane] = src0[lane] + src1[lane];
vdst[lane] = src0[lane] - src1[lane];

vdst[lane] = src1[lane] - src0[lane];

vdst[lane] = src0[lane] * src1[lane];
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
            std::fpclassify(src0[lane]) == FP_ZERO) &&
           !std::signbit(src0[lane])) {
    // src0 is +0 (or a positive denormal): 0 * inf is NaN, otherwise a signed zero.
    if (std::isinf(src1[lane])) {
        vdst[lane] = NAN;
    } else if (!std::signbit(src1[lane])) {
        vdst[lane] = +0.0;
    } else {
        vdst[lane] = -0.0;
    }
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
            std::fpclassify(src0[lane]) == FP_ZERO) &&
           std::signbit(src0[lane])) {
    // src0 is -0 (or a negative denormal).
    if (std::isinf(src1[lane])) {
        vdst[lane] = NAN;
    } else if (std::signbit(src1[lane])) {
        vdst[lane] = +0.0;
    } else {
        vdst[lane] = -0.0;
    }
} else if (std::isinf(src0[lane]) &&
           !std::signbit(src0[lane])) {
    // src0 is +inf: inf * 0 is NaN, otherwise a signed infinity.
    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
        std::fpclassify(src1[lane]) == FP_ZERO) {
        vdst[lane] = NAN;
    } else if (!std::signbit(src1[lane])) {
        vdst[lane] = +INFINITY;
    } else {
        vdst[lane] = -INFINITY;
    }
} else if (std::isinf(src0[lane]) &&
           std::signbit(src0[lane])) {
    // src0 is -inf.
    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
        std::fpclassify(src1[lane]) == FP_ZERO) {
        vdst[lane] = NAN;
    } else if (std::signbit(src1[lane])) {
        vdst[lane] = +INFINITY;
    } else {
        vdst[lane] = -INFINITY;
    }
} else {
    // No special case applies: ordinary multiply.
    vdst[lane] = src0[lane] * src1[lane];
}
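
The chain above distinguishes signed zeros, denormals, and infinities before falling back to an ordinary multiply. A small standalone check of the classification calls it relies on (plain ISO C++, nothing gem5-specific):

#include <cmath>
#include <cstdio>
#include <limits>

int main()
{
    float negZero = -0.0f;
    float denorm = std::numeric_limits<float>::denorm_min();
    float negInf = -std::numeric_limits<float>::infinity();

    // -0.0f compares equal to 0.0f, so signbit() is needed to tell them apart.
    std::printf("-0.0f == 0.0f: %d, signbit(-0.0f): %d\n",
                negZero == 0.0f, (int)std::signbit(negZero));
    // Denormals classify as FP_SUBNORMAL, not FP_ZERO, hence the two-way test.
    std::printf("fpclassify(denorm) == FP_SUBNORMAL: %d\n",
                std::fpclassify(denorm) == FP_SUBNORMAL);
    std::printf("fpclassify(-inf) == FP_INFINITE: %d\n",
                std::fpclassify(negInf) == FP_INFINITE);
    return 0;
}
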
vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
           * sext<24>(bits(src1[lane], 23, 0));
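
The V_MUL_I32_I24-style multiply keeps only the low 24 bits of each source and sign-extends them before multiplying. A standalone sketch of that masking and sign-extension; myBits24 and mySext24 are illustrative helpers, not gem5's bits/sext templates:

#include <cstdint>
#include <cstdio>

// Keep only bits 23..0 of a 32-bit value.
static uint32_t myBits24(uint32_t v) { return v & 0xffffffu; }

// Sign-extend a 24-bit value to 32 bits (bit 23 is the sign bit).
static int32_t mySext24(uint32_t v)
{
    return (v & 0x800000u) ? (int32_t)(v | 0xff000000u) : (int32_t)v;
}

int main()
{
    uint32_t src0 = 0x00fffffeu;   // -2 when treated as a signed 24-bit value
    uint32_t src1 = 0x00000003u;   //  3
    int32_t vdst = mySext24(myBits24(src0)) * mySext24(myBits24(src1));
    std::printf("%d\n", vdst);     // prints -6
    return 0;
}
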
vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
if (wf->execMask(lane)) {
    vdst[lane] = bits(src0[lane], 23, 0) *
                 bits(src1[lane], 23, 0);
}
vop2Helper<ConstVecOperandU32, VecOperandU32>(gpuDynInst, opImpl);
vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
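
The *_HI variants keep only the upper half of the product; widening to 64 bits before the multiply preserves every bit of the intermediate result. A standalone sketch in the V_MUL_HI_U32_U24 style (illustrative, not the gem5 operand types):

#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t src0 = 0x00ffffffu;   // 24-bit operands
    uint32_t src1 = 0x00ffffffu;
    uint64_t product = (uint64_t)src0 * (uint64_t)src1;   // full 48-bit result
    uint32_t hi = (uint32_t)(product >> 32);              // upper half
    uint32_t lo = (uint32_t)product;                      // lower half
    std::printf("hi=0x%08x lo=0x%08x\n", hi, lo);         // hi=0x0000ffff lo=0xfe000001
    return 0;
}
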
vdst[lane] = std::fmin(src0[lane], src1[lane]);

vdst[lane] = std::fmax(src0[lane], src1[lane]);

vdst[lane] = std::min(src0[lane], src1[lane]);

vdst[lane] = std::max(src0[lane], src1[lane]);

vdst[lane] = std::min(src0[lane], src1[lane]);

vdst[lane] = std::max(src0[lane], src1[lane]);
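
Note the split above: the floating-point min/max lines go through std::fmin/std::fmax, which return the other operand when one input is NaN, while the integer variants use std::min/std::max. A quick standalone check of that NaN behaviour:

#include <cmath>
#include <cstdio>

int main()
{
    float x = 3.0f;
    // std::fmin/std::fmax treat a NaN input as missing data and return the other value.
    std::printf("fmin(NaN, 3)=%g fmax(NaN, 3)=%g\n",
                std::fmin(NAN, x), std::fmax(NAN, x));   // both print 3
    return 0;
}
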
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);

vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
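
Only the low five bits of the shift operand are used for 32-bit shifts, so a shift amount of 33 behaves like 1; masking also avoids the undefined behaviour of shifting a 32-bit value by 32 or more in C++. A standalone sketch (illustrative names):

#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t src1 = 0x80000000u;
    uint32_t shiftAmount = 33;                 // only bits 4:0 (i.e. 1) are honoured
    uint32_t vdst = src1 >> (shiftAmount & 0x1fu);
    std::printf("0x%08x\n", vdst);             // prints 0x40000000
    return 0;
}
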
origSrc0_sdwa.read();

DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
        "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
        "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
        "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
        /* ... */);

vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
origVdst[lane] = vdst[lane];

vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " /* ... */);

vdst[lane] = src0_dpp[lane] & src1[lane];

vdst[lane] = src0[lane] & src1[lane];
origSrc0_sdwa.read();

DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
        /* ... */);

vdst[lane] = src0_sdwa[lane] | src1[lane];
origVdst[lane] = vdst[lane];

vdst[lane] = src0[lane] | src1[lane];
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] ^ src1[lane];
Wavefront *wf = gpuDynInst->wavefront();

DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " /* ... */);

vdst[lane] = std::fma(src0_dpp[lane], src1[lane], vdst[lane]);

vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::fma(src0[lane], k, src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::fma(src0[lane], src1[lane], k);
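
All three multiply-add forms above reduce to a single fused multiply-add: MAC accumulates into the destination, MADMK multiplies by an inline literal constant k, and MADAK adds an inline literal constant k. A standalone sketch of the three shapes with std::fma (k stands in for the instruction's literal constant operand):

#include <cmath>
#include <cstdio>

int main()
{
    float src0 = 2.0f, src1 = 3.0f, vdst = 10.0f;
    float k = 0.5f;                                  // inline literal constant

    float mac   = std::fma(src0, src1, vdst);        // V_MAC_F32:   src0*src1 + vdst
    float madmk = std::fma(src0, k, src1);           // V_MADMK_F32: src0*k    + src1
    float madak = std::fma(src0, src1, k);           // V_MADAK_F32: src0*src1 + k

    std::printf("mac=%g madmk=%g madak=%g\n", mac, madmk, madak);  // 16 4 6.5
    return 0;
}
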
Wavefront *wf = gpuDynInst->wavefront();

origSrc0_sdwa.read();

DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
        "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
        /* ... */);

vdst[lane] = src0_sdwa[lane] + src1[lane];
origVdst[lane] = vdst[lane];
vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
    + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);

vdst[lane] = src0[lane] + src1[lane];
vcc.setBit(lane, ((VecElemU64)src0[lane]
    + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
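
The carry-out test above widens both operands to 64 bits and checks whether the true sum reaches 2^32, which is exactly the condition under which the 32-bit result wrapped. A standalone sketch of the same test:

#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t src0 = 0xfffffff0u;
    uint32_t src1 = 0x00000020u;
    uint32_t vdst = src0 + src1;                     // wraps to 0x10
    uint32_t carry =
        ((uint64_t)src0 + (uint64_t)src1 >= 0x100000000ULL) ? 1 : 0;
    std::printf("vdst=0x%08x carry=%u\n", vdst, carry);   // vdst=0x00000010 carry=1
    return 0;
}
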
Wavefront *wf = gpuDynInst->wavefront();

vdst[lane] = src0[lane] - src1[lane];
vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
Wavefront *wf = gpuDynInst->wavefront();

vdst[lane] = src1[lane] - src0[lane];
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
Wavefront *wf = gpuDynInst->wavefront();

vdst[lane] = src0[lane] + src1[lane]
    + bits(vcc.rawData(), lane);
vcc.setBit(lane, ((VecElemU64)src0[lane] + (VecElemU64)src1[lane]
    + (VecElemU64)bits(vcc.rawData(), lane))
    >= 0x100000000 ? 1 : 0);
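
A carry-propagating add (V_ADD_CO_U32 followed by V_ADDC_CO_U32) is the usual way to add 64-bit values 32 bits at a time: the first add produces the carry, the second consumes it and produces its own. A standalone sketch of that chain in plain C++ (illustrative, not gem5 code):

#include <cstdint>
#include <cstdio>

int main()
{
    uint64_t a = 0x00000001ffffffffULL;
    uint64_t b = 0x0000000000000001ULL;

    uint32_t aLo = (uint32_t)a, aHi = (uint32_t)(a >> 32);
    uint32_t bLo = (uint32_t)b, bHi = (uint32_t)(b >> 32);

    // Low word: add and compute the carry-out (V_ADD_CO_U32 shape).
    uint32_t sumLo = aLo + bLo;
    uint32_t carry = ((uint64_t)aLo + (uint64_t)bLo >= 0x100000000ULL) ? 1 : 0;

    // High word: add with carry-in (V_ADDC_CO_U32 shape).
    uint32_t sumHi = aHi + bHi + carry;

    uint64_t sum = ((uint64_t)sumHi << 32) | sumLo;
    std::printf("0x%016llx\n", (unsigned long long)sum);  // 0x0000000200000000
    return 0;
}
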
Wavefront *wf = gpuDynInst->wavefront();

vdst[lane] = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
    > src0[lane] ? 1 : 0);
Wavefront *wf = gpuDynInst->wavefront();

vdst[lane] = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
    > src1[lane] ? 1 : 0);
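
The subtract-with-borrow pair works the same way in the other direction: the low-word subtract reports a borrow when the subtrahend is larger, and the high-word subtract consumes it. A standalone sketch of a 64-bit subtract built from two 32-bit steps (the borrow-out test is widened here to avoid wraparound; names are illustrative):

#include <cstdint>
#include <cstdio>

int main()
{
    uint64_t a = 0x0000000200000000ULL;
    uint64_t b = 0x0000000000000001ULL;

    uint32_t aLo = (uint32_t)a, aHi = (uint32_t)(a >> 32);
    uint32_t bLo = (uint32_t)b, bHi = (uint32_t)(b >> 32);

    // Low word: subtract and compute the borrow-out (V_SUB_CO_U32 shape).
    uint32_t diffLo = aLo - bLo;
    uint32_t borrow = (bLo > aLo) ? 1 : 0;

    // High word: subtract with borrow-in (V_SUBB_CO_U32 shape).
    uint32_t diffHi = aHi - bHi - borrow;
    uint32_t borrowOut = ((uint64_t)bHi + borrow > aHi) ? 1 : 0;

    uint64_t diff = ((uint64_t)diffHi << 32) | diffLo;
    std::printf("0x%016llx borrow=%u\n", (unsigned long long)diff, borrowOut);
    // prints 0x00000001ffffffff borrow=0
    return 0;
}
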
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] + src1[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] - src1[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] - src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] * src1[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] >> src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] >> src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::max(src0[lane], src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::max(src0[lane], src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::min(src0[lane], src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::min(src0[lane], src1[lane]);
Wavefront *wf = gpuDynInst->wavefront();

origSrc0_sdwa.read();

DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
        /* ... */);

vdst[lane] = src0_sdwa[lane] + src1[lane];
origVdst[lane] = vdst[lane];

vdst[lane] = src0[lane] + src1[lane];
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] - src1[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] - src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = ~(src0[lane] ^ src1[lane]);
const std::string _opcode

Each VOP2 instruction class referenced above declares a constructor taking an
InFmt_VOP2 *, a destructor, and void execute(GPUDynInstPtr) override:

Inst_VOP2__V_ADDC_CO_U32, Inst_VOP2__V_ADD_CO_U32, Inst_VOP2__V_ADD_F16,
Inst_VOP2__V_ADD_F32, Inst_VOP2__V_ADD_U16, Inst_VOP2__V_ADD_U32,
Inst_VOP2__V_AND_B32, Inst_VOP2__V_ASHRREV_I16, Inst_VOP2__V_ASHRREV_I32,
Inst_VOP2__V_CNDMASK_B32, Inst_VOP2__V_FMAC_F32, Inst_VOP2__V_LDEXP_F16,
Inst_VOP2__V_LSHLREV_B16, Inst_VOP2__V_LSHLREV_B32, Inst_VOP2__V_LSHRREV_B16,
Inst_VOP2__V_LSHRREV_B32, Inst_VOP2__V_MAC_F16, Inst_VOP2__V_MAC_F32,
Inst_VOP2__V_MADAK_F16, Inst_VOP2__V_MADAK_F32, Inst_VOP2__V_MADMK_F16,
Inst_VOP2__V_MADMK_F32, Inst_VOP2__V_MAX_F16, Inst_VOP2__V_MAX_F32,
Inst_VOP2__V_MAX_I16, Inst_VOP2__V_MAX_I32, Inst_VOP2__V_MAX_U16,
Inst_VOP2__V_MAX_U32, Inst_VOP2__V_MIN_F16, Inst_VOP2__V_MIN_F32,
Inst_VOP2__V_MIN_I16, Inst_VOP2__V_MIN_I32, Inst_VOP2__V_MIN_U16,
Inst_VOP2__V_MIN_U32, Inst_VOP2__V_MUL_F16, Inst_VOP2__V_MUL_F32,
Inst_VOP2__V_MUL_HI_I32_I24, Inst_VOP2__V_MUL_HI_U32_U24,
Inst_VOP2__V_MUL_I32_I24, Inst_VOP2__V_MUL_LEGACY_F32, Inst_VOP2__V_MUL_LO_U16,
Inst_VOP2__V_MUL_U32_U24, Inst_VOP2__V_OR_B32, Inst_VOP2__V_SUBBREV_CO_U32,
Inst_VOP2__V_SUBB_CO_U32, Inst_VOP2__V_SUBREV_CO_U32, Inst_VOP2__V_SUBREV_F16,
Inst_VOP2__V_SUBREV_F32, Inst_VOP2__V_SUBREV_U16, Inst_VOP2__V_SUBREV_U32,
Inst_VOP2__V_SUB_CO_U32, Inst_VOP2__V_SUB_F16, Inst_VOP2__V_SUB_F32,
Inst_VOP2__V_SUB_U16, Inst_VOP2__V_SUB_U32, Inst_VOP2__V_XNOR_B32,
Inst_VOP2__V_XOR_B32
void read() override
Read from and write to the underlying register(s) that this operand refers to.
std::enable_if< Condition, DataType >::type rawData() const
We store scalar data in a std::array; however, if we need the full operand data we use this method to ...
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
Bit access to scalar data.
void panicUnimplemented() const
void read() override
Read from the VRF (vector register file).
void readSrc()
Certain vector operands can read from the VRF/SRF or constants.
void write() override
Write to the VRF.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
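
The description above fixes the convention: 'first' is the high bit position, 'last' the low one, and the extracted field is shifted down to bit 0 (hence calls such as bits(src0[lane], 23, 0) or bits(src0[lane], 4, 0) in the code earlier). A minimal standalone sketch of that behaviour; myBits is an illustrative helper, not gem5's bits():

#include <cstdint>
#include <cstdio>

// Extract bits first..last (inclusive, first >= last) and right-justify them.
static uint32_t myBits(uint32_t val, unsigned first, unsigned last)
{
    unsigned width = first - last + 1;
    uint32_t mask = (width >= 32) ? 0xffffffffu : ((1u << width) - 1u);
    return (val >> last) & mask;
}

int main()
{
    uint32_t v = 0xdeadbeefu;
    std::printf("0x%x\n", myBits(v, 23, 0));   // low 24 bits: 0xadbeef
    std::printf("0x%x\n", myBits(v, 4, 0));    // low 5 bits:  0xf
    return 0;
}
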
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true, allowing the caller to supply a diagnostic message.
constexpr unsigned NumVecElemPerVecReg
void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T &src0, T &origSrc0)
processSDWA_src is a helper function for implementing sub d-word addressing instructions for the src operands.
void processSDWA_dst(InFmt_VOP_SDWA sdwaInst, T &dst, T &origDst)
processSDWA_dst is a helper function for implementing sub d-word addressing instructions for the dst operand.
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, T &src0)
processDPP is a helper function for implementing Data Parallel Primitive instructions.
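
SDWA (sub d-word addressing) lets an instruction operate on a selected byte or 16-bit half of a 32-bit register rather than the whole dword. A standalone sketch of that source-selection idea; the SrcSel enum and selectSubDword are invented for illustration and are not the InFmt_VOP_SDWA encoding or gem5's helper:

#include <cstdint>
#include <cstdio>

// Illustrative sub-dword source select: pick a byte or word of a 32-bit value
// and zero-extend it.
enum class SrcSel { Byte0, Byte1, Byte2, Byte3, Word0, Word1, Dword };

static uint32_t selectSubDword(uint32_t src, SrcSel sel)
{
    switch (sel) {
      case SrcSel::Byte0: return src & 0xffu;
      case SrcSel::Byte1: return (src >> 8) & 0xffu;
      case SrcSel::Byte2: return (src >> 16) & 0xffu;
      case SrcSel::Byte3: return (src >> 24) & 0xffu;
      case SrcSel::Word0: return src & 0xffffu;
      case SrcSel::Word1: return (src >> 16) & 0xffffu;
      case SrcSel::Dword: default: return src;
    }
}

int main()
{
    uint32_t src = 0xdeadbeefu;
    std::printf("byte1=0x%x word1=0x%x\n",
                selectSubDword(src, SrcSel::Byte1),
                selectSubDword(src, SrcSel::Word1));   // byte1=0xbe word1=0xdead
    return 0;
}
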
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)