#include "debug/VEGA.hh"
// V_CNDMASK_B32: per-lane select driven by the corresponding VCC bit
vdst[lane] = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
// V_ADD_F32 with a DPP source: src0_dpp holds the DPP-modified copy of src0
DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
vdst[lane] = src0_dpp[lane] + src1[lane];   // DPP path
vdst[lane] = src0[lane] + src1[lane];       // plain VOP2 path
vdst[lane] = src0[lane] - src1[lane];   // V_SUB_F32
vdst[lane] = src1[lane] - src0[lane];   // V_SUBREV_F32: operands reversed
vdst[lane] = src0[lane] * src1[lane];   // floating-point multiply
// Explicit special-case handling for the floating-point multiply: signed
// zeros/denormals and infinities are resolved before falling back to the
// ordinary product.
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
            std::fpclassify(src0[lane]) == FP_ZERO) &&
           !std::signbit(src0[lane])) {
    if (std::isinf(src1[lane])) {
        vdst[lane] = NAN;
    } else if (!std::signbit(src1[lane])) {
        vdst[lane] = +0.0;
    } else {
        vdst[lane] = -0.0;
    }
} else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
            std::fpclassify(src0[lane]) == FP_ZERO) &&
           std::signbit(src0[lane])) {
    if (std::isinf(src1[lane])) {
        vdst[lane] = NAN;
    } else if (std::signbit(src1[lane])) {
        vdst[lane] = +0.0;
    } else {
        vdst[lane] = -0.0;
    }
} else if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
        std::fpclassify(src1[lane]) == FP_ZERO) {
        vdst[lane] = NAN;
    } else if (!std::signbit(src1[lane])) {
        vdst[lane] = +INFINITY;
    } else {
        vdst[lane] = -INFINITY;
    }
} else if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
        std::fpclassify(src1[lane]) == FP_ZERO) {
        vdst[lane] = NAN;
    } else if (std::signbit(src1[lane])) {
        vdst[lane] = +INFINITY;
    } else {
        vdst[lane] = -INFINITY;
    }
} else {
    vdst[lane] = src0[lane] * src1[lane];
}
// 24-bit multiplies: each operand is reduced to its low 24 bits (and
// sign-extended for the signed form); tmp_src0/tmp_src1 are the widened
// 64-bit temporaries, so the *_HI variants can keep bits [63:32].
vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);   // V_MUL_HI_I32_I24
if (wf->execMask(lane)) {
    vdst[lane] = bits(src0[lane], 23, 0) *
        bits(src1[lane], 23, 0);                          // V_MUL_U32_U24
}
vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);   // V_MUL_HI_U32_U24
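// A minimal standalone sketch (not gem5 code) of the 24-bit multiply
// semantics above: the low and high halves of the 48-bit product of two
// unsigned 24-bit operands, formed in a 64-bit temporary.
#include <cstdint>

static inline uint32_t mul_u24_lo(uint32_t a, uint32_t b)
{
    uint64_t prod = (uint64_t)(a & 0xffffffu) * (b & 0xffffffu);
    return (uint32_t)prod;            // V_MUL_U32_U24-style result
}

static inline uint32_t mul_u24_hi(uint32_t a, uint32_t b)
{
    uint64_t prod = (uint64_t)(a & 0xffffffu) * (b & 0xffffffu);
    return (uint32_t)(prod >> 32);    // V_MUL_HI_U32_U24-style result
}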
// Per-lane min/max: the F32 forms use std::fmin/std::fmax; the signed
// (I32) and unsigned (U32) 32-bit integer forms below use std::min/std::max,
// with signedness coming from the operand element type.
vdst[lane] = std::fmin(src0[lane], src1[lane]);   // V_MIN_F32
vdst[lane] = std::fmax(src0[lane], src1[lane]);   // V_MAX_F32
vdst[lane] = std::min(src0[lane], src1[lane]);
vdst[lane] = std::max(src0[lane], src1[lane]);
vdst[lane] = std::min(src0[lane], src1[lane]);
vdst[lane] = std::max(src0[lane], src1[lane]);
// "REV" shifts take the shift amount from src0 (only its low five bits)
// and the value to be shifted from src1.
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);   // V_LSHRREV_B32 (logical)
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);   // V_ASHRREV_I32 (arithmetic via signed type)
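// A standalone sketch (not gem5 code) of the "REV" shift convention: the
// count comes from src0 and is masked to five bits; arithmetic vs. logical
// behaviour follows from the signedness of the value operand in C++.
#include <cstdint>

static inline uint32_t lshrrev_b32(uint32_t shiftSrc0, uint32_t valSrc1)
{
    return valSrc1 >> (shiftSrc0 & 0x1f);   // logical shift right
}

static inline int32_t ashrrev_i32(uint32_t shiftSrc0, int32_t valSrc1)
{
    return valSrc1 >> (shiftSrc0 & 0x1f);   // arithmetic shift right
}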
// V_LSHLREV_B32 with an SDWA source: the unmodified source register is
// also read so processSDWA_src can apply the sub-dword selects to it.
origSrc0_sdwa.read();
DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
        "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
        "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
        "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);   // SDWA path
origVdst[lane] = vdst[lane];   // keep a copy for the SDWA destination handling
vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);        // plain VOP2 path
// V_AND_B32 with a DPP source, same structure as the V_ADD_F32 case above
DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
vdst[lane] = src0_dpp[lane] & src1[lane];   // DPP path
vdst[lane] = src0[lane] & src1[lane];       // plain VOP2 path
// V_OR_B32 with an SDWA source, same structure as V_LSHLREV_B32 above
origSrc0_sdwa.read();
DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
vdst[lane] = src0_sdwa[lane] | src1[lane];   // SDWA path
origVdst[lane] = vdst[lane];
vdst[lane] = src0[lane] | src1[lane];        // plain VOP2 path

vdst[lane] = src0[lane] ^ src1[lane];        // V_XOR_B32
// V_MAC_F32 multiply-accumulates into the destination, so vdst is both an
// input and an output of the fused multiply-add.
DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
        "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
vdst[lane] = std::fma(src0_dpp[lane], src1[lane], vdst[lane]);   // DPP path
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);       // plain VOP2 path
Wavefront *wf = gpuDynInst->wavefront();
// V_MADMK_F32: multiply src0 by the literal constant k, then add src1
vdst[lane] = std::fma(src0[lane], k, src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
// V_MADAK_F32: multiply src0 by src1, then add the literal constant k
vdst[lane] = std::fma(src0[lane], src1[lane], k);
Wavefront *wf = gpuDynInst->wavefront();
// V_ADD_CO_U32: unsigned add that also writes a per-lane carry-out bit
origSrc0_sdwa.read();
DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
        "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
vdst[lane] = src0_sdwa[lane] + src1[lane];   // SDWA path
origVdst[lane] = vdst[lane];
// carry out: widen both operands to 64 bits and test whether the true sum
// overflows 32 bits
vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
    + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);

vdst[lane] = src0[lane] + src1[lane];        // plain VOP2 path
vcc.setBit(lane, ((VecElemU64)src0[lane]
    + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
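// A minimal sketch (not gem5 code) of the carry-out idiom above: do the
// 32-bit add, then derive the carry bit from the widened 64-bit sum.
#include <cstdint>

static inline uint32_t add_co_u32(uint32_t a, uint32_t b, uint32_t &carryOut)
{
    uint64_t wide = (uint64_t)a + b;
    carryOut = (wide >= 0x100000000ULL) ? 1u : 0u;   // bit 32 of the true sum
    return (uint32_t)wide;                           // truncated 32-bit result
}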
Wavefront *wf = gpuDynInst->wavefront();
// V_SUB_CO_U32: the VCC bit records the borrow out of the subtraction
vdst[lane] = src0[lane] - src1[lane];
vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);

Wavefront *wf = gpuDynInst->wavefront();
// V_SUBREV_CO_U32: reversed operands, reversed borrow condition
vdst[lane] = src1[lane] - src0[lane];
vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
Wavefront *wf = gpuDynInst->wavefront();
// V_ADDC_CO_U32: add with the per-lane VCC bit as carry in
vdst[lane] = src0[lane] + src1[lane] + bits(vcc.rawData(), lane);
vcc.setBit(lane, ((VecElemU64)src0[lane] + (VecElemU64)src1[lane]
    + (VecElemU64)bits(vcc.rawData(), lane)) >= 0x100000000 ? 1 : 0);
Wavefront *wf = gpuDynInst->wavefront();
// V_SUBB_CO_U32: subtract with the incoming VCC bit as borrow in; the
// borrow out is set when the subtrahend plus borrow exceeds src0
vdst[lane] = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
    > src0[lane] ? 1 : 0);

Wavefront *wf = gpuDynInst->wavefront();
// V_SUBBREV_CO_U32: the reversed-operand form of the same operation
vdst[lane] = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
    > src1[lane] ? 1 : 0);
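// A sketch (not gem5 code) of why the carry/borrow-chained forms exist: a
// 64-bit add can be built from V_ADD_CO_U32 on the low words followed by
// V_ADDC_CO_U32 on the high words, which these scalar lines mimic.
#include <cstdint>

static inline uint64_t add64_via_carry_chain(uint64_t a, uint64_t b)
{
    uint64_t lo = (uint64_t)(uint32_t)a + (uint32_t)b;       // V_ADD_CO_U32
    uint32_t carry = (uint32_t)(lo >> 32);                   // carry out -> VCC
    uint32_t hi = (uint32_t)(a >> 32) + (uint32_t)(b >> 32)
        + carry;                                             // V_ADDC_CO_U32
    return ((uint64_t)hi << 32) | (uint32_t)lo;
}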
// The 16-bit forms repeat the same per-lane pattern; each execute() first
// grabs the wavefront so the exec mask can be honoured. The 16-bit left
// shift masks its count to four bits.
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] + src1[lane];                // 16-bit add

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] - src1[lane];                // 16-bit subtract

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] - src0[lane];                // reversed subtract

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] * src1[lane];                // V_MUL_LO_U16

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);   // V_LSHLREV_B16

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] >> src0[lane];               // 16-bit right shifts
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] >> src0[lane];

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::max(src0[lane], src1[lane]);       // 16-bit max variants
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::max(src0[lane], src1[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::min(src0[lane], src1[lane]);       // 16-bit min variants
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = std::min(src0[lane], src1[lane]);
Wavefront *wf = gpuDynInst->wavefront();
// V_ADD_U32: plain 32-bit add with no carry out (contrast V_ADD_CO_U32)
origSrc0_sdwa.read();
DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
        "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
        "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
vdst[lane] = src0_sdwa[lane] + src1[lane];   // SDWA path
origVdst[lane] = vdst[lane];
vdst[lane] = src0[lane] + src1[lane];        // plain VOP2 path
Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src0[lane] - src1[lane];   // V_SUB_U32

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = src1[lane] - src0[lane];   // V_SUBREV_U32

Wavefront *wf = gpuDynInst->wavefront();
// V_FMAC_F32: fused multiply-accumulate into the destination register
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);

Wavefront *wf = gpuDynInst->wavefront();
vdst[lane] = ~(src0[lane] ^ src1[lane]);   // V_XNOR_B32
Each VOP2 instruction class referenced by this listing declares the same interface: a constructor taking an InFmt_VOP2 *, a destructor, and a void execute(GPUDynInstPtr) override. The classes are:

Inst_VOP2__V_ADDC_CO_U32
Inst_VOP2__V_ADD_CO_U32
Inst_VOP2__V_ADD_F16
Inst_VOP2__V_ADD_F32
Inst_VOP2__V_ADD_U16
Inst_VOP2__V_ADD_U32
Inst_VOP2__V_AND_B32
Inst_VOP2__V_ASHRREV_I16
Inst_VOP2__V_ASHRREV_I32
Inst_VOP2__V_CNDMASK_B32
Inst_VOP2__V_FMAC_F32
Inst_VOP2__V_LDEXP_F16
Inst_VOP2__V_LSHLREV_B16
Inst_VOP2__V_LSHLREV_B32
Inst_VOP2__V_LSHRREV_B16
Inst_VOP2__V_LSHRREV_B32
Inst_VOP2__V_MAC_F16
Inst_VOP2__V_MAC_F32
Inst_VOP2__V_MADAK_F16
Inst_VOP2__V_MADAK_F32
Inst_VOP2__V_MADMK_F16
Inst_VOP2__V_MADMK_F32
Inst_VOP2__V_MAX_F16
Inst_VOP2__V_MAX_F32
Inst_VOP2__V_MAX_I16
Inst_VOP2__V_MAX_I32
Inst_VOP2__V_MAX_U16
Inst_VOP2__V_MAX_U32
Inst_VOP2__V_MIN_F16
Inst_VOP2__V_MIN_F32
Inst_VOP2__V_MIN_I16
Inst_VOP2__V_MIN_I32
Inst_VOP2__V_MIN_U16
Inst_VOP2__V_MIN_U32
Inst_VOP2__V_MUL_F16
Inst_VOP2__V_MUL_F32
Inst_VOP2__V_MUL_HI_I32_I24
Inst_VOP2__V_MUL_HI_U32_U24
Inst_VOP2__V_MUL_I32_I24
Inst_VOP2__V_MUL_LEGACY_F32
Inst_VOP2__V_MUL_LO_U16
Inst_VOP2__V_MUL_U32_U24
Inst_VOP2__V_OR_B32
Inst_VOP2__V_SUBBREV_CO_U32
Inst_VOP2__V_SUBB_CO_U32
Inst_VOP2__V_SUBREV_CO_U32
Inst_VOP2__V_SUBREV_F16
Inst_VOP2__V_SUBREV_F32
Inst_VOP2__V_SUBREV_U16
Inst_VOP2__V_SUBREV_U32
Inst_VOP2__V_SUB_CO_U32
Inst_VOP2__V_SUB_F16
Inst_VOP2__V_SUB_F32
Inst_VOP2__V_SUB_U16
Inst_VOP2__V_SUB_U32
Inst_VOP2__V_XNOR_B32
Inst_VOP2__V_XOR_B32
void vop2Helper(GPUDynInstPtr gpuDynInst, void (*fOpImpl)(T &, T &, T &, Wavefront *))
    Shared helper used by the VOP2 implementations; the per-lane operation is supplied through fOpImpl.
void read() override
    Read from and write to the underlying register(s) that this operand refers to.
std::enable_if< Condition, DataType >::type rawData() const
    Scalar data is stored in a std::array; this method returns the full operand data as a single raw value.
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
    Bit-level access to scalar data: sets the given bit to bit_val.
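The per-lane carry and condition bits in the listing above (bits(vcc.rawData(), lane), vcc.setBit(lane, ...)) amount to reading and writing single bits of a 64-bit lane mask. A minimal standalone sketch of that idiom, without the gem5 operand classes:

#include <cstdint>

// What bits(vcc.rawData(), lane) extracts for a wavefront-wide mask.
static inline unsigned getLaneBit(uint64_t mask, int lane)
{
    return (unsigned)((mask >> lane) & 1u);
}

// The effect of setBit(lane, bitVal) on such a mask.
static inline uint64_t setLaneBit(uint64_t mask, int lane, unsigned bitVal)
{
    return bitVal ? (mask | (UINT64_C(1) << lane))
                  : (mask & ~(UINT64_C(1) << lane));
}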
void panicUnimplemented() const
void read() override
    Read from the VRF.
void readSrc()
    Certain vector operands can read from the VRF/SRF or from constants.
void write() override
    Write to the VRF.
constexpr T bits(T val, unsigned first, unsigned last)
    Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right-justify it.
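A standalone sketch of that behaviour for unsigned values; note that in the calls above (e.g. bits(src0[lane], 4, 0)) 'first' is the high bit and 'last' the low bit:

#include <cstdint>

// Extract bits [first:last] of val and right-justify them,
// e.g. extractBits(0xABCD, 7, 4) == 0xC.
static inline uint64_t extractBits(uint64_t val, unsigned first, unsigned last)
{
    unsigned width = first - last + 1;
    uint64_t mask = (width >= 64) ? ~UINT64_C(0)
                                  : ((UINT64_C(1) << width) - 1);
    return (val >> last) & mask;
}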
constexpr uint64_t sext(uint64_t val)
    Sign-extend an N-bit value to 64 bits.
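A standalone sketch of the same operation with the bit width passed as a run-time argument (the gem5 helper presumably fixes the width at compile time, which this sketch does not reproduce); this is the widening the signed 24-bit multiply needs for its operands:

#include <cstdint>

// Sign-extend the low n bits of val to 64 bits,
// e.g. signExtend(0x800000, 24) == 0xffffffffff800000.
static inline uint64_t signExtend(uint64_t val, unsigned n)
{
    uint64_t signBit = UINT64_C(1) << (n - 1);
    uint64_t mask = (n >= 64) ? ~UINT64_C(0) : ((UINT64_C(1) << n) - 1);
    val &= mask;
    return (val ^ signBit) - signBit;   // branch-free sign extension
}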
constexpr unsigned NumVecElemPerVecReg
    Number of vector elements (lanes) in a vector register.
void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T &src0, T &origSrc0)
    Helper for implementing sub d-word addressing (SDWA) on the source operands.
void processSDWA_dst(InFmt_VOP_SDWA sdwaInst, T &dst, T &origDst)
    Helper for implementing sub d-word addressing (SDWA) on the destination operand.
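SDWA lets an instruction operate on a selected byte or word of each 32-bit source and merge the result back into a selected part of the destination. A standalone sketch of the source-side selection; the enumerators here are illustrative stand-ins, not the actual InFmt_VOP_SDWA field encoding, and sign-extension and clamping are omitted:

#include <cstdint>

enum class SdwaSel { Byte0, Byte1, Byte2, Byte3, Word0, Word1, Dword };

// Pick the selected byte/word of a 32-bit register value and right-justify it.
static inline uint32_t selectSubDword(uint32_t reg, SdwaSel sel)
{
    switch (sel) {
      case SdwaSel::Byte0: return reg & 0xff;
      case SdwaSel::Byte1: return (reg >> 8) & 0xff;
      case SdwaSel::Byte2: return (reg >> 16) & 0xff;
      case SdwaSel::Byte3: return (reg >> 24) & 0xff;
      case SdwaSel::Word0: return reg & 0xffff;
      case SdwaSel::Word1: return (reg >> 16) & 0xffff;
      default:             return reg;   // full dword
    }
}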
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, T &src0)
    Helper for implementing Data Parallel Primitive (DPP) instructions, which let a lane read another lane's copy of src0.
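DPP_CTRL selects a cross-lane pattern (quad permute, row shift/rotate, and so on) that is applied to src0 before the ALU operation, which is why the implementations above compute with a src0_dpp copy. A standalone sketch of one such pattern, a quad permute across a 64-lane wavefront; the perm argument is an illustrative stand-in for the quad-permute field of DPP_CTRL:

#include <array>
#include <cstdint>

constexpr int NumLanes = 64;

// Within every group of four lanes, lane i reads the value held by lane
// perm[i & 3] of its own quad.
static std::array<uint32_t, NumLanes>
quadPerm(const std::array<uint32_t, NumLanes> &src0,
         const std::array<int, 4> &perm)
{
    std::array<uint32_t, NumLanes> src0_dpp{};
    for (int lane = 0; lane < NumLanes; ++lane) {
        int quadBase = lane & ~3;   // first lane of this quad
        src0_dpp[lane] = src0[quadBase + perm[lane & 3]];
    }
    return src0_dpp;
}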
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)