#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"
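// The scalar-memory fragments below appear to come from Inst_SMEM::calcAddr:
// scalarAddr is taken either from an SGPR base plus offset or, for the
// s_buffer_load/s_buffer_store forms, derived from a buffer resource
// descriptor copied out of the 128-bit s_rsrc_desc operand.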
gpu_dyn_inst->scalarAddr = vaddr;
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
gpu_dyn_inst->scalarAddr = vaddr;
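// VOP2 sub-dword addressing (SDWA): sdwaSrcHelper reads the SDWA-encoded
// src0 together with unmodified copies of src0 and src1, logs the
// SEL/SEXT/NEG/ABS fields below, and (presumably via processSDWA_src)
// applies them; sdwaDstHelper keeps the original vdst so processSDWA_dst
// can merge the result back into the destination.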
T src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
T origSrc0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
T origSrc1(gpuDynInst, instData.VSRC1);

origSrc0_sdwa.read();
DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, SRC0_SEXT: "
        "%d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, SRC1_SEXT: %d, "
        "SRC1_NEG: %d, SRC1_ABS: %d\n",
        extData.iFmt_VOP_SDWA.SRC0_SEXT,
        extData.iFmt_VOP_SDWA.SRC1_SEL,
        extData.iFmt_VOP_SDWA.SRC1_SEXT,
        extData.iFmt_VOP_SDWA.SRC1_NEG,
        extData.iFmt_VOP_SDWA.SRC1_ABS);
T origVdst(gpuDynInst, instData.VDST);

origVdst[lane] = vdst[lane];
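// Data Parallel Primitive (DPP) handling: dppHelper reads the DPP-encoded
// src0 and hands it to processDPP along with the control fields logged
// below (DPP_CTRL, bound control, bank/row masks).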
T src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);

DPRINTF(VEGA, "Handling %s SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, SRC1_ABS: %d, "
        "SRC1_NEG: %d, BC: %d, BANK_MASK: %d, ROW_MASK: %d\n",
template<typename ConstT, typename T>
fOpImpl(src0_sdwa, src1, vdst, wf);

T src0_dpp = dppHelper(gpuDynInst, src1);
fOpImpl(src0_dpp, src1, vdst, wf);

ConstT const_src0(gpuDynInst, instData.SRC0);
const_src0.readSrc();

vdst[lane] = const_src0[lane];

fOpImpl(vdst, src1, vdst, wf);
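// sdwabSelect extracts the byte or word chosen by `sel` from a source
// dword, then applies the SDWAB modifiers: optional sign extension,
// negation and absolute value, panicking on combinations that are
// meaningless for unsigned types.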
bool sign_ext, bool neg, bool abs)
int low_bit = 0, high_bit = 0;
rv = bits(dword, high_bit, low_bit);

uint32_t sign_bit = 1 << high_bit;
if (std::is_integral_v<T> && std::is_unsigned_v<T>) {
    panic_if(neg, "SDWAB negation operation on unsigned type!\n");
    panic_if(sign_ext, "SDWAB sign extend on unsigned type!\n");
if (std::is_integral_v<T>) {
    if ((rv & sign_bit) && std::is_signed_v<T>) {
        rv = sext(rv, high_bit + 1) & 0xFFFFFFFF;
        rv = std::abs(static_cast<long long>(rv)) & 0xFFFFFFFF;

rv = rv & mask(high_bit);
if (std::is_integral_v<T>) {
    rv = sext(rv, high_bit + 1) & 0xFFFFFFFF;

rv = rv ^ mask(high_bit);
if (std::is_integral_v<T>) {
    rv = sext(rv, high_bit + 1) & 0xFFFFFFFF;

panic("SDWAB sign extend set for non-integral type!\n");
DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register %s[%d], "
        "sDst s[%d], sDst type %s, SRC0_SEL: %d, SRC0_SEXT: %d "
        "SRC0_NEG: %d, SRC0_ABS: %d, SRC1: register %s[%d], "
        "SRC1_SEL: %d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: "

        (extData.iFmt_VOP_SDWAB.S0 ? "s" : "v"),
        (extData.iFmt_VOP_SDWAB.SD ? "SGPR" : "VCC"),
        extData.iFmt_VOP_SDWAB.SRC0_SEL,
        extData.iFmt_VOP_SDWAB.SRC0_SEXT,
        extData.iFmt_VOP_SDWAB.SRC0_NEG,
        extData.iFmt_VOP_SDWAB.SRC0_ABS,
        (extData.iFmt_VOP_SDWAB.S1 ? "s" : "v"),
        extData.iFmt_VOP_SDWAB.SRC1_SEL,
        extData.iFmt_VOP_SDWAB.SRC1_SEXT,
        extData.iFmt_VOP_SDWAB.SRC1_NEG,
        extData.iFmt_VOP_SDWAB.SRC1_ABS);
int src0_idx = extData.iFmt_VOP_SDWAB.SRC0;
src0_idx += (extData.iFmt_VOP_SDWAB.S0 == 0) ? 0x100 : 0;

src1_idx += (extData.iFmt_VOP_SDWAB.S1 == 0) ? 0x100 : 0;

int sdst_idx = (extData.iFmt_VOP_SDWAB.SD == 1) ?
if (gpuDynInst->wavefront()->execMask(lane)) {
        extData.iFmt_VOP_SDWAB.SRC0_SEXT,
        extData.iFmt_VOP_SDWAB.SRC0_NEG,
        extData.iFmt_VOP_SDWAB.SRC0_ABS);

        extData.iFmt_VOP_SDWAB.SRC1_SEXT,
        extData.iFmt_VOP_SDWAB.SRC1_NEG,
        extData.iFmt_VOP_SDWAB.SRC1_ABS);
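// omodModifier applies the VOP3 output modifier: omod values 1, 2 and 3
// scale the result by x2, x4 and /2 respectively, and omod == 0 leaves it
// unchanged.  For example, omod == 3 on a floating-point result of 1.5
// yields 0.75.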
if constexpr (std::is_floating_point_v<T>) {
    if (omod == 1) return val * T(2.0f);
    if (omod == 2) return val * T(4.0f);
    if (omod == 3) return val / T(2.0f);

    assert(std::is_integral_v<T>);
    if (omod == 1) return val * T(2);
    if (omod == 2) return val * T(4);
    if (omod == 3) return val / T(2);
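// vop3pHelper implements packed 16-bit VOP3P math: the upper and lower
// halves of each 32-bit source are evaluated separately (selected and
// negated per the OPSEL/NEG bits via word<T>()), and the two 16-bit
// results are packed back as (upper_raw << 16) | lower_raw.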
T (*fOpImpl)(T, T, bool))

T upper_val = fOpImpl(word<T>(S0[lane], opHi, negHi, 0),
                      word<T>(S1[lane], opHi, negHi, 1),

T lower_val = fOpImpl(word<T>(S0[lane], opLo, negLo, 0),
                      word<T>(S1[lane], opLo, negLo, 1),

    *reinterpret_cast<uint16_t*>(&upper_val);
    *reinterpret_cast<uint16_t*>(&lower_val);

D[lane] = upper_raw << 16 | lower_raw;
T (*fOpImpl)(T, T, T, bool))

T upper_val = fOpImpl(word<T>(S0[lane], opHi, negHi, 0),
                      word<T>(S1[lane], opHi, negHi, 1),
                      word<T>(S2[lane], opHi, negHi, 2),

T lower_val = fOpImpl(word<T>(S0[lane], opLo, negLo, 0),
                      word<T>(S1[lane], opLo, negLo, 1),
                      word<T>(S2[lane], opLo, negLo, 2),

    *reinterpret_cast<uint16_t*>(&upper_val);
    *reinterpret_cast<uint16_t*>(&lower_val);

D[lane] = upper_raw << 16 | lower_raw;
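// dotHelper covers the dot-product style VOP3P operations: the packed
// sources are reassembled into full dwords from their half-word pieces and
// passed to fOpImpl together with the S2 accumulator and the clamp flag.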
uint32_t (*fOpImpl)(uint32_t, uint32_t, uint32_t, bool))

uint32_t dword1 = (dword1h << 16) | dword1l;
uint32_t dword2 = (dword2h << 16) | dword2l;

D[lane] = fOpImpl(dword1, dword2, S2[lane], clamp);
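// word<T>() picks the upper 16 bits of `data` when the opSel bit for this
// operand is set and the lower 16 bits otherwise, negates the value when
// the matching neg bit is set, and reinterprets the raw bits as T.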
word(uint32_t data, int opSel, int neg, int opSelBit)

static_assert(sizeof(T) == 2);

bool select = bits(opSel, opSelBit, opSelBit);
uint16_t raw = select ? bits(data, 31, 16)

bool negate = bits(neg, opSelBit, opSelBit);

return *reinterpret_cast<T*>(&raw);
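// Inst_DS memory helpers: initMemRead/initMemWrite (single- and N-dword
// forms), the dual variants that take two independent offsets per lane,
// and initAtomicAccess all build per-lane LDS accesses from
// gpuDynInst->addr and the lane-packed d_data/a_data/x_data buffers,
// skipping lanes whose exec_mask bit is clear; calcAddr copies each
// active lane's 32-bit address operand into gpuDynInst->addr.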
if (gpuDynInst->exec_mask[lane]) {
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
if (gpuDynInst->exec_mask[lane]) {
    for (int i = 0; i < N; ++i) {
        gpuDynInst->d_data))[lane * N + i]
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
if (gpuDynInst->exec_mask[lane]) {
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
if (gpuDynInst->exec_mask[lane]) {
    for (int i = 0; i < N; ++i) {
        gpuDynInst->d_data))[lane * N + i]);
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
        gpuDynInst->d_data))[lane * 2]);
        gpuDynInst->d_data))[lane * 2 + 1]);
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->makeAtomicOpFunctor<T>(
        &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
        &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]);

    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
Wavefront *wf = gpuDynInst->wavefront();

gpuDynInst->addr.at(lane) = (Addr)addr[lane];
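// The MUBUF read/write/atomic init helpers below save the instruction's
// exec_mask, issue the access, and then restore it; presumably so that
// lanes flagged as out-of-bounds by calcAddr can be suppressed for the
// access without losing the original execution mask.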
template<typename T>

VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask = old_exec_mask;

VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask = old_exec_mask;

template<typename T>

VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask = old_exec_mask;

VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask = old_exec_mask;

template<typename T>

VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask = old_exec_mask;
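// injectGlobalMemFence resets the instruction's status vector, builds a
// zero-size Request tagged with the wavefront's dynamic id, sets the
// request flags, and passes the fence on to the compute unit.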
gpuDynInst->resetEntireStatusVector();
gpuDynInst->setStatusVector(0, 1);
RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                           gpuDynInst->computeUnit()->
                                           gpuDynInst->wfDynId);
gpuDynInst->setRequestFlags(req);
gpuDynInst->computeUnit()->
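// Inst_MUBUF::calcAddr forms per-lane buffer addresses from the resource
// descriptor copied out of s_rsrc_desc, the index/offset VGPRs, the scalar
// offset and the instruction's immediate offset.  Swizzled buffers use
//   (idx_msb * stride + off_msb * elem_size) * idx_stride
//       + idx_lsb * elem_size + off_lsb,
// otherwise the offset is simply buf_off + stride * buf_idx; lanes whose
// offset exceeds num_records are reported as out-of-bounds.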
template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
         SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
Addr buffer_offset = 0;

std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),

if (gpuDynInst->exec_mask[lane]) {
    vaddr = base_addr + s_offset.rawData();

    buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);

    buf_off = v_off[lane] + inst_offset;

    Addr idx_msb = buf_idx / idx_stride;
    Addr idx_lsb = buf_idx % idx_stride;
    Addr off_msb = buf_off / elem_size;
    Addr off_lsb = buf_off % elem_size;
    DPRINTF(VEGA, "mubuf swizzled lane %d: "
            "idx_stride = %llx, elem_size = %llx, "
            "idx_msb = %llx, idx_lsb = %llx, "
            "off_msb = %llx, off_lsb = %llx\n",
            lane, idx_stride, elem_size, idx_msb, idx_lsb,

    buffer_offset = (idx_msb * stride + off_msb * elem_size)
        * idx_stride + idx_lsb * elem_size + off_lsb;

    buffer_offset = buf_off + stride * buf_idx;

    if (buffer_offset >=
        DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                "lane = %d, buffer_offset = %llx, "
                "const_stride = %llx, "
                "const_num_records = %llx\n",
                lane, buf_off + stride * buf_idx,

    DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
            "lane = %d, offset = %llx, "
            "const_num_records = %llx\n",
            lane, buf_off, buf_idx,

    vaddr += buffer_offset;

    DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
            "vaddr = %llx, base_addr = %llx, "
            "stride = %llx, buf_idx = %llx, buf_off = %llx\n",

    gpuDynInst->addr.at(lane) = vaddr;
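// Inst_FLAT load/store initiation dispatches on the resolved segment:
// global and private accesses go through the generic request helpers (the
// private path asserts each element fits in a dword), while group-segment
// accesses are serviced per active lane via the wavefront, copying to or
// from the lane-packed d_data buffer.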
template<typename T>

if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {

} else if (gpuDynInst->executedAs() == enums::SC_PRIVATE) {
    static_assert(sizeof(T) <= 4);

} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {

} else if (gpuDynInst->executedAs() == enums::SC_PRIVATE) {

} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();

    if (gpuDynInst->exec_mask[lane]) {
        for (int i = 0; i < N; ++i) {
            gpuDynInst->d_data))[lane * N + i]
template<typename T>

if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {

} else if (gpuDynInst->executedAs() == enums::SC_PRIVATE) {
    static_assert(sizeof(T) <= 4);

} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {

} else if (gpuDynInst->executedAs() == enums::SC_PRIVATE) {

} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();

    if (gpuDynInst->exec_mask[lane]) {
        for (int i = 0; i < N; ++i) {
            gpuDynInst->d_data))[lane * N + i]);
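// Inst_FLAT::initAtomicAccess: private-segment atomics are not supported
// (asserted below); global atomics go through the request helper, while
// group-segment atomics build an AtomicOpFunctor over the lane's
// a_data/x_data operands, apply it to a temporary value and record the
// outcome in d_data.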
template<typename T>

assert(gpuDynInst->executedAs() != enums::SC_PRIVATE);

if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {

} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();

    if (gpuDynInst->exec_mask[lane]) {
        gpuDynInst->makeAtomicOpFunctor<T>(
            &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
            &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]);

        (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));

        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
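// Flat/global/scratch address generation: a SADDR field of 0x7f appears to
// mean "no scalar base", and scratch accesses are swizzled (see swizzleAddr
// below) so that consecutive dwords from one lane are interleaved across
// the wave before being added to the flat scratch base.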
} else if (saddr != 0x7f) {
[[maybe_unused]] int elemSize;
[[maybe_unused]] auto staticInst = gpuDynInst->staticInstruction();
if (gpuDynInst->isLoad()) {
    elemSize = staticInst->getOperandSize(2);

    assert(gpuDynInst->isStore());
    elemSize = staticInst->getOperandSize(1);

assert((offset % elemSize) == 0);
assert((swizzleOffset % 4) == 0);

if (gpuDynInst->exec_mask[lane]) {
    swizzleOffset += instData.SVE ? voffset[lane] : 0;
    gpuDynInst->addr.at(lane) = flat_scratch_addr

if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = flat_scratch_addr

gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);

gpuDynInst->staticInstruction()->executed_as =

gpuDynInst->staticInstruction()->executed_as =

gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
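// issueRequestHelper routes the access to the right pipeline: global and
// private requests go to the global memory pipe, group-segment requests to
// the local memory pipe.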
if ((gpuDynInst->executedAs() == enums::SC_GLOBAL && isFlat())
    gpuDynInst->computeUnit()->globalMemoryPipe
        .issueRequest(gpuDynInst);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    gpuDynInst->computeUnit()->localMemoryPipe
        .issueRequest(gpuDynInst);

    assert(gpuDynInst->executedAs() == enums::SC_PRIVATE);
    gpuDynInst->computeUnit()->globalMemoryPipe
        .issueRequest(gpuDynInst);
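// Atomic source setup for FLAT/global atomics: the source data (and, for
// compare-and-swap forms, the cmp register at DATA + CmpRegOffset) is
// staged per active lane in x_data/a_data before the request is issued;
// atomicComplete later copies the returned d_data values into VDST.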
template<typename RegT, typename LaneT, int CmpRegOffset = 0>

Wavefront *wf = gpuDynInst->wavefront();

if (gpuDynInst->exec_mask.none()) {

gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

RegT cmp(gpuDynInst, extData.DATA + CmpRegOffset);

if constexpr (CmpRegOffset) {

if (gpuDynInst->exec_mask[lane]) {
    if constexpr (CmpRegOffset) {
        gpuDynInst->x_data))[lane] = data[lane];
        gpuDynInst->a_data))[lane] = cmp[lane];

    (reinterpret_cast<LaneT*>(gpuDynInst->a_data))[lane]
template<typename RegT, typename LaneT>

RegT vdst(gpuDynInst, extData.VDST);

if (gpuDynInst->exec_mask[lane]) {
    vdst[lane] = (reinterpret_cast<LaneT*>(gpuDynInst->d_data))[lane];
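// swizzleData copies the N per-lane dwords held in d_data through a
// temporary buffer and writes them back in the order the destination
// registers expect; static_assert(N > 1) limits it to multi-dword accesses.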
static_assert(N > 1);

for (int dword = 0; dword < N; ++dword) {
    gpuDynInst->d_data))[lane * N + dword];

for (int dword = 0; dword < N; ++dword) {
    gpuDynInst->d_data))[lane * N + dword] =
        data[lane * N + dword];
return (extData.SADDR != 0x7f);
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) =

if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = addr[lane] + offset;
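// swizzleAddr interleaves per-lane scratch dwords across the 64 lanes of a
// wave: for example, offset 5 with tid 2 maps to
// ((5 / 4) * 4 * 64) + (5 % 4) + (2 * 4) = 265.  readFlatScratch below
// returns the shader's scratch base address.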
return ((offset / 4) * 4 * 64) + (offset % 4) + (tid * 4);
return gpuDynInst->computeUnit()->shader->getScratchBase();