#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"
// Inst_SMEM: scalar reads and writes of N dwords go through the shared
// scalar request helper (read request in initMemRead, write request in
// initMemWrite).
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst, MemCmd::ReadReq);

initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst, MemCmd::WriteReq);
// Inst_SMEM::calcAddr for s_load_dword* addresses: the scalar address is
// the base register plus the offset.
gpu_dyn_inst->scalarAddr = vaddr;

// Inst_SMEM::calcAddr for s_buffer_load_dword* addresses: unpack the
// 128-bit buffer resource descriptor, then add the offset to its base.
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
            sizeof(rsrc_desc));

gpu_dyn_inst->scalarAddr = vaddr;
// VOP2 SDWA source handling (sdwaSrcHelper): keep a copy of the original
// src0 before the sub-dword modifiers are applied.
origSrc0_sdwa.read();

DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, SRC0_SEXT: "
        "%d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, SRC1_SEXT: %d, "
        "SRC1_NEG: %d, SRC1_ABS: %d\n",

// VOP2 SDWA destination handling (sdwaDstHelper): keep the unmodified
// destination so the selected sub-dword can be merged back into it.
origVdst[lane] = vdst[lane];

// VOP2 DPP source handling (dppHelper).
DPRINTF(VEGA, "Handling %s SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, SRC1_ABS: %d, "
        "SRC1_NEG: %d, BC: %d, BANK_MASK: %d, ROW_MASK: %d\n",
// vop2Helper: runs a VOP2 operation, dispatching on the instruction's
// encoding.  The SDWA path applies the sub-dword modifiers to src0 first,
// the DPP path shuffles src0 across lanes first, and the plain path stages
// the (possibly constant or literal) src0 through vdst before applying the
// functor.
template<typename ConstT, typename T>
void
vop2Helper(GPUDynInstPtr gpuDynInst,
           void (*fOpImpl)(T&, T&, T&, Wavefront*))

fOpImpl(src0_sdwa, src1, vdst, wf);

T src0_dpp = dppHelper(gpuDynInst, src1);
fOpImpl(src0_dpp, src1, vdst, wf);

const_src0.readSrc();

vdst[lane] = const_src0[lane];

fOpImpl(vdst, src1, vdst, wf);
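// How the helper is used (an illustrative sketch, not code taken from this
// file): a VOP2 instruction's execute() typically calls vop2Helper with a
// small functor that computes vdst from src0/src1 one lane at a time, e.g.
// a per-lane add for v_add_u32, while the helper itself takes care of the
// SDWA/DPP variants, constant sources, and the vdst write-back.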
// sdwabSelect: extract the byte or word of a 32-bit source chosen by the
// SDWA SEL field, then apply the abs, neg and sign-extend modifiers.
template<typename T>
uint32_t
sdwabSelect(uint32_t dword, const SDWASelVals sel,
            bool sign_ext, bool neg, bool abs)
{
    uint32_t rv = 0;
    // low_bit/high_bit are set from `sel` to delimit the selected byte or
    // word within the source dword.
    int low_bit = 0, high_bit = 0;

    rv = bits(dword, high_bit, low_bit);
    uint32_t sign_bit = 1 << high_bit;

    // Negation and sign extension are rejected on unsigned operands.
    if (std::is_integral_v<T> && std::is_unsigned_v<T>) {
        panic_if(neg, "SDWAB negation operation on unsigned type!\n");
        panic_if(sign_ext, "SDWAB sign extend on unsigned type!\n");
    }

    // abs modifier: signed integral fields are sign-extended and replaced
    // by their magnitude; otherwise the sign bit is cleared.
    if (std::is_integral_v<T>) {
        if ((rv & sign_bit) && std::is_signed_v<T>) {
            rv = sext(rv, high_bit + 1) & 0xFFFFFFFF;
            rv = std::abs(static_cast<long long>(rv)) & 0xFFFFFFFF;
        }
    } else {
        rv = rv & mask(high_bit);
    }

    // neg modifier: integral fields are sign-extended before being negated.
    if (std::is_integral_v<T>) {
        rv = sext(rv, high_bit + 1) & 0xFFFFFFFF;
    } else {
        rv = rv ^ mask(high_bit);
    }

    // sign_ext modifier: only meaningful for integral types.
    if (std::is_integral_v<T>) {
        rv = sext(rv, high_bit + 1) & 0xFFFFFFFF;
    } else {
        panic("SDWAB sign extend set for non-integral type!\n");
    }

    return rv;
}
// sdwabHelper (VOPC with the SDWA-B encoding): trace the operand modifiers,
// then run both sources through sdwabSelect before comparing them.
DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register %s[%d], "
        "sDst s[%d], sDst type %s, SRC0_SEL: %d, SRC0_SEXT: %d "
        "SRC0_NEG: %d, SRC0_ABS: %d, SRC1: register %s[%d], "
        "SRC1_SEL: %d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: "

if (gpuDynInst->wavefront()->execMask(lane)) {
    T a = sdwabSelect<T>(src0[lane], src0_sel,
    T b = sdwabSelect<T>(src1[lane], src1_sel,
// omodModifier: apply the VOP3 output modifier (OMOD) to a result:
// 1 multiplies by two, 2 by four, and 3 divides by two.
if constexpr (std::is_floating_point_v<T>) {
    if (omod == 1) return val * T(2.0f);
    if (omod == 2) return val * T(4.0f);
    if (omod == 3) return val / T(2.0f);
} else {
    assert(std::is_integral_v<T>);
    if (omod == 1) return val * T(2);
    if (omod == 2) return val * T(4);
    if (omod == 3) return val / T(2);
}
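// For example, a floating-point result of 1.5 with OMOD = 3 is returned as
// 0.75, while OMOD = 1 would return 3.0.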
// vop3pHelper (two-source form): the packed-math functor is applied twice
// per lane, once to the high 16-bit halves of the sources and once to the
// low halves, and the two results are re-packed into the 32-bit destination.
template<typename T>
void
vop3pHelper(GPUDynInstPtr gpuDynInst, T (*fOpImpl)(T, T, bool))

    T upper_val = fOpImpl(word<T>(S0[lane], opHi, negHi, 0),
                          word<T>(S1[lane], opHi, negHi, 1),
                          clamp);
    T lower_val = fOpImpl(word<T>(S0[lane], opLo, negLo, 0),
                          word<T>(S1[lane], opLo, negLo, 1),
                          clamp);

    uint16_t upper_raw = *reinterpret_cast<uint16_t*>(&upper_val);
    uint16_t lower_raw = *reinterpret_cast<uint16_t*>(&lower_val);

    D[lane] = upper_raw << 16 | lower_raw;

// vop3pHelper (three-source form, e.g. packed FMA): identical structure,
// with a third packed source.
template<typename T>
void
vop3pHelper(GPUDynInstPtr gpuDynInst, T (*fOpImpl)(T, T, T, bool))

    T upper_val = fOpImpl(word<T>(S0[lane], opHi, negHi, 0),
                          word<T>(S1[lane], opHi, negHi, 1),
                          word<T>(S2[lane], opHi, negHi, 2),
                          clamp);
    T lower_val = fOpImpl(word<T>(S0[lane], opLo, negLo, 0),
                          word<T>(S1[lane], opLo, negLo, 1),
                          word<T>(S2[lane], opLo, negLo, 2),
                          clamp);

    uint16_t upper_raw = *reinterpret_cast<uint16_t*>(&upper_val);
    uint16_t lower_raw = *reinterpret_cast<uint16_t*>(&lower_val);

    D[lane] = upper_raw << 16 | lower_raw;

// dotHelper: the dot-product instructions hand the functor the two fully
// packed 32-bit sources (with per-half opSel/neg applied) plus the 32-bit
// accumulator S2.
void
dotHelper(GPUDynInstPtr gpuDynInst,
          uint32_t (*fOpImpl)(uint32_t, uint32_t, uint32_t, bool))

    uint32_t dword1l = word<uint16_t>(S0[lane], opLo, negLo, 0);
    uint32_t dword1h = word<uint16_t>(S0[lane], opHi, negHi, 0);
    uint32_t dword2l = word<uint16_t>(S1[lane], opLo, negLo, 1);
    uint32_t dword2h = word<uint16_t>(S1[lane], opHi, negHi, 1);

    uint32_t dword1 = (dword1h << 16) | dword1l;
    uint32_t dword2 = (dword2h << 16) | dword2l;

    D[lane] = fOpImpl(dword1, dword2, S2[lane], clamp);
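// Illustrative use (a sketch, not code from this file): a packed add such as
// v_pk_add_f16 would pass a functor of the form
//   T add(T a, T b, bool clamp) { return a + b; }   // clamping omitted
// and vop3pHelper evaluates it once for the high halves and once for the
// low halves of each active lane before re-packing the two 16-bit results.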
// word<T>(): pick the high or low 16-bit half of a packed 32-bit VOP3P
// source, as directed by that operand's opSel bit, and apply its neg bit.
template<typename T>
T
word(uint32_t data, int opSel, int neg, int opSelBit)
{
    static_assert(sizeof(T) == 2);

    bool select = bits(opSel, opSelBit, opSelBit);
    uint16_t raw = select ? bits(data, 31, 16)
                          : bits(data, 15, 0);

    bool negate = bits(neg, opSelBit, opSelBit);
    // (when `negate` is set, the selected half is negated before use)

    return *reinterpret_cast<T*>(&raw);
}
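// Example (hypothetical values): with data = 0xBEEF1234, opSelBit = 0 and
// opSel = 0b001, the operand-0 select bit is set, so the high half 0xBEEF is
// chosen; with opSel = 0b000 the low half 0x1234 is returned instead, in
// both cases reinterpreted as the 16-bit type T.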
// Inst_DS::initMemRead: read one element per active lane from the
// wavefront's LDS chunk into d_data.  (The ldsChunk accessor calls below
// are reconstructed to match the read/write/atomic accessors this file
// relies on.)
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
        = wf->ldsChunk->read<T>(vaddr);
}

// Inst_DS::initMemRead<N>: read N consecutive dwords per active lane.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    for (int i = 0; i < N; ++i) {
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * N + i]
            = wf->ldsChunk->read<VecElemU32>(
                vaddr + i * sizeof(VecElemU32));
    }
}

// Inst_DS::initDualMemRead: ds_read2_* reads two independent LDS locations
// per lane; the results are interleaved in d_data (see the note below).
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
        = wf->ldsChunk->read<T>(vaddr0);
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
        = wf->ldsChunk->read<T>(vaddr1);
}

// Inst_DS::initMemWrite: write one element per active lane from d_data
// into the LDS chunk.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    wf->ldsChunk->write<T>(vaddr,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
}

// Inst_DS::initMemWrite<N>: write N consecutive dwords per active lane.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    for (int i = 0; i < N; ++i) {
        wf->ldsChunk->write<VecElemU32>(
            vaddr + i * sizeof(VecElemU32),
            (reinterpret_cast<VecElemU32*>(
                gpuDynInst->d_data))[lane * N + i]);
    }
}

// Inst_DS::initDualMemWrite: ds_write2_* writes two independent locations
// per lane from the interleaved halves of d_data.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
    wf->ldsChunk->write<T>(vaddr0,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]);
    wf->ldsChunk->write<T>(vaddr1,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]);
}

// Inst_DS::initAtomicAccess: build a per-lane atomic functor over the
// staged operand values in a_data and x_data, apply it to the LDS
// location, and return the result in d_data.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    AtomicOpFunctorPtr amo_op =
        gpuDynInst->makeAtomicOpFunctor<T>(
            &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
            &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]);
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
        = wf->ldsChunk->atomic<T>(vaddr, std::move(amo_op));
}

// Inst_DS::calcAddr: DS addresses are simply the per-lane register value.
Wavefront *wf = gpuDynInst->wavefront();

gpuDynInst->addr.at(lane) = (Addr)addr[lane];
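// Note on the dual variants above: offset0 and offset1 come from the
// ds_read2/ds_write2 instruction fields, so one instruction touches two LDS
// locations per lane and d_data holds the pair contiguously per lane
// ([lane * 2] and [lane * 2 + 1]).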
// Inst_MUBUF memory-init helpers (initMemRead / initMemWrite /
// initAtomicAccess, in single-element and N-dword forms).  Each drops the
// lanes that calcAddr flagged as out of bounds (oobMask) from the execution
// mask while the request is issued, then restores the caller's mask.  The
// request calls themselves are assumed to follow the initMemReqHelper
// pattern used elsewhere in this file.

// initMemRead<T>
template<typename T>

    VectorMask old_exec_mask = gpuDynInst->exec_mask;
    gpuDynInst->exec_mask &= ~oobMask;
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
    gpuDynInst->exec_mask = old_exec_mask;

// initMemRead<N> (N dwords per lane)
    VectorMask old_exec_mask = gpuDynInst->exec_mask;
    gpuDynInst->exec_mask &= ~oobMask;
    initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
    gpuDynInst->exec_mask = old_exec_mask;

// initMemWrite<T>
template<typename T>

    VectorMask old_exec_mask = gpuDynInst->exec_mask;
    gpuDynInst->exec_mask &= ~oobMask;
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
    gpuDynInst->exec_mask = old_exec_mask;

// initMemWrite<N> (N dwords per lane)
    VectorMask old_exec_mask = gpuDynInst->exec_mask;
    gpuDynInst->exec_mask &= ~oobMask;
    initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
    gpuDynInst->exec_mask = old_exec_mask;

// initAtomicAccess<T>
template<typename T>

    VectorMask old_exec_mask = gpuDynInst->exec_mask;
    gpuDynInst->exec_mask &= ~oobMask;
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
    gpuDynInst->exec_mask = old_exec_mask;
// injectGlobalMemFence: build a zero-size fence request (the address range
// is irrelevant) and hand it to the compute unit.
gpuDynInst->resetEntireStatusVector();
gpuDynInst->setStatusVector(0, 1);
RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                           gpuDynInst->computeUnit()->
                                           requestorId(), 0,
                                           gpuDynInst->wfDynId);
gpuDynInst->setRequestFlags(req);
gpuDynInst->computeUnit()->
    injectGlobalMemFence(gpuDynInst, false, req);
// Inst_MUBUF::calcAddr: a buffer address is the descriptor's base address
// plus the scalar offset, plus a per-lane buffer_offset derived from the
// vector offset/index operands.  Out-of-bounds lanes are recorded in
// oobMask so the memory-init helpers can suppress their accesses.
template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
void
calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
         SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)

    Addr buffer_offset = 0;

    std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                sizeof(rsrc_desc));

    if (gpuDynInst->exec_mask[lane]) {
        vaddr = base_addr + s_offset.rawData();

        buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
        buf_off = v_off[lane] + inst_offset;

        // Swizzled buffers interleave idx_stride consecutive indices in
        // elem_size chunks.
        Addr idx_msb = buf_idx / idx_stride;
        Addr idx_lsb = buf_idx % idx_stride;
        Addr off_msb = buf_off / elem_size;
        Addr off_lsb = buf_off % elem_size;
        DPRINTF(VEGA, "mubuf swizzled lane %d: "
                "idx_stride = %llx, elem_size = %llx, "
                "idx_msb = %llx, idx_lsb = %llx, "
                "off_msb = %llx, off_lsb = %llx\n",
                lane, idx_stride, elem_size, idx_msb, idx_lsb,
                off_msb, off_lsb);

        // swizzle-enabled layout
        buffer_offset = (idx_msb * stride + off_msb * elem_size)
            * idx_stride + idx_lsb * elem_size + off_lsb;

        // plain linear layout
        buffer_offset = buf_off + stride * buf_idx;

        // Out-of-bounds checks against the descriptor's record count;
        // offending lanes are masked via oobMask rather than accessed.
        if (buffer_offset >=
            DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                    "lane = %d, buffer_offset = %llx, "
                    "const_stride = %llx, "
                    "const_num_records = %llx\n",
                    lane, buf_off + stride * buf_idx,

            DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
                    "lane = %d, offset = %llx, "
                    "const_num_records = %llx\n",
                    lane, buf_off, buf_idx,

        vaddr += buffer_offset;

        DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
                "vaddr = %llx, base_addr = %llx, "
                "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                lane, vaddr, base_addr, stride, buf_idx, buf_off);

        gpuDynInst->addr.at(lane) = vaddr;
    }
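// Worked example of the swizzled offset above (hypothetical values): with
// idx_stride = 8, elem_size = 4, stride = 32, buf_idx = 10 and buf_off = 6:
//   idx_msb = 10 / 8 = 1,  idx_lsb = 10 % 8 = 2
//   off_msb =  6 / 4 = 1,  off_lsb =  6 % 4 = 2
//   buffer_offset = (1 * 32 + 1 * 4) * 8 + 2 * 4 + 2 = 298
// whereas the unswizzled path would give buf_off + stride * buf_idx = 326.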
// Inst_FLAT::initMemRead<T>: a flat access is steered by how the address
// resolved -- global memory, private (scratch), or group (LDS).
template<typename T>

    if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
        // global: issue a read request through the memory pipeline
    } else if (gpuDynInst->executedAs() == enums::SC_PRIVATE) {
        // scratch only supports elements of at most one dword
        static_assert(sizeof(T) <= 4);
    } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask[lane]) {
            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                = wf->ldsChunk->read<T>(gpuDynInst->addr[lane]);
        }
    }

// Inst_FLAT::initMemRead<N>: same structure, N dwords per lane.
    if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
        // global read request
    } else if (gpuDynInst->executedAs() == enums::SC_PRIVATE) {
        // scratch read handling
    } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask[lane]) {
            for (int i = 0; i < N; ++i) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * N + i]
                    = wf->ldsChunk->read<VecElemU32>(
                        gpuDynInst->addr[lane] + i * sizeof(VecElemU32));
            }
        }
    }

// Inst_FLAT::initMemWrite<T>.
template<typename T>

    if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
        // global write request
    } else if (gpuDynInst->executedAs() == enums::SC_PRIVATE) {
        static_assert(sizeof(T) <= 4);
    } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask[lane]) {
            wf->ldsChunk->write<T>(gpuDynInst->addr[lane],
                (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
        }
    }

// Inst_FLAT::initMemWrite<N>: scratch stores first re-order d_data with
// swizzleData<N>.
    if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
        // global write request
    } else if (gpuDynInst->executedAs() == enums::SC_PRIVATE) {
        swizzleData<N>(gpuDynInst);
    } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask[lane]) {
            for (int i = 0; i < N; ++i) {
                wf->ldsChunk->write<VecElemU32>(
                    gpuDynInst->addr[lane] + i * sizeof(VecElemU32),
                    (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * N + i]);
            }
        }
    }

// Inst_FLAT::initAtomicAccess<T>: flat atomics are never private.
template<typename T>

    assert(gpuDynInst->executedAs() != enums::SC_PRIVATE);

    if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
        // global: issue an atomic request through the memory pipeline
    } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask[lane]) {
            AtomicOpFunctorPtr amo_op =
                gpuDynInst->makeAtomicOpFunctor<T>(
                    &(reinterpret_cast<T*>(
                        gpuDynInst->a_data))[lane],
                    &(reinterpret_cast<T*>(
                        gpuDynInst->x_data))[lane]);

            // apply the functor to `tmp` (the value staged from the LDS
            // location) and make the result visible in d_data
            (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));

            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
        }
    }
// Inst_FLAT address generation.  When a scalar base is supplied (saddr is
// not the 0x7f null encoding) it is combined with the vector offset.
} else if (saddr != 0x7f) {

// Scratch (private) addresses are swizzled so that consecutive dwords of a
// lane are interleaved across the wave.  The element size comes from the
// data operand of the load or store.
auto staticInst = gpuDynInst->staticInstruction();
if (gpuDynInst->isLoad()) {
    elemSize = staticInst->getOperandSize(2);
} else {
    assert(gpuDynInst->isStore());
    elemSize = staticInst->getOperandSize(1);
}

if (gpuDynInst->exec_mask[lane]) {
    swizzleOffset += instData.SVE ? voffset[lane] : 0;
    gpuDynInst->addr.at(lane) = flat_scratch_addr
        + swizzleAddr(swizzleOffset, lane, elemSize);
}

// The second scratch-address variant selects the element size the same way.
auto staticInst = gpuDynInst->staticInstruction();
if (gpuDynInst->isLoad()) {
    elemSize = staticInst->getOperandSize(2);
} else {
    assert(gpuDynInst->isStore());
    elemSize = staticInst->getOperandSize(1);
}

if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = flat_scratch_addr
        + swizzleAddr(swizzleOffset, lane, elemSize);
}

// True FLAT instructions resolve their segment at execute time; the GLOBAL
// and SCRATCH encodings already know theirs.
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);

gpuDynInst->staticInstruction()->executed_as = enums::SC_GLOBAL;

gpuDynInst->staticInstruction()->executed_as = enums::SC_PRIVATE;
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);

// issueRequestHelper: route the request to the appropriate pipeline.
// Global and flat-global accesses (and private/scratch ones) go to the
// global memory pipe; group accesses go to the local (LDS) pipe.
if ((gpuDynInst->executedAs() == enums::SC_GLOBAL && isFlat())
        || isFlatGlobal()) {
    gpuDynInst->computeUnit()->globalMemoryPipe
        .issueRequest(gpuDynInst);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    gpuDynInst->computeUnit()->localMemoryPipe
        .issueRequest(gpuDynInst);
} else {
    assert(gpuDynInst->executedAs() == enums::SC_PRIVATE);
    gpuDynInst->computeUnit()->globalMemoryPipe
        .issueRequest(gpuDynInst);
}
// Shared atomic-data setup helper: RegT is the data register operand type,
// LaneT the per-lane element type, and CmpRegOffset is non-zero for
// compare-and-swap, whose compare value lives CmpRegOffset registers after
// the data register.
template<typename RegT, typename LaneT, int CmpRegOffset = 0>

    Wavefront *wf = gpuDynInst->wavefront();

    if (gpuDynInst->exec_mask.none()) {
        // nothing to issue when no lane is active

    gpuDynInst->latency.init(gpuDynInst->computeUnit());
    gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

    RegT cmp(gpuDynInst, extData.DATA + CmpRegOffset);
    if constexpr (CmpRegOffset) {
        // the compare register is only read for cmpswap

    if (gpuDynInst->exec_mask[lane]) {
        if constexpr (CmpRegOffset) {
            // cmpswap: x_data carries the swap value, a_data the compare
            (reinterpret_cast<LaneT*>(
                gpuDynInst->x_data))[lane] = data[lane];
            (reinterpret_cast<LaneT*>(
                gpuDynInst->a_data))[lane] = cmp[lane];
        } else {
            (reinterpret_cast<LaneT*>(gpuDynInst->a_data))[lane]
                = data[lane];
        }
    }

// Atomic completion helper: instructions that return the memory's previous
// value copy it from d_data back into the destination vector register.
template<typename RegT, typename LaneT>

    if (gpuDynInst->exec_mask[lane]) {
        vdst[lane] = (reinterpret_cast<LaneT*>(
            gpuDynInst->d_data))[lane];
    }
// Multi-dword data staging (N > 1): the first loop reads a lane's N dwords
// out of d_data, the second writes register data back into d_data; this
// appears to belong to the swizzleData<N> re-ordering used by scratch
// stores above.
static_assert(N > 1);

for (int dword = 0; dword < N; ++dword) {
        gpuDynInst->d_data))[lane * N + dword];

for (int dword = 0; dword < N; ++dword) {
        gpuDynInst->d_data))[lane * N + dword] =
        data[lane * N + dword];
// Inst_FLAT::calcAddrSgpr: per-lane address built from the scalar base plus
// the vector offset and instruction offset.
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) =

// Inst_FLAT::calcAddrVgpr: per-lane address is the 64-bit vector address
// plus the instruction offset.
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = addr[lane] + offset;
}

// swizzleAddr: scratch is laid out so that each lane's dwords are
// interleaved across the wave; a byte at linear `offset` for `lane` maps to:
VecElemU32
swizzleAddr(VecElemU32 offset, int lane, int elem_size)
{
    return ((offset / 4) * 4 * 64) + (offset % 4) + (lane * elem_size);
}

// readFlatScratch: base address of this shader's scratch aperture.
Addr
readFlatScratch(GPUDynInstPtr gpuDynInst)
{
    return gpuDynInst->computeUnit()->shader->getScratchBase();
}
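// Illustrative example of swizzleAddr (hypothetical values): for lane 5,
// elem_size = 4 and a linear offset of 12 bytes, the result is
//   ((12 / 4) * 4 * 64) + (12 % 4) + (5 * 4) = 768 + 0 + 20 = 788,
// i.e. the third dword-row of the wave's scratch block, in lane 5's column.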