#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"
gpu_dyn_inst->scalarAddr = vaddr;
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
            sizeof(rsrc_desc));
gpu_dyn_inst->scalarAddr = vaddr;
origSrc0_sdwa.read();
DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register v[%d], "
        "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, SRC0_SEXT: "
        "%d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, SRC1_SEXT: %d, "
        "SRC1_NEG: %d, SRC1_ABS: %d\n",
origVdst[lane] = vdst[lane];
DPRINTF(VEGA, "Handling %s SRC DPP. SRC0: register v[%d], "
        "DPP_CTRL: %#x, SRC0_ABS: %d, SRC0_NEG: %d, SRC1_ABS: %d, "
        "SRC1_NEG: %d, BC: %d, BANK_MASK: %d, ROW_MASK: %d\n",
template<typename ConstT, typename T>
// ...
fOpImpl(src0_sdwa, src1, vdst, wf);
// ...
T src0_dpp = dppHelper(gpuDynInst, src1);
fOpImpl(src0_dpp, src1, vdst, wf);
// ...
const_src0.readSrc();
// ...
vdst[lane] = const_src0[lane];
// ...
fOpImpl(vdst, src1, vdst, wf);
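// Illustrative sketch (not part of op_encodings.hh): the lane-wise pattern
// that vop2Helper applies once a source path (SDWA, DPP, or plain VOP2) has
// been chosen -- run the op per lane, writing back only where the exec mask
// is set.  The names and types below are stand-ins, not gem5 APIs.
#include <array>
#include <bitset>
#include <cstdint>

constexpr int kLanes = 64;  // assumed wavefront width

inline void
laneWiseAddSketch(const std::array<uint32_t, kLanes> &src0,
                  const std::array<uint32_t, kLanes> &src1,
                  std::array<uint32_t, kLanes> &vdst,
                  const std::bitset<kLanes> &execMask)
{
    for (int lane = 0; lane < kLanes; ++lane) {
        if (execMask[lane]) {          // only active lanes produce a result
            vdst[lane] = src0[lane] + src1[lane];
        }
    }
}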
if constexpr (std::is_floating_point_v<T>) {
    if (omod == 1) return val * T(2.0f);
    if (omod == 2) return val * T(4.0f);
    if (omod == 3) return val / T(2.0f);
} else {
    assert(std::is_integral_v<T>);
    if (omod == 1) return val * T(2);
    if (omod == 2) return val * T(4);
    if (omod == 3) return val / T(2);
}
return val;
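// Illustrative note (not from the original header): OMOD is the VOP3 output
// modifier; 0 leaves the result unchanged, 1 multiplies by 2, 2 multiplies
// by 4, and 3 divides by 2.  A minimal standalone check of that mapping,
// using a hypothetical float-only mirror of omodModifier():
#include <cassert>

inline float
applyOmodSketch(float val, unsigned omod)
{
    if (omod == 1) return val * 2.0f;
    if (omod == 2) return val * 4.0f;
    if (omod == 3) return val / 2.0f;
    return val;
}

inline void
omodSelfTest()
{
    assert(applyOmodSketch(3.0f, 0) == 3.0f);
    assert(applyOmodSketch(3.0f, 1) == 6.0f);
    assert(applyOmodSketch(3.0f, 2) == 12.0f);
    assert(applyOmodSketch(3.0f, 3) == 1.5f);
}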
T (*fOpImpl)(T, T, bool))
// ...
T upper_val = fOpImpl(word<T>(S0[lane], opHi, negHi, 0),
                      word<T>(S1[lane], opHi, negHi, 1),
                      clamp);
T lower_val = fOpImpl(word<T>(S0[lane], opLo, negLo, 0),
                      word<T>(S1[lane], opLo, negLo, 1),
                      clamp);

uint16_t upper_raw =
    *reinterpret_cast<uint16_t*>(&upper_val);
uint16_t lower_raw =
    *reinterpret_cast<uint16_t*>(&lower_val);

D[lane] = upper_raw << 16 | lower_raw;
T (*fOpImpl)(T, T, T, bool))
// ...
T upper_val = fOpImpl(word<T>(S0[lane], opHi, negHi, 0),
                      word<T>(S1[lane], opHi, negHi, 1),
                      word<T>(S2[lane], opHi, negHi, 2),
                      clamp);
T lower_val = fOpImpl(word<T>(S0[lane], opLo, negLo, 0),
                      word<T>(S1[lane], opLo, negLo, 1),
                      word<T>(S2[lane], opLo, negLo, 2),
                      clamp);

uint16_t upper_raw =
    *reinterpret_cast<uint16_t*>(&upper_val);
uint16_t lower_raw =
    *reinterpret_cast<uint16_t*>(&lower_val);

D[lane] = upper_raw << 16 | lower_raw;
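// Illustrative sketch (not part of op_encodings.hh): the packed-math shape
// used by the vop3pHelper overloads above, shown for plain uint16_t halves.
// Each 32-bit register holds two 16-bit elements; the op runs once on the
// high halves and once on the low halves, and the two results are repacked
// into a single dword.
#include <cstdint>

inline uint32_t
packedAdd16Sketch(uint32_t s0, uint32_t s1)
{
    uint16_t hi = static_cast<uint16_t>((s0 >> 16) + (s1 >> 16));
    uint16_t lo = static_cast<uint16_t>((s0 & 0xffff) + (s1 & 0xffff));
    return (static_cast<uint32_t>(hi) << 16) | lo;
}
// e.g. packedAdd16Sketch(0x00020001, 0x00300010) == 0x00320011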
uint32_t (*fOpImpl)(uint32_t, uint32_t, uint32_t, bool))
// ...
uint32_t dword1 = (dword1h << 16) | dword1l;
uint32_t dword2 = (dword2h << 16) | dword2l;
// ...
D[lane] = fOpImpl(dword1, dword2, S2[lane], clamp);
word(uint32_t data, int opSel, int neg, int opSelBit)
{
    static_assert(sizeof(T) == 2);

    bool select = bits(opSel, opSelBit, opSelBit);
    uint16_t raw = select ? bits(data, 31, 16)
                          : bits(data, 15, 0);

    bool negate = bits(neg, opSelBit, opSelBit);
    // ...
    return *reinterpret_cast<T*>(&raw);
}
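// Illustrative sketch (not part of op_encodings.hh): the opSel half-word
// selection performed by word<T>() above, written as a standalone function.
// For fp16 sources the selected bits would be reinterpreted as a
// half-precision value; a plain uint16_t stands in for T here.
#include <cstdint>

inline uint16_t
selectHalfSketch(uint32_t data, unsigned opSel, unsigned opSelBit)
{
    bool useHigh = (opSel >> opSelBit) & 1;   // per-operand select bit
    return useHigh ? static_cast<uint16_t>(data >> 16)
                   : static_cast<uint16_t>(data & 0xffff);
}
// e.g. selectHalfSketch(0xBEEFCAFE, 0x1, 0) == 0xBEEF
//      selectHalfSketch(0xBEEFCAFE, 0x0, 0) == 0xCAFE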
if (gpuDynInst->exec_mask[lane]) {
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
        = wf->ldsChunk->read<T>(vaddr);
if (gpuDynInst->exec_mask[lane]) {
    for (int i = 0; i < N; ++i) {
        (reinterpret_cast<T*>(
            gpuDynInst->d_data))[lane * N + i]
            = wf->ldsChunk->read<T>(vaddr + i * sizeof(T));
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
        = wf->ldsChunk->read<T>(vaddr0);
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
        = wf->ldsChunk->read<T>(vaddr1);
if (gpuDynInst->exec_mask[lane]) {
    wf->ldsChunk->write<T>(vaddr,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
if (gpuDynInst->exec_mask[lane]) {
    for (int i = 0; i < N; ++i) {
        wf->ldsChunk->write<T>(vaddr + i * sizeof(T),
            (reinterpret_cast<T*>(
                gpuDynInst->d_data))[lane * N + i]);
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

    wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
        gpuDynInst->d_data))[lane * 2]);
    wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
        gpuDynInst->d_data))[lane * 2 + 1]);
if (gpuDynInst->exec_mask[lane]) {
    // ...
    AtomicOpFunctorPtr amo_op =
        gpuDynInst->makeAtomicOpFunctor<T>(
            &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
            &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]);

    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
        = wf->ldsChunk->atomic<T>(vaddr, std::move(amo_op));
gpuDynInst->addr.at(lane) = (Addr)addr[lane];
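// Illustrative sketch (not part of op_encodings.hh): what the DS (LDS) load
// helpers above boil down to once per-lane addresses are known -- copy
// sizeof(T) bytes per lane from a shared scratchpad into that lane's slot
// of the destination buffer.  The LDS is modelled as a byte vector and the
// exec-mask check is omitted for brevity.
#include <cstdint>
#include <cstring>
#include <vector>

template <typename T>
void
ldsLoadSketch(const std::vector<uint8_t> &lds,
              const std::vector<uint64_t> &laneAddr,  // one address per lane
              std::vector<T> &dData)                  // one element per lane
{
    for (size_t lane = 0; lane < laneAddr.size(); ++lane) {
        T val;
        std::memcpy(&val, lds.data() + laneAddr[lane], sizeof(T));
        dData[lane] = val;
    }
}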
// The same save/mask/restore idiom brackets each of the buffer memory-init
// helpers: lanes flagged in oobMask are dropped from the exec mask while
// the access is issued, then the original mask is restored.
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
// ... issue the masked access ...
gpuDynInst->exec_mask = old_exec_mask;
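// Illustrative sketch (not part of op_encodings.hh): the exec-mask
// save/clear/restore idiom above, using std::bitset directly.
#include <bitset>

inline void
maskedIssueSketch(std::bitset<64> &execMask, const std::bitset<64> &oobMask)
{
    const std::bitset<64> oldExecMask = execMask;
    execMask &= ~oobMask;         // suppress out-of-bounds lanes
    // ... the memory request would be issued here ...
    execMask = oldExecMask;       // restore for subsequent instructions
}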
gpuDynInst->resetEntireStatusVector();
gpuDynInst->setStatusVector(0, 1);
RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                           gpuDynInst->computeUnit()->
                                           requestorId(), 0,
                                           gpuDynInst->wfDynId);
gpuDynInst->setRequestFlags(req);
gpuDynInst->computeUnit()->
    injectGlobalMemFence(gpuDynInst, false, req);
template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
void
calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
         SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
{
    // ...
    Addr buffer_offset = 0;
    // ...
    std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                sizeof(rsrc_desc));
    // ...
    if (gpuDynInst->exec_mask[lane]) {
        vaddr = base_addr + s_offset.rawData();
        // ...
        buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
        buf_off = v_off[lane] + inst_offset;

        // Swizzled addressing path:
        Addr idx_msb = buf_idx / idx_stride;
        Addr idx_lsb = buf_idx % idx_stride;
        Addr off_msb = buf_off / elem_size;
        Addr off_lsb = buf_off % elem_size;
        DPRINTF(VEGA, "mubuf swizzled lane %d: "
                "idx_stride = %llx, elem_size = %llx, "
                "idx_msb = %llx, idx_lsb = %llx, "
                "off_msb = %llx, off_lsb = %llx\n",
                lane, idx_stride, elem_size, idx_msb, idx_lsb,
                off_msb, off_lsb);

        buffer_offset = (idx_msb * stride + off_msb * elem_size)
            * idx_stride + idx_lsb * elem_size + off_lsb;

        // Linear (non-swizzled) path:
        buffer_offset = buf_off + stride * buf_idx;

        // ...
        if (buffer_offset >=
            rsrc_desc.numRecords - s_offset.rawData()) {
            DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                    "lane = %d, buffer_offset = %llx, "
                    "const_stride = %llx, "
                    "const_num_records = %llx\n",
                    lane, buf_off + stride * buf_idx,
                    stride, rsrc_desc.numRecords);
            // ...
        }
        // ...
        DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
                "lane = %d, offset = %llx, "
                "buf_idx = %llx, "
                "const_num_records = %llx\n",
                lane, buf_off, buf_idx,
                rsrc_desc.numRecords);
        // ...
        vaddr += buffer_offset;

        DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
                "vaddr = %llx, base_addr = %llx, "
                "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                lane, vaddr, base_addr, stride, buf_idx, buf_off);
        gpuDynInst->addr.at(lane) = vaddr;
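// Illustrative sketch (not part of op_encodings.hh): the swizzled buffer
// offset arithmetic above as a standalone function, using the same formula
//   offset = (idx_msb * stride + off_msb * elem_size) * idx_stride
//            + idx_lsb * elem_size + off_lsb
// All parameters are plain integers here; in the header they come from the
// buffer resource descriptor and the per-lane VGPR values.
#include <cstdint>

inline uint64_t
swizzledBufferOffsetSketch(uint64_t buf_idx, uint64_t buf_off,
                           uint64_t stride, uint64_t elem_size,
                           uint64_t idx_stride)
{
    uint64_t idx_msb = buf_idx / idx_stride;
    uint64_t idx_lsb = buf_idx % idx_stride;
    uint64_t off_msb = buf_off / elem_size;
    uint64_t off_lsb = buf_off % elem_size;

    return (idx_msb * stride + off_msb * elem_size) * idx_stride
           + idx_lsb * elem_size + off_lsb;
}
// For the non-swizzled case the offset is simply buf_off + stride * buf_idx.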
template<typename T>
// ...
if (gpuDynInst->executedAs() == enums::SC_GLOBAL ||
    gpuDynInst->executedAs() == enums::SC_PRIVATE) {
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    // ...
    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
            = wf->ldsChunk->read<T>(vaddr);
if (gpuDynInst->executedAs() == enums::SC_GLOBAL ||
    gpuDynInst->executedAs() == enums::SC_PRIVATE) {
    initMemReqHelper<T, N>(gpuDynInst, MemCmd::ReadReq);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    // ...
    if (gpuDynInst->exec_mask[lane]) {
        for (int i = 0; i < N; ++i) {
            (reinterpret_cast<T*>(
                gpuDynInst->d_data))[lane * N + i]
                = wf->ldsChunk->read<T>(vaddr + i * sizeof(T));
template<typename T>
// ...
if (gpuDynInst->executedAs() == enums::SC_GLOBAL ||
    gpuDynInst->executedAs() == enums::SC_PRIVATE) {
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    // ...
    if (gpuDynInst->exec_mask[lane]) {
        wf->ldsChunk->write<T>(vaddr,
            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
if (gpuDynInst->executedAs() == enums::SC_GLOBAL ||
    gpuDynInst->executedAs() == enums::SC_PRIVATE) {
    initMemReqHelper<T, N>(gpuDynInst, MemCmd::WriteReq);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    // ...
    if (gpuDynInst->exec_mask[lane]) {
        for (int i = 0; i < N; ++i) {
            wf->ldsChunk->write<T>(vaddr + i * sizeof(T),
                (reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N + i]);
template<typename T>
// ...
assert(gpuDynInst->executedAs() != enums::SC_PRIVATE);

if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    // ...
    if (gpuDynInst->exec_mask[lane]) {
        // ...
        AtomicOpFunctorPtr amo_op =
            gpuDynInst->makeAtomicOpFunctor<T>(
                &(reinterpret_cast<T*>(
                    gpuDynInst->a_data))[lane],
                &(reinterpret_cast<T*>(
                    gpuDynInst->x_data))[lane]);
        // ...
        (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
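// Illustrative sketch (not part of op_encodings.hh): the shape of an atomic
// functor like the one makeAtomicOpFunctor<T>() builds above -- a callable
// that captures its operand and applies the operation in place to a memory
// location handed to it as raw bytes.  Names here are hypothetical.
#include <cstdint>
#include <cstring>
#include <functional>

template <typename T>
std::function<void(uint8_t *)>
makeAtomicAddSketch(T operand)
{
    return [operand](uint8_t *mem) {
        T cur;
        std::memcpy(&cur, mem, sizeof(T));
        cur += operand;                    // the "op" part of the atomic
        std::memcpy(mem, &cur, sizeof(T));
    };
}
// Usage: auto op = makeAtomicAddSketch<uint32_t>(5); op(bytePtrToLdsWord);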
} else if (saddr != 0x7f) {
auto staticInst = gpuDynInst->staticInstruction();
if (gpuDynInst->isLoad()) {
    elemSize = staticInst->getOperandSize(2);
} else {
    assert(gpuDynInst->isStore());
    elemSize = staticInst->getOperandSize(1);
}
// ...
if (gpuDynInst->exec_mask[lane]) {
    swizzleOffset += instData.SVE ? voffset[lane] : 0;
    gpuDynInst->addr.at(lane) = flat_scratch_addr
        + swizzle(swizzleOffset, lane, elemSize);
auto staticInst = gpuDynInst->staticInstruction();
if (gpuDynInst->isLoad()) {
    elemSize = staticInst->getOperandSize(2);
} else {
    assert(gpuDynInst->isStore());
    elemSize = staticInst->getOperandSize(1);
}
// ...
if (gpuDynInst->exec_mask[lane]) {
    // ...
    gpuDynInst->addr.at(lane) = flat_scratch_addr
        + swizzle(swizzleOffset, lane, elemSize);
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
// ...
gpuDynInst->staticInstruction()->executed_as = enums::SC_GLOBAL;
// ...
gpuDynInst->staticInstruction()->executed_as = enums::SC_PRIVATE;
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
if ((gpuDynInst->executedAs() == enums::SC_GLOBAL && isFlat())
    || isFlatGlobal()) {
    gpuDynInst->computeUnit()->globalMemoryPipe
        .issueRequest(gpuDynInst);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    gpuDynInst->computeUnit()->localMemoryPipe
        .issueRequest(gpuDynInst);
} else {
    assert(gpuDynInst->executedAs() == enums::SC_PRIVATE);
    gpuDynInst->computeUnit()->globalMemoryPipe
        .issueRequest(gpuDynInst);
}
template<typename RegT, typename LaneT, int CmpRegOffset = 0>
// ...
Wavefront *wf = gpuDynInst->wavefront();

if (gpuDynInst->exec_mask.none()) {
    // ...
}
// ...
gpuDynInst->latency.init(gpuDynInst->computeUnit());
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
// ...
RegT cmp(gpuDynInst, extData.DATA + CmpRegOffset);
// ...
if constexpr (CmpRegOffset) {
    // ...
}
// ...
if (gpuDynInst->exec_mask[lane]) {
    if constexpr (CmpRegOffset) {
        (reinterpret_cast<LaneT*>(
            gpuDynInst->x_data))[lane] = data[lane];
        (reinterpret_cast<LaneT*>(
            gpuDynInst->a_data))[lane] = cmp[lane];
    } else {
        (reinterpret_cast<LaneT*>(gpuDynInst->a_data))[lane]
            = data[lane];
    }
}
template<typename RegT, typename LaneT>
// ...
if (gpuDynInst->exec_mask[lane]) {
    vdst[lane] = (reinterpret_cast<LaneT*>(
        gpuDynInst->d_data))[lane];
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = /* ... */;
// ...
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = addr[lane] + offset;
return ((offset / 4) * 4 * 64) + (offset % 4) + (lane * elem_size);
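// Illustrative sketch (not part of op_encodings.hh): the scratch swizzle
// above as a standalone function plus a worked value.  Each dword of a
// lane's scratch space is interleaved across the 64 lanes: dword d of lane
// l lands at d*4*64 + (byte within dword) + l*elem_size.
#include <cstdint>

inline uint32_t
scratchSwizzleSketch(uint32_t offset, int lane, int elem_size)
{
    return ((offset / 4) * 4 * 64) + (offset % 4) + (lane * elem_size);
}
// e.g. scratchSwizzleSketch(8, 3, 4) == 512 + 0 + 12 == 524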
return gpuDynInst->computeUnit()->shader->getScratchBase();