#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

// ...
#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"
// Inst_SMEM::initMemRead / initMemWrite: initiate a scalar memory
// access for N dwords through the shared scalar request helper.
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                        MemCmd::ReadReq);
// ...
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                        MemCmd::WriteReq);

// Inst_SMEM::calcAddr for normal s_load_dword/s_store_dword
// addresses: the computed base-plus-offset lands in scalarAddr.
gpu_dyn_inst->scalarAddr = vaddr;

// Inst_SMEM::calcAddr for s_buffer_load_dword/s_buffer_store_dword:
// the 128-bit SRSRC operand is unpacked into a buffer resource
// descriptor, whose base address feeds the final scalar address.
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
            sizeof(BufferRsrcDescriptor));
// ...
gpu_dyn_inst->scalarAddr = vaddr;
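Taken together, the buffer variant amounts to: unpack the descriptor from the raw 128-bit register bytes, then derive a dword-aligned scalar address from its base plus the offset. A minimal standalone sketch of that flow, using an illustrative, unpacked stand-in for the real packed 128-bit descriptor layout (the field names, sample values, and alignment mask here are assumptions, not the gem5 classes):

#include <cstdint>
#include <cstring>
#include <iostream>

using Addr = uint64_t;

// Simplified stand-in: the real descriptor packs baseAddr into 48 bits.
struct BufferRsrcDescriptor
{
    Addr baseAddr;
    uint32_t stride;
    uint32_t numRecords;
};

int main()
{
    // rawDataPtr() on the SRSRC operand would hand back raw bytes
    // like these; memcpy reinterprets them as a descriptor.
    BufferRsrcDescriptor src = {0x1000, 0, 256};
    uint8_t raw[sizeof(src)];
    std::memcpy(raw, &src, sizeof(src));

    BufferRsrcDescriptor rsrc_desc;
    std::memcpy(&rsrc_desc, raw, sizeof(rsrc_desc));

    // Scalar address: descriptor base plus instruction offset,
    // forced down to dword alignment.
    Addr offset = 0x1d;
    Addr vaddr = (rsrc_desc.baseAddr + offset) & ~Addr(0x3);
    std::cout << std::hex << "scalarAddr = 0x" << vaddr << "\n"; // 0x101c
}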
// Inst_DS::initMemRead<T>: per-lane LDS read for active lanes.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
        = wf->ldsChunk->read<T>(vaddr);
}

// Inst_DS::initMemRead<N>: read N consecutive dwords per lane.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    for (int i = 0; i < N; ++i) {
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * N + i]
            = wf->ldsChunk->read<VecElemU32>(
                vaddr + i * sizeof(VecElemU32));
    }
}
// Inst_DS::initDualMemRead<T>: two reads per lane at independent
// offsets; the results land in adjacent d_data slots.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
        = wf->ldsChunk->read<T>(vaddr0);
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
        = wf->ldsChunk->read<T>(vaddr1);
}
// Inst_DS::initMemWrite<T>: per-lane LDS write for active lanes.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    wf->ldsChunk->write<T>(vaddr,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
}
// Inst_DS::initMemWrite<N>: write N consecutive dwords per lane.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    for (int i = 0; i < N; ++i) {
        wf->ldsChunk->write<VecElemU32>(
            vaddr + i * sizeof(VecElemU32),
            (reinterpret_cast<VecElemU32*>(
                gpuDynInst->d_data))[lane * N + i]);
    }
}
// Inst_DS::initDualMemWrite<T>: two writes per lane at independent
// offsets, mirroring the dual-read layout.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
    wf->ldsChunk->write<T>(vaddr0,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]);
    wf->ldsChunk->write<T>(vaddr1,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]);
}
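All of the indexing above ([lane], [lane * N + i], [lane * 2], [lane * 2 + 1]) describes a single lane-major layout of the d_data staging buffer. A small self-contained sketch of that layout, shrunk to 4 lanes for readability (VEGA's NumVecElemPerVecReg is 64):

#include <cstdint>
#include <iostream>

int main()
{
    constexpr int numLanes = 4;   // stand-in for NumVecElemPerVecReg
    constexpr int N = 2;          // e.g. a two-dword access per lane

    // d_data holds N consecutive dwords per lane, lane-major:
    // lane0[0], lane0[1], lane1[0], lane1[1], ...
    uint32_t d_data[numLanes * N];

    for (int lane = 0; lane < numLanes; ++lane) {
        for (int i = 0; i < N; ++i) {
            d_data[lane * N + i] = lane * 100 + i;
        }
    }

    // The dual read/write helpers use the same layout with N fixed
    // at 2: slot [lane * 2] pairs with vaddr0, [lane * 2 + 1] with
    // vaddr1.
    for (int lane = 0; lane < numLanes; ++lane) {
        std::cout << "lane " << lane << ": "
                  << d_data[lane * 2] << ", "
                  << d_data[lane * 2 + 1] << "\n";
    }
}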
// Inst_DS::initAtomicAccess<T>: build an atomic functor over the
// lane's operands and apply it to the LDS location; the old value is
// returned into d_data.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;

    AtomicOpFunctorPtr amo_op =
        gpuDynInst->makeAtomicOpFunctor<T>(
            &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
            &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]);

    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
        = wf->ldsChunk->atomic<T>(vaddr, std::move(amo_op));
}
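The functor built by makeAtomicOpFunctor wraps the lane's source operand(s) and mutates the memory word in place when applied. A stripped-down sketch of that pattern (AtomicAdd is a made-up example op for illustration; gem5's actual functor hierarchy lives elsewhere):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>

// Skeleton of the functor interface: operator() mutates the memory
// word in place, which is exactly how (*amo_op)(mem) is used above.
struct AtomicOpFunctor
{
    virtual void operator()(uint8_t *mem) = 0;
    virtual ~AtomicOpFunctor() = default;
};
using AtomicOpFunctorPtr = std::unique_ptr<AtomicOpFunctor>;

template<typename T>
struct AtomicAdd : AtomicOpFunctor
{
    T a;  // operand captured from the lane's a_data slot
    explicit AtomicAdd(T _a) : a(_a) {}
    void operator()(uint8_t *mem) override
    {
        T val;
        std::memcpy(&val, mem, sizeof(T));
        val += a;
        std::memcpy(mem, &val, sizeof(T));
    }
};

int main()
{
    uint32_t ldsWord = 40;                    // word read from LDS
    AtomicOpFunctorPtr amo_op =
        std::make_unique<AtomicAdd<uint32_t>>(2);
    (*amo_op)(reinterpret_cast<uint8_t *>(&ldsWord));
    std::cout << ldsWord << "\n";             // 42, written back
}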
// Inst_DS::calcAddr: LDS addresses come straight from the 32-bit
// VGPR operand.
gpuDynInst->addr.at(lane) = (Addr)addr[lane];
// Inst_MUBUF::initMemRead / initMemWrite (both the <T> and <N>-dword
// variants) all share one idiom: temporarily drop out-of-bounds lanes
// from the exec mask so only in-bounds, active lanes issue memory
// requests, then restore the mask.
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
// ... issue the read or write request for the surviving lanes ...
gpuDynInst->exec_mask = old_exec_mask;
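The same save/mask/restore idiom in isolation, using the document's VectorMask typedef directly; oobMask bits mark lanes flagged out of bounds by calcAddr, and only active, in-bounds lanes survive the AND:

#include <bitset>
#include <iostream>
#include <limits>

using VectorMask =
    std::bitset<std::numeric_limits<unsigned long long>::digits>;

int main()
{
    VectorMask exec_mask;
    exec_mask.set(0); exec_mask.set(1); exec_mask.set(2);

    VectorMask oobMask;
    oobMask.set(1);               // lane 1 was flagged out of bounds

    // Temporarily drop OOB lanes, issue the request, then restore.
    VectorMask old_exec_mask = exec_mask;
    exec_mask &= ~oobMask;
    std::cout << "issuing with lanes 0 and 2: "
              << exec_mask.test(0) << exec_mask.test(1)
              << exec_mask.test(2) << "\n";
    exec_mask = old_exec_mask;    // later ops see all lanes again
}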
// Inst_MUBUF::injectGlobalMemFence: build a zero-size request tagged
// with the compute unit's requestor id and inject it as a global
// memory fence.
gpuDynInst->resetEntireStatusVector();
gpuDynInst->setStatusVector(0, 1);
RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                           gpuDynInst->computeUnit()->
                                           requestorId(), 0,
                                           gpuDynInst->wfDynId);
gpuDynInst->setRequestFlags(req);
gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
// Inst_MUBUF::calcAddr: MUBUF instructions calculate their addresses
// from a vector offset (VOFF), a vector index (VIDX), a 128-bit
// buffer resource descriptor (SRSRC), a scalar offset (SOFF), and
// the instruction's immediate offset.
template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
void
calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
         SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
{
    Addr vaddr = 0;
    Addr base_addr = 0;
    Addr stride = 0;
    Addr buf_idx = 0;
    Addr buf_off = 0;
    Addr buffer_offset = 0;
    BufferRsrcDescriptor rsrc_desc;

    std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                sizeof(BufferRsrcDescriptor));

    base_addr = rsrc_desc.baseAddr;
    // ... (stride is likewise decoded from the descriptor) ...

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vaddr = base_addr + s_offset.rawData();
            // Add the offset from the VGPRs (if any) and the
            // instruction's immediate offset.
            buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
            buf_off = v_off[lane] + inst_offset;

            if (rsrc_desc.swizzleEn) {
                Addr idx_stride = 8 << rsrc_desc.idxStride;
                Addr elem_size = 2 << rsrc_desc.elemSize;
                Addr idx_msb = buf_idx / idx_stride;
                Addr idx_lsb = buf_idx % idx_stride;
                Addr off_msb = buf_off / elem_size;
                Addr off_lsb = buf_off % elem_size;
                DPRINTF(VEGA, "mubuf swizzled lane %d: "
                        "idx_stride = %llx, elem_size = %llx, "
                        "idx_msb = %llx, idx_lsb = %llx, "
                        "off_msb = %llx, off_lsb = %llx\n",
                        lane, idx_stride, elem_size, idx_msb, idx_lsb,
                        off_msb, off_lsb);

                buffer_offset = (idx_msb * stride + off_msb * elem_size)
                    * idx_stride + idx_lsb * elem_size + off_lsb;
            } else {
                buffer_offset = buf_off + stride * buf_idx;
            }

            // Out-of-range accesses return 0 for loads and are
            // ignored for stores; offending lanes are recorded in
            // oobMask and skipped.
            if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
                if (buffer_offset >=
                    rsrc_desc.numRecords - s_offset.rawData()) {
                    DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                            "lane = %d, buffer_offset = %llx, "
                            "const_stride = %llx, "
                            "const_num_records = %llx\n",
                            lane, buf_off + stride * buf_idx,
                            stride, rsrc_desc.numRecords);
                    oobMask.set(lane);
                    continue;
                }
            }

            if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) {
                if (buf_idx >= rsrc_desc.numRecords ||
                    buf_off >= stride) {
                    DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
                            "lane = %d, offset = %llx, "
                            "index = %llx, "
                            "const_num_records = %llx\n",
                            lane, buf_off, buf_idx,
                            rsrc_desc.numRecords);
                    oobMask.set(lane);
                    continue;
                }
            }

            vaddr += buffer_offset;

            DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
                    "vaddr = %llx, base_addr = %llx, "
                    "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                    lane, vaddr, base_addr, stride, buf_idx, buf_off);
            gpuDynInst->addr.at(lane) = vaddr;
        }
    }
}
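To make the swizzled branch concrete, here is the same arithmetic as a standalone computation with sample descriptor values (the values themselves are made up; the `8 << idxStride` / `2 << elemSize` decode mirrors the snippet above):

#include <cstdint>
#include <iostream>

using Addr = uint64_t;

int main()
{
    // Sample descriptor fields (illustrative values only).
    Addr stride = 16;              // record stride in bytes
    Addr idx_stride = 8 << 0;      // 8 << rsrc_desc.idxStride
    Addr elem_size = 2 << 1;       // 2 << rsrc_desc.elemSize
    Addr buf_idx = 10;             // v_idx[lane] (+ lane if addTidEn)
    Addr buf_off = 6;              // v_off[lane] + inst_offset

    Addr idx_msb = buf_idx / idx_stride;   // 10 / 8 = 1
    Addr idx_lsb = buf_idx % idx_stride;   // 10 % 8 = 2
    Addr off_msb = buf_off / elem_size;    //  6 / 4 = 1
    Addr off_lsb = buf_off % elem_size;    //  6 % 4 = 2

    Addr swizzled = (idx_msb * stride + off_msb * elem_size)
        * idx_stride + idx_lsb * elem_size + off_lsb;
    Addr linear = buf_off + stride * buf_idx;

    std::cout << "swizzled = " << swizzled   // (16 + 4)*8 + 8 + 2 = 170
              << ", linear = " << linear     // 6 + 16*10 = 166
              << "\n";
}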
// Inst_FLAT::initMemRead<T>: flat reads go through the global memory
// request helper when the access resolves to global memory, or read
// the LDS chunk directly when it resolves to group (shared) memory.
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            Addr vaddr = gpuDynInst->addr[lane];
            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                = wf->ldsChunk->read<T>(vaddr);
        }
    }
}
// Inst_FLAT::initMemRead<N>: the same dispatch, reading N dwords per
// lane on the group path.
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
    initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            Addr vaddr = gpuDynInst->addr[lane];
            for (int i = 0; i < N; ++i) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * N + i]
                    = wf->ldsChunk->read<VecElemU32>(
                        vaddr + i * sizeof(VecElemU32));
            }
        }
    }
}
// Inst_FLAT::initMemWrite<T>: same dispatch as the read path.
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            Addr vaddr = gpuDynInst->addr[lane];
            wf->ldsChunk->write<T>(vaddr,
                (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
        }
    }
}
// Inst_FLAT::initMemWrite<N>: write N dwords per lane on the group
// path.
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
    initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            Addr vaddr = gpuDynInst->addr[lane];
            for (int i = 0; i < N; ++i) {
                wf->ldsChunk->write<VecElemU32>(
                    vaddr + i * sizeof(VecElemU32),
                    (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * N + i]);
            }
        }
    }
}
// Inst_FLAT::initAtomicAccess<T>: global atomics go through the
// request helper; group atomics are emulated as a read-modify-write
// on the LDS chunk.
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
    initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    Wavefront *wf = gpuDynInst->wavefront();
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            Addr vaddr = gpuDynInst->addr[lane];
            AtomicOpFunctorPtr amo_op =
                gpuDynInst->makeAtomicOpFunctor<T>(
                    &(reinterpret_cast<T*>(
                        gpuDynInst->a_data))[lane],
                    &(reinterpret_cast<T*>(
                        gpuDynInst->x_data))[lane]);

            T tmp = wf->ldsChunk->read<T>(vaddr);
            (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));
            wf->ldsChunk->write<T>(vaddr, tmp);
            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
        }
    }
}
// Inst_FLAT::calcAddr resolves which segment each lane actually
// targets, then records the result on the static instruction:
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
gpuDynInst->staticInstruction()->executed_as =
    gpuDynInst->executedAs();

// Inst_FLAT::issueRequestHelper: route the request to the global or
// local memory pipeline based on the resolved segment.
if ((gpuDynInst->executedAs() == enums::SC_GLOBAL && isFlat())
    || isFlatGlobal()) {
    gpuDynInst->computeUnit()->globalMemoryPipe
        .issueRequest(gpuDynInst);
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    gpuDynInst->computeUnit()->localMemoryPipe
        .issueRequest(gpuDynInst);
} else {
    fatal("Unsupported scope for flat instruction.\n");
}
// Inst_FLAT::calcAddrSgpr: scalar 64-bit base plus per-lane VGPR
// offset plus the signed instruction offset.
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) =
        saddr.rawData() + vaddr[lane] + offset;
}

// Inst_FLAT::calcAddrVgpr: per-lane 64-bit VGPR address plus the
// signed instruction offset.
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = addr[lane] + offset;
}
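Both flat address helpers reduce to simple per-lane arithmetic: calcAddrVgpr adds the instruction offset to a per-lane 64-bit VGPR address, while calcAddrSgpr starts from a scalar 64-bit base and adds per-lane offsets. A sketch contrasting the two with illustrative values:

#include <cstdint>
#include <iostream>

using Addr = uint64_t;

int main()
{
    constexpr int numLanes = 4;
    int32_t offset = 0x10;                 // signed instruction offset

    // calcAddrVgpr style: each lane supplies a full 64-bit address.
    Addr vgprAddr[numLanes] = {0x1000, 0x2000, 0x3000, 0x4000};

    // calcAddrSgpr style: one scalar base, per-lane 32-bit offsets.
    Addr saddr = 0x10000;
    uint32_t voffset[numLanes] = {0x0, 0x4, 0x8, 0xc};

    for (int lane = 0; lane < numLanes; ++lane) {
        Addr a = vgprAddr[lane] + offset;
        Addr b = saddr + voffset[lane] + offset;
        std::cout << "lane " << lane << std::hex
                  << ": vgpr-mode 0x" << a
                  << ", sgpr-mode 0x" << b << std::dec << "\n";
    }
}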
Inst_DS members:
    Inst_DS(InFmt_DS *, const std::string &opcode)
    int instSize() const override
    void initOperandInfo() override
    void generateDisassembly() override
    void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
    void initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
    void initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
    void initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
    void initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
    void initAtomicAccess(GPUDynInstPtr gpuDynInst, Addr offset)

Referenced helpers:
    const std::string &opcode() const
    bool isFlatGlobal() const
    T read(const uint32_t index) -- a read operation on the LDS chunk
    void write(const uint32_t index, const T value) -- a write operation
    T atomic(const uint32_t index, AtomicOpFunctorPtr amoOp) -- an atomic operation
Inst_EXP members:
    Inst_EXP(InFmt_EXP *, const std::string &opcode)
    int instSize() const override
    void initOperandInfo() override
Inst_FLAT members:
    Inst_FLAT(InFmt_FLAT *, const std::string &opcode)
    int instSize() const override
    void initOperandInfo() override
    void initFlatOperandInfo()
    void initGlobalOperandInfo()
    void generateDisassembly() override
    void generateFlatDisassembly()
    void generateGlobalDisassembly()
    void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr, ScalarRegU32 saddr, ScalarRegI32 offset)
    void calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr, ConstScalarOperandU64 &saddr, ScalarRegI32 offset)
    void calcAddrVgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr, ScalarRegI32 offset)
    void issueRequestHelper(GPUDynInstPtr gpuDynInst)
    void initMemRead(GPUDynInstPtr gpuDynInst)
    void initMemWrite(GPUDynInstPtr gpuDynInst)
    void initAtomicAccess(GPUDynInstPtr gpuDynInst)
Inst_MIMG members:
    Inst_MIMG(InFmt_MIMG *, const std::string &opcode)
    int instSize() const override
    void initOperandInfo() override
Inst_MTBUF members:
    Inst_MTBUF(InFmt_MTBUF *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_MTBUF *)
    void initOperandInfo() override
Inst_MUBUF members:
    Inst_MUBUF(InFmt_MUBUF *, const std::string &opcode)
    int instSize() const override
    void initOperandInfo() override
    void generateDisassembly() override
    void calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx, SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset) -- MUBUF instructions calculate their addresses as shown in the calcAddr snippet above
    void initMemRead(GPUDynInstPtr gpuDynInst)
    void initMemWrite(GPUDynInstPtr gpuDynInst)
    void injectGlobalMemFence(GPUDynInstPtr gpuDynInst)

Inst_SMEM members:
    Inst_SMEM(InFmt_SMEM *, const std::string &opcode)
    int instSize() const override
    void initOperandInfo() override
    void generateDisassembly() override
    void calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr, ScalarRegU32 offset) -- for normal s_load_dword/s_store_dword instruction addresses
    void calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset) -- for s_buffer_load_dword/s_buffer_store_dword instruction addresses
    void initMemRead(GPUDynInstPtr gpuDynInst) -- initiate a memory read access for N dwords
    void initMemWrite(GPUDynInstPtr gpuDynInst) -- initiate a memory write access for N dwords
Inst_SOP1 members:
    Inst_SOP1(InFmt_SOP1 *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_SOP1 *)
    void initOperandInfo() override
    void generateDisassembly() override

Inst_SOP2 members:
    Inst_SOP2(InFmt_SOP2 *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_SOP2 *)
    void initOperandInfo() override
    void generateDisassembly() override

Inst_SOPC members:
    Inst_SOPC(InFmt_SOPC *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_SOPC *)
    void initOperandInfo() override
    void generateDisassembly() override

Inst_SOPK members:
    Inst_SOPK(InFmt_SOPK *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_SOPK *)
    void initOperandInfo() override
    void generateDisassembly() override

Inst_SOPP members:
    Inst_SOPP(InFmt_SOPP *, const std::string &opcode)
    int instSize() const override
    void initOperandInfo() override
    void generateDisassembly() override

Inst_VINTRP members:
    Inst_VINTRP(InFmt_VINTRP *, const std::string &opcode)
    int instSize() const override
Inst_VOP1 members:
    Inst_VOP1(InFmt_VOP1 *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_VOP1 *)
    void initOperandInfo() override
    void generateDisassembly() override

Inst_VOP2 members:
    Inst_VOP2(InFmt_VOP2 *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_VOP2 *)
    void initOperandInfo() override
    void generateDisassembly() override

Inst_VOP3A members:
    Inst_VOP3A(InFmt_VOP3A *, const std::string &opcode, bool sgpr_dst)
    int instSize() const override
    bool hasSecondDword(InFmt_VOP3A *)
    void initOperandInfo() override
    void generateDisassembly() override
    const bool sgprDst -- the v_cmp and readlane instructions in the VOP3 encoding are unique because they are the only instructions that use the VDST field to specify a scalar register destination; the constructor's sgpr_dst flag records this

Inst_VOP3B members:
    Inst_VOP3B(InFmt_VOP3B *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_VOP3B *)
    void initOperandInfo() override
    void generateDisassembly() override

Inst_VOPC members:
    Inst_VOPC(InFmt_VOPC *, const std::string &opcode)
    int instSize() const override
    bool hasSecondDword(InFmt_VOPC *)
    void initOperandInfo() override
    void generateDisassembly() override
Scalar operand helpers (operand.hh):
    std::enable_if<Condition, DataType>::type rawData() const
        -- scalar data is stored in a std::array; when the full operand
           value is needed, this method copies all elements of the
           scalar operand data into a single primitive type
    void read() override
        -- read from (and, via its write() counterpart, write to) the
           underlying register(s) that this operand refers to
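A sketch of what rawData() accomplishes for a two-dword scalar operand: collapsing the std::array backing store into one primitive value (memcpy keeps the reinterpretation well-defined C++; the little-endian layout noted in the comment is an assumption):

#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>

int main()
{
    // Two dwords of scalar register storage, e.g. an SGPR pair
    // holding a 64-bit address (little-endian layout assumed, so
    // element 0 supplies the low 32 bits).
    std::array<uint32_t, 2> srfData = {0x89abcdef, 0x01234567};

    uint64_t full;
    std::memcpy(&full, srfData.data(), sizeof(full));
    std::cout << std::hex << "0x" << full << "\n"; // 0x123456789abcdef
}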
Related declarations referenced above:
    using AtomicOpFunctorPtr = std::unique_ptr<AtomicOpFunctor>;
    #define fatal(...) -- implements a cprintf-based fatal() function
    const int NumVecElemPerVecReg(64);
    using RequestPtr = std::shared_ptr<Request>;
    using GPUDynInstPtr = std::shared_ptr<GPUDynInst>;
    using Addr = uint64_t; -- address type; this will probably be moved
        somewhere else in the near future
    using VectorMask =
        std::bitset<std::numeric_limits<unsigned long long>::digits>;