32 #ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__
33 #define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__
39 #include "debug/GCN3.hh"
40 #include "debug/GPUExec.hh"
195 initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
206 initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
218 gpu_dyn_inst->scalarAddr =
vaddr;
232 std::memcpy((
void*)&rsrc_desc, s_rsrc_desc.
rawDataPtr(),
248 gpu_dyn_inst->scalarAddr =
vaddr;
408 if (gpuDynInst->exec_mask[lane]) {
411 (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane]
424 if (gpuDynInst->exec_mask[lane]) {
426 for (
int i = 0;
i < N; ++
i) {
428 gpuDynInst->d_data))[lane * N +
i]
443 if (gpuDynInst->exec_mask[lane]) {
444 Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
445 Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
447 (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane * 2]
449 (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane * 2 + 1]
462 if (gpuDynInst->exec_mask[lane]) {
465 (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane]);
477 if (gpuDynInst->exec_mask[lane]) {
479 for (
int i = 0;
i < N; ++
i) {
483 gpuDynInst->d_data))[lane * N +
i]);
496 if (gpuDynInst->exec_mask[lane]) {
497 Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
498 Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
500 gpuDynInst->d_data))[lane * 2]);
502 gpuDynInst->d_data))[lane * 2 + 1]);
514 gpuDynInst->addr.at(lane) = (
Addr)
addr[lane];
544 VectorMask old_exec_mask = gpuDynInst->exec_mask;
545 gpuDynInst->exec_mask &= ~
oobMask;
547 gpuDynInst->exec_mask = old_exec_mask;
558 VectorMask old_exec_mask = gpuDynInst->exec_mask;
559 gpuDynInst->exec_mask &= ~
oobMask;
561 gpuDynInst->exec_mask = old_exec_mask;
571 VectorMask old_exec_mask = gpuDynInst->exec_mask;
572 gpuDynInst->exec_mask &= ~
oobMask;
574 gpuDynInst->exec_mask = old_exec_mask;
584 VectorMask old_exec_mask = gpuDynInst->exec_mask;
585 gpuDynInst->exec_mask &= ~
oobMask;
587 gpuDynInst->exec_mask = old_exec_mask;
594 gpuDynInst->resetEntireStatusVector();
595 gpuDynInst->setStatusVector(0, 1);
596 RequestPtr req = std::make_shared<Request>(0, 0, 0,
597 gpuDynInst->computeUnit()->
599 gpuDynInst->wfDynId);
600 gpuDynInst->setRequestFlags(req);
601 gpuDynInst->computeUnit()->
625 template<
typename VOFF,
typename VIDX,
typename SRSRC,
typename SOFF>
628 SRSRC s_rsrc_desc, SOFF s_offset,
int inst_offset)
635 Addr buffer_offset = 0;
638 std::memcpy((
void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
647 if (gpuDynInst->exec_mask[lane]) {
648 vaddr = base_addr + s_offset.rawData();
654 buf_idx = v_idx[lane] + (rsrc_desc.
addTidEn ? lane : 0);
656 buf_off = v_off[lane] + inst_offset;
661 Addr idx_msb = buf_idx / idx_stride;
662 Addr idx_lsb = buf_idx % idx_stride;
663 Addr off_msb = buf_off / elem_size;
664 Addr off_lsb = buf_off % elem_size;
665 DPRINTF(GCN3,
"mubuf swizzled lane %d: "
666 "idx_stride = %llx, elem_size = %llx, "
667 "idx_msb = %llx, idx_lsb = %llx, "
668 "off_msb = %llx, off_lsb = %llx\n",
669 lane, idx_stride, elem_size, idx_msb, idx_lsb,
672 buffer_offset =(idx_msb *
stride + off_msb * elem_size)
673 * idx_stride + idx_lsb * elem_size + off_lsb;
675 buffer_offset = buf_off +
stride * buf_idx;
689 DPRINTF(GCN3,
"mubuf out-of-bounds condition 1: "
690 "lane = %d, buffer_offset = %llx, "
691 "const_stride = %llx, "
692 "const_num_records = %llx\n",
693 lane, buf_off +
stride * buf_idx,
703 DPRINTF(GCN3,
"mubuf out-of-bounds condition 2: "
704 "lane = %d, offset = %llx, "
706 "const_num_records = %llx\n",
707 lane, buf_off, buf_idx,
714 vaddr += buffer_offset;
716 DPRINTF(GCN3,
"Calculating mubuf address for lane %d: "
717 "vaddr = %llx, base_addr = %llx, "
718 "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
721 gpuDynInst->addr.at(lane) =
vaddr;
803 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
805 }
else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
808 if (gpuDynInst->exec_mask[lane]) {
810 (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane]
821 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
823 }
else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
826 if (gpuDynInst->exec_mask[lane]) {
828 for (
int i = 0;
i < N; ++
i) {
830 gpuDynInst->d_data))[lane * N +
i]
843 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
845 }
else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
848 if (gpuDynInst->exec_mask[lane]) {
851 (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane]);
861 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
863 }
else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
866 if (gpuDynInst->exec_mask[lane]) {
868 for (
int i = 0;
i < N; ++
i) {
872 gpuDynInst->d_data))[lane * N +
i]);
883 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
885 }
else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
888 if (gpuDynInst->exec_mask[lane]) {
891 gpuDynInst->makeAtomicOpFunctor<T>(
892 &(
reinterpret_cast<T*
>(
893 gpuDynInst->a_data))[lane],
894 &(
reinterpret_cast<T*
>(
895 gpuDynInst->x_data))[lane]);
898 (*amo_op)(
reinterpret_cast<uint8_t *
>(&tmp));
900 (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane] = tmp;
910 if (gpuDynInst->exec_mask[lane]) {
911 gpuDynInst->addr.at(lane) =
addr[lane];
914 gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
const std::string & opcode() const
void initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
void generateDisassembly() override
void initOperandInfo() override
void initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
Inst_DS(InFmt_DS *, const std::string &opcode)
void initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
void initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
int instSize() const override
void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
Inst_EXP(InFmt_EXP *, const std::string &opcode)
int instSize() const override
void initOperandInfo() override
void initMemRead(GPUDynInstPtr gpuDynInst)
void initMemWrite(GPUDynInstPtr gpuDynInst)
void initAtomicAccess(GPUDynInstPtr gpuDynInst)
void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr)
Inst_FLAT(InFmt_FLAT *, const std::string &opcode)
int instSize() const override
void generateDisassembly() override
void initOperandInfo() override
int instSize() const override
void initOperandInfo() override
Inst_MIMG(InFmt_MIMG *, const std::string &opcode)
Inst_MTBUF(InFmt_MTBUF *, const std::string &opcode)
void initOperandInfo() override
int instSize() const override
bool hasSecondDword(InFmt_MTBUF *)
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
void initMemWrite(GPUDynInstPtr gpuDynInst)
void initOperandInfo() override
Inst_MUBUF(InFmt_MUBUF *, const std::string &opcode)
int instSize() const override
void initMemRead(GPUDynInstPtr gpuDynInst)
void generateDisassembly() override
void calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx, SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
MUBUF instructions calculate their addresses as follows:
void calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr, ScalarRegU32 offset)
For normal s_load_dword/s_store_dword instruction addresses.
void initMemWrite(GPUDynInstPtr gpuDynInst)
initiate a memory write access for N dwords
void generateDisassembly() override
void calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
Inst_SMEM(InFmt_SMEM *, const std::string &opcode)
void initMemRead(GPUDynInstPtr gpuDynInst)
initiate a memory read access for N dwords
void initOperandInfo() override
int instSize() const override
Inst_SOP1(InFmt_SOP1 *, const std::string &opcode)
int instSize() const override
bool hasSecondDword(InFmt_SOP1 *)
void initOperandInfo() override
void generateDisassembly() override
bool hasSecondDword(InFmt_SOP2 *)
void generateDisassembly() override
Inst_SOP2(InFmt_SOP2 *, const std::string &opcode)
void initOperandInfo() override
int instSize() const override
bool hasSecondDword(InFmt_SOPC *)
int instSize() const override
void initOperandInfo() override
Inst_SOPC(InFmt_SOPC *, const std::string &opcode)
void generateDisassembly() override
int instSize() const override
void generateDisassembly() override
Inst_SOPK(InFmt_SOPK *, const std::string &opcode)
bool hasSecondDword(InFmt_SOPK *)
void initOperandInfo() override
Inst_SOPP(InFmt_SOPP *, const std::string &opcode)
void initOperandInfo() override
void generateDisassembly() override
int instSize() const override
int instSize() const override
Inst_VINTRP(InFmt_VINTRP *, const std::string &opcode)
void generateDisassembly() override
int instSize() const override
Inst_VOP1(InFmt_VOP1 *, const std::string &opcode)
void initOperandInfo() override
bool hasSecondDword(InFmt_VOP1 *)
int instSize() const override
void initOperandInfo() override
Inst_VOP2(InFmt_VOP2 *, const std::string &opcode)
void generateDisassembly() override
bool hasSecondDword(InFmt_VOP2 *)
void initOperandInfo() override
int instSize() const override
InFmt_VOP3_SDST_ENC instData
Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC *, const std::string &opcode)
void generateDisassembly() override
bool hasSecondDword(InFmt_VOP3_SDST_ENC *)
void initOperandInfo() override
const bool sgprDst
the v_cmp and readlane instructions in the VOP3 encoding are unique because they are the only instruc...
int instSize() const override
bool hasSecondDword(InFmt_VOP3 *)
void generateDisassembly() override
Inst_VOP3(InFmt_VOP3 *, const std::string &opcode, bool sgpr_dst)
bool hasSecondDword(InFmt_VOPC *)
void initOperandInfo() override
void generateDisassembly() override
Inst_VOPC(InFmt_VOPC *, const std::string &opcode)
int instSize() const override
void write(const uint32_t index, const T value)
a write operation
T read(const uint32_t index)
a read operation
Bitfield< 21, 20 > stride
const int NumVecElemPerVecReg(64)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask