#ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__
#define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__

#include "debug/GCN3.hh"
#include "debug/GPUExec.hh"
// Inst_SMEM initMemRead: initiate a scalar memory read access for N dwords.
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst, MemCmd::ReadReq);
// Inst_SMEM initMemWrite: initiate a scalar memory write access for N dwords.
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst, MemCmd::WriteReq);
// Normal s_load_dword/s_store_dword address calculation result:
gpu_dyn_inst->scalarAddr = vaddr;
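For illustration, a minimal standalone sketch of the scalar address math, assuming a 64-bit base plus a byte offset aligned down to a dword boundary; the helper name and parameters below are hypothetical, not part of gem5:

#include <cstdint>

// Hypothetical helper mirroring the scalar address math: 64-bit base
// plus byte offset, truncated to a 4-byte (dword) boundary.
uint64_t scalarLoadAddr(uint64_t base, uint32_t offset)
{
    return (base + offset) & ~uint64_t(0x3);
}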
// s_buffer_load_dword/s_buffer_store_dword address calculation:
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
            sizeof(rsrc_desc));

// The offset is clamped if it would run past the end of the buffer:
//   stride == 0: clamp when offset >= numRecords
//   stride != 0: clamp when offset >  stride * numRecords
if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
    clamped_offset = rsrc_desc.numRecords;
} else if (rsrc_desc.stride && offset
           > (rsrc_desc.stride * rsrc_desc.numRecords)) {
    clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
}

Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
gpu_dyn_inst->scalarAddr = vaddr;
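A self-contained sketch of the same clamping rule, with plain integers standing in for the buffer resource descriptor fields; the function and parameter names are illustrative only:

#include <cstdint>

// Hypothetical helper: clamp the offset to the buffer size described by
// (stride, num_records), then form the dword-aligned scalar address.
uint64_t scalarBufferAddr(uint64_t base_addr, uint32_t stride,
                          uint32_t num_records, uint32_t offset)
{
    uint64_t clamped_offset = offset;
    if (stride == 0 && offset >= num_records) {
        clamped_offset = num_records;
    } else if (stride != 0 &&
               offset > uint64_t(stride) * num_records) {
        clamped_offset = uint64_t(stride) * num_records;
    }
    return (base_addr + clamped_offset) & ~uint64_t(0x3);
}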
// Inst_DS initMemRead: read one value per active lane from LDS.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
        = wf->ldsChunk->read<T>(vaddr);
}
// Inst_DS initDualMemRead: read from two LDS addresses per active lane,
// interleaving the results in d_data.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
        = wf->ldsChunk->read<T>(vaddr0);
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
        = wf->ldsChunk->read<T>(vaddr1);
}
// Inst_DS initMemWrite: write one value per active lane into LDS.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    wf->ldsChunk->write<T>(vaddr,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
}
// Inst_DS initDualMemWrite: write to two LDS addresses per active lane.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
    wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
        gpuDynInst->d_data))[lane * 2]);
    wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
        gpuDynInst->d_data))[lane * 2 + 1]);
}
gpuDynInst->addr.at(lane) = (Addr)addr[lane];
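For illustration, a self-contained sketch of the dual-read data layout used above; LANES and the containers stand in for gem5's NumVecElemPerVecReg, exec_mask, and d_data, and for simplicity the "LDS" is indexed by element rather than by byte address as in the real helpers:

#include <array>
#include <bitset>
#include <cstdint>
#include <vector>

constexpr int LANES = 64;   // NumVecElemPerVecReg

// Each active lane reads two values, packed as d_data[lane*2] and
// d_data[lane*2 + 1].
void dualReadSketch(const std::vector<uint32_t> &lds,
                    const std::array<uint64_t, LANES> &addr,
                    const std::bitset<LANES> &exec_mask,
                    uint64_t offset0, uint64_t offset1,
                    std::vector<uint32_t> &d_data)
{
    d_data.assign(LANES * 2, 0);
    for (int lane = 0; lane < LANES; ++lane) {
        if (exec_mask[lane]) {
            d_data[lane * 2]     = lds.at(addr[lane] + offset0);
            d_data[lane * 2 + 1] = lds.at(addr[lane] + offset1);
        }
    }
}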
// The Inst_MUBUF memory helpers (initMemRead/initMemWrite, single- and
// multi-dword variants) all temporarily clear the exec_mask bits of lanes
// flagged out-of-bounds (oobMask) so no memory request is issued for
// them, then restore the original mask. The same pattern appears in all
// four helpers:
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
// ... issue the access for the remaining lanes via initMemReqHelper ...
gpuDynInst->exec_mask = old_exec_mask;
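A runnable sketch of that save/clear/restore pattern in isolation; the wrapper name and the std::function callback are hypothetical stand-ins for the request-issuing call in the real code:

#include <bitset>
#include <functional>

using VectorMask = std::bitset<64>;

// Drop the out-of-bounds lanes from exec_mask, issue the access (e.g.
// through initMemReqHelper in the real code), then restore the mask.
void issueWithoutOobLanes(VectorMask &exec_mask, const VectorMask &oobMask,
                          const std::function<void()> &issueRequests)
{
    VectorMask old_exec_mask = exec_mask;
    exec_mask &= ~oobMask;
    issueRequests();
    exec_mask = old_exec_mask;
}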
// Inst_MUBUF injectGlobalMemFence: build a zero-size request tagged with
// the wavefront's dynamic ID and hand it to the compute unit.
gpuDynInst->resetEntireStatusVector();
gpuDynInst->setStatusVector(0, 1);
RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                           gpuDynInst->computeUnit()->
                                           requestorId(), 0,
                                           gpuDynInst->wfDynId);
gpuDynInst->setRequestFlags(req);
gpuDynInst->computeUnit()->
    injectGlobalMemFence(gpuDynInst, false, req);
// MUBUF instructions calculate their addresses as follows:
//
//   index  = vgpr_idx + (addTidEn ? lane : 0)
//   offset = vgpr_off + inst_offset
//
//   linear buffers:   vaddr = base + sgpr_off + offset + stride * index
//   swizzled buffers: vaddr = base + sgpr_off
//                           + (index_msb * stride + offset_msb * elem_size)
//                             * idx_stride + index_lsb * elem_size
//                           + offset_lsb
template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
void
calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
         SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
{
    Addr vaddr = 0;
    Addr base_addr = 0;
    Addr stride = 0;
    Addr buf_idx = 0;
    Addr buf_off = 0;
    BufferRsrcDescriptor rsrc_desc;

    std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                sizeof(rsrc_desc));

    base_addr = rsrc_desc.baseAddr;

    stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
        + rsrc_desc.stride) : rsrc_desc.stride;

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vaddr = base_addr + s_offset.rawData();

            // Buffer index and offset, used for both linear and
            // swizzled buffers.
            buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
            buf_off = v_off[lane] + inst_offset;

            // Out-of-range lanes are recorded in oobMask: such loads
            // return 0 and such stores are ignored.
            if (stride == 0 || !rsrc_desc.swizzleEn) {
                if (buf_off + stride * buf_idx >=
                    rsrc_desc.numRecords - s_offset.rawData()) {
                    DPRINTF(GCN3, "mubuf out-of-bounds condition 1: "
                            "lane = %d, buffer_offset = %llx, "
                            "const_stride = %llx, "
                            "const_num_records = %llx\n",
                            lane, buf_off + stride * buf_idx,
                            stride, rsrc_desc.numRecords);
                    oobMask.set(lane);
                    continue;
                }
            }

            if (stride != 0 && rsrc_desc.swizzleEn) {
                if (buf_idx >= rsrc_desc.numRecords ||
                    buf_off >= stride) {
                    DPRINTF(GCN3, "mubuf out-of-bounds condition 2: "
                            "lane = %d, offset = %llx, "
                            "index = %llx, "
                            "const_num_records = %llx\n",
                            lane, buf_off, buf_idx,
                            rsrc_desc.numRecords);
                    oobMask.set(lane);
                    continue;
                }
            }

            if (rsrc_desc.swizzleEn) {
                Addr idx_stride = 8 << rsrc_desc.idxStride;
                Addr elem_size = 2 << rsrc_desc.elemSize;
                Addr idx_msb = buf_idx / idx_stride;
                Addr idx_lsb = buf_idx % idx_stride;
                Addr off_msb = buf_off / elem_size;
                Addr off_lsb = buf_off % elem_size;
                DPRINTF(GCN3, "mubuf swizzled lane %d: "
                        "idx_stride = %llx, elem_size = %llx, "
                        "idx_msb = %llx, idx_lsb = %llx, "
                        "off_msb = %llx, off_lsb = %llx\n",
                        lane, idx_stride, elem_size, idx_msb, idx_lsb,
                        off_msb, off_lsb);

                vaddr += ((idx_msb * stride + off_msb * elem_size)
                    * idx_stride + idx_lsb * elem_size + off_lsb);
            } else {
                vaddr += buf_off + stride * buf_idx;
            }

            DPRINTF(GCN3, "Calculating mubuf address for lane %d: "
                    "vaddr = %llx, base_addr = %llx, "
                    "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                    lane, vaddr, base_addr, stride, buf_idx, buf_off);
            gpuDynInst->addr.at(lane) = vaddr;
        }
    }
}
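As a rough, self-contained illustration of the two addressing modes computed above, with plain integer parameters standing in for the buffer resource descriptor fields; the helper names are hypothetical:

#include <cstdint>

// Linear addressing: vaddr = base + sgpr_off + offset + stride * index
uint64_t linearBufferAddr(uint64_t base, uint64_t sgpr_off, uint64_t stride,
                          uint64_t index, uint64_t offset)
{
    return base + sgpr_off + offset + stride * index;
}

// Swizzled addressing: index and offset are each split around
// idx_stride / elem_size and recombined so consecutive indices
// interleave within a swizzle tile, matching the vaddr update above.
uint64_t swizzledBufferAddr(uint64_t base, uint64_t sgpr_off,
                            uint64_t stride, uint64_t index, uint64_t offset,
                            uint64_t idx_stride, uint64_t elem_size)
{
    uint64_t idx_msb = index / idx_stride;
    uint64_t idx_lsb = index % idx_stride;
    uint64_t off_msb = offset / elem_size;
    uint64_t off_lsb = offset % elem_size;
    return base + sgpr_off
         + (idx_msb * stride + off_msb * elem_size) * idx_stride
         + idx_lsb * elem_size + off_lsb;
}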
// Inst_FLAT calcAddr: copy the per-lane 64-bit addresses, then resolve
// the flat address segment for the active lanes.
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = addr[lane];
}
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
#endif // __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__