#ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__
#define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__

#include "debug/GCN3.hh"
#include "debug/GPUExec.hh"
// Inst_SMEM: initiate a scalar memory read or write access for N dwords.
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst, MemCmd::ReadReq);
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst, MemCmd::WriteReq);

// Inst_SMEM::calcAddr, for normal s_load_dword/s_store_dword addresses:
// the scalar address is the 64-bit base plus the immediate offset,
// aligned down to a dword boundary.
gpu_dyn_inst->scalarAddr = vaddr;
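// A minimal standalone sketch (illustrative values, not gem5 code) of the
// dword alignment applied to scalar memory addresses above: the low two
// bits of base + offset are cleared so the request starts on a 4-byte
// boundary.
#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t base   = 0x1000;            // hypothetical 64-bit base
    const uint32_t offset = 0x26;              // hypothetical immediate offset
    const uint64_t vaddr  = (base + offset) & ~uint64_t(0x3);
    std::printf("unaligned = 0x%llx, aligned = 0x%llx\n",
                (unsigned long long)(base + offset),
                (unsigned long long)vaddr);    // 0x1026 -> 0x1024
    return 0;
}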
// Inst_SMEM::calcAddr, for s_buffer_load_dword/s_buffer_store_dword
// addresses: the buffer resource descriptor is copied out of the 128-bit
// scalar operand, the offset is clamped against the buffer size, and the
// resulting scalar address is aligned down to a dword boundary.
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
            sizeof(rsrc_desc));

// The offset is clamped when stride is zero and offset >= numRecords, or
// when stride is non-zero and offset > stride * numRecords.
if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
    clamped_offset = rsrc_desc.numRecords;
} else if (rsrc_desc.stride && offset
           > (rsrc_desc.stride * rsrc_desc.numRecords)) {
    clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
}

Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
gpu_dyn_inst->scalarAddr = vaddr;
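// A minimal standalone sketch (hypothetical descriptor values, not gem5
// code) of the clamping rule above: with a zero stride the offset is
// clamped to numRecords, with a non-zero stride it is clamped to
// stride * numRecords.
#include <cstdint>
#include <cstdio>

static uint32_t
clampBufferOffset(uint32_t offset, uint32_t stride, uint32_t num_records)
{
    if (!stride && offset >= num_records)
        return num_records;
    if (stride && offset > stride * num_records)
        return stride * num_records;
    return offset;
}

int main()
{
    std::printf("%u\n", clampBufferOffset(300, 0, 256));   // clamped to 256
    std::printf("%u\n", clampBufferOffset(300, 4, 256));   // in range: 300
    std::printf("%u\n", clampBufferOffset(5000, 4, 256));  // clamped to 1024
    return 0;
}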
// Inst_DS helpers: LDS accesses are performed per lane, for active lanes
// only (wf below is the instruction's Wavefront).

// initMemRead<T>: read one element per lane from addr[lane] + offset.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
        = wf->ldsChunk->read<T>(vaddr);
}

// initDualMemRead<T>: read two elements per lane, stored pairwise in d_data.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
        = wf->ldsChunk->read<T>(vaddr0);
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
        = wf->ldsChunk->read<T>(vaddr1);
}

// initMemWrite<T>: write one element per lane to addr[lane] + offset.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr = gpuDynInst->addr[lane] + offset;
    wf->ldsChunk->write<T>(vaddr,
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
}

// initDualMemWrite<T>: write the packed pair of elements per lane.
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
    wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
        gpuDynInst->d_data))[lane * 2]);
    wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
        gpuDynInst->d_data))[lane * 2 + 1]);
}

// Inst_DS::calcAddr: the DS address for each active lane is the lane's
// 32-bit vector operand value.
gpuDynInst->addr.at(lane) = (Addr)addr[lane];
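// A minimal standalone sketch (not gem5 code) of the d_data layout the dual
// LDS read above produces: for each active lane, the two loaded values land
// at indices 2*lane and 2*lane+1 of the destination buffer. The lane count,
// element type, and toy LDS model are assumptions for illustration.
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    const int num_lanes = 4;                    // stand-in for NumVecElemPerVecReg
    std::vector<uint32_t> lds = {10, 11, 12, 13, 14, 15, 16, 17};
    std::vector<uint32_t> d_data(num_lanes * 2, 0);

    const uint64_t offset0 = 0, offset1 = 4;    // byte offsets of the two reads

    for (int lane = 0; lane < num_lanes; ++lane) {
        uint64_t base = lane * 8;               // hypothetical per-lane base address
        // element index = byte address / 4 in this toy LDS model
        d_data[lane * 2]     = lds[(base + offset0) / 4];
        d_data[lane * 2 + 1] = lds[(base + offset1) / 4];
    }

    for (int lane = 0; lane < num_lanes; ++lane)
        std::printf("lane %d: %u %u\n", lane,
                    d_data[lane * 2], d_data[lane * 2 + 1]);
    return 0;
}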
// Inst_MUBUF::initMemRead / initMemWrite (both the element-typed and the
// N-dword variants) wrap their memory-request helper in the same pattern:
// the execution mask is saved, lanes flagged as out-of-bounds by calcAddr
// (oobMask) are temporarily suppressed so no requests are issued for them,
// and the mask is restored afterwards.
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
// ... issue the vector memory read or write request here ...
gpuDynInst->exec_mask = old_exec_mask;
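// A minimal standalone sketch (assumed lane count and mask values, not gem5
// code) of the save/clear/restore idiom above, using std::bitset for the
// lane masks.
#include <bitset>
#include <cstdio>

int main()
{
    std::bitset<8> exec_mask("10111101");   // lanes the instruction executes
    std::bitset<8> oob_mask ("00100001");   // lanes flagged out-of-bounds

    std::bitset<8> saved = exec_mask;       // save
    exec_mask &= ~oob_mask;                 // suppress OOB lanes for the request
    std::printf("issue request for lanes: %s\n", exec_mask.to_string().c_str());
    exec_mask = saved;                      // restore for the rest of execution
    std::printf("restored exec mask:      %s\n", exec_mask.to_string().c_str());
    return 0;
}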
// Inst_MUBUF::injectGlobalMemFence: record a single outstanding request in
// the status vector, build an empty Request tagged with this wavefront's
// dynamic id, and hand it to the compute unit to inject the fence.
gpuDynInst->resetEntireStatusVector();
gpuDynInst->setStatusVector(0, 1);
RequestPtr req = std::make_shared<Request>(0, 0, 0,
                           gpuDynInst->computeUnit()->
                           /* requestor id (elided in the listing) */
                           gpuDynInst->wfDynId);
gpuDynInst->setRequestFlags(req);
gpuDynInst->computeUnit()->
    injectGlobalMemFence(gpuDynInst, false, req);
// Inst_MUBUF::calcAddr: compute per-lane buffer addresses from the vector
// offset and index operands, the 128-bit buffer resource descriptor, and
// the scalar and instruction offsets.
template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
void
calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
         SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
{
    // rsrc_desc is the decoded buffer resource descriptor.
    std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                sizeof(rsrc_desc));

    base_addr = rsrc_desc.baseAddr;

    // With ADD_TID_ENABLE set, the stride is extended with the descriptor's
    // data-format bits; otherwise the stride field is used directly.
    stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
        + rsrc_desc.stride) : rsrc_desc.stride;

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vaddr = base_addr + s_offset.rawData();
            // index = vgpr_idx (+ TID when ADD_TID_ENABLE),
            // offset = vgpr_off + instruction offset.
            buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
            buf_off = v_off[lane] + inst_offset;
            // Out-of-range accesses return zero for loads and are ignored
            // for stores. Bounds check for linear (non-swizzled) addressing:
            if (stride == 0 || !rsrc_desc.swizzleEn) {
                if (buf_off + stride * buf_idx >=
                    rsrc_desc.numRecords - s_offset.rawData()) {
                    DPRINTF(GCN3, "mubuf out-of-bounds condition 1: "
                            "lane = %d, buffer_offset = %llx, "
                            "const_stride = %llx, "
                            "const_num_records = %llx\n",
                            lane, buf_off + stride * buf_idx,
                            stride, rsrc_desc.numRecords);
                    oobMask.set(lane);
                    continue;
                }
            }
            // Bounds check for swizzled addressing: the index is checked
            // against numRecords and the offset against the stride.
            if (stride != 0 && rsrc_desc.swizzleEn) {
                if (buf_idx >= rsrc_desc.numRecords ||
                    buf_off >= stride) {
                    DPRINTF(GCN3, "mubuf out-of-bounds condition 2: "
                            "lane = %d, offset = %llx, "
                            "index = %llx, "
                            "const_num_records = %llx\n",
                            lane, buf_off, buf_idx,
                            rsrc_desc.numRecords);
                    oobMask.set(lane);
                    continue;
                }
            }
            if (rsrc_desc.swizzleEn) {
                Addr idx_stride = 8 << rsrc_desc.idxStride;
                Addr elem_size = 2 << rsrc_desc.elemSize;
                Addr idx_msb = buf_idx / idx_stride;
                Addr idx_lsb = buf_idx % idx_stride;
                Addr off_msb = buf_off / elem_size;
                Addr off_lsb = buf_off % elem_size;
                DPRINTF(GCN3, "mubuf swizzled lane %d: "
                        "idx_stride = %llx, elem_size = %llx, "
                        "idx_msb = %llx, idx_lsb = %llx, "
                        "off_msb = %llx, off_lsb = %llx\n",
                        lane, idx_stride, elem_size, idx_msb, idx_lsb,
                        off_msb, off_lsb);

                vaddr += ((idx_msb * stride + off_msb * elem_size)
                    * idx_stride + idx_lsb * elem_size + off_lsb);
            } else {
                // Linear addressing: offset plus stride * index.
                vaddr += buf_off + stride * buf_idx;
            }

            DPRINTF(GCN3, "Calculating mubuf address for lane %d: "
                    "vaddr = %llx, base_addr = %llx, "
                    "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                    lane, vaddr, base_addr, stride, buf_idx, buf_off);
            gpuDynInst->addr.at(lane) = vaddr;
        }
    }
}
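// A minimal standalone sketch (hypothetical descriptor values, not gem5
// code) contrasting the two MUBUF offset computations above: linear
// addressing uses offset + stride * index, while swizzled addressing splits
// the index and offset into MSB/LSB parts around the descriptor's index
// stride and element size.
#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t stride     = 16;      // bytes per record
    const uint64_t idx_stride = 8 << 1;  // 16 indices per swizzle group
    const uint64_t elem_size  = 2 << 1;  // 4-byte elements
    const uint64_t buf_idx    = 21;      // record index for this lane
    const uint64_t buf_off    = 10;      // byte offset within the record

    // Linear addressing.
    const uint64_t linear = buf_off + stride * buf_idx;

    // Swizzled addressing, same expression as the vaddr increment above.
    const uint64_t idx_msb = buf_idx / idx_stride, idx_lsb = buf_idx % idx_stride;
    const uint64_t off_msb = buf_off / elem_size,  off_lsb = buf_off % elem_size;
    const uint64_t swizzled = (idx_msb * stride + off_msb * elem_size)
        * idx_stride + idx_lsb * elem_size + off_lsb;

    std::printf("linear = %llu, swizzled = %llu\n",   // 346 and 406
                (unsigned long long)linear, (unsigned long long)swizzled);
    return 0;
}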
// Inst_FLAT::calcAddr: each active lane's address comes directly from the
// 64-bit vector address operand; flat addresses are then segment-resolved.
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
    if (gpuDynInst->exec_mask[lane]) {
        gpuDynInst->addr.at(lane) = addr[lane];
    }
}
gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);

#endif // __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__