Go to the documentation of this file.
   36 #ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__ 
   37 #define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__ 
   43 #include "debug/GCN3.hh" 
   44 #include "debug/GPUExec.hh" 
  208             initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
 
  219             initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
 
  231             gpu_dyn_inst->scalarAddr = 
vaddr;
 
  245             std::memcpy((
void*)&rsrc_desc, s_rsrc_desc.
rawDataPtr(),
 
  253             if (!rsrc_desc.stride && 
offset >= rsrc_desc.numRecords) {
 
  254                 clamped_offset = rsrc_desc.numRecords;
 
  255             } 
else if (rsrc_desc.stride && 
offset 
  256                        > (rsrc_desc.stride * rsrc_desc.numRecords)) {
 
  257                 clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
 
  260             Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
 
  261             gpu_dyn_inst->scalarAddr = 
vaddr;
 
  433                 if (gpuDynInst->exec_mask[lane]) {
 
  436                     (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane]
 
  449                 if (gpuDynInst->exec_mask[lane]) {
 
  450                     Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
 
  451                     Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
 
  453                     (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane * 2]
 
  455                     (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane * 2 + 1]
 
  468                 if (gpuDynInst->exec_mask[lane]) {
 
  471                         (
reinterpret_cast<T*
>(gpuDynInst->d_data))[lane]);
 
  483                 if (gpuDynInst->exec_mask[lane]) {
 
  484                     Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
 
  485                     Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
 
  487                         gpuDynInst->d_data))[lane * 2]);
 
  489                         gpuDynInst->d_data))[lane * 2 + 1]);
 
  501                     gpuDynInst->addr.at(lane) = (
Addr)
addr[lane];
 
  533             VectorMask old_exec_mask = gpuDynInst->exec_mask;
 
  534             gpuDynInst->exec_mask &= ~
oobMask;
 
  536             gpuDynInst->exec_mask = old_exec_mask;
 
  547             VectorMask old_exec_mask = gpuDynInst->exec_mask;
 
  548             gpuDynInst->exec_mask &= ~
oobMask;
 
  550             gpuDynInst->exec_mask = old_exec_mask;
 
  560             VectorMask old_exec_mask = gpuDynInst->exec_mask;
 
  561             gpuDynInst->exec_mask &= ~
oobMask;
 
  563             gpuDynInst->exec_mask = old_exec_mask;
 
  573             VectorMask old_exec_mask = gpuDynInst->exec_mask;
 
  574             gpuDynInst->exec_mask &= ~
oobMask;
 
  576             gpuDynInst->exec_mask = old_exec_mask;
 
  583             gpuDynInst->resetEntireStatusVector();
 
  584             gpuDynInst->setStatusVector(0, 1);
 
  585             RequestPtr req = std::make_shared<Request>(0, 0, 0,
 
  586                                        gpuDynInst->computeUnit()->
 
  588                                        gpuDynInst->wfDynId);
 
  589             gpuDynInst->setRequestFlags(req);
 
  590             gpuDynInst->computeUnit()->
 
  614         template<
typename VOFF, 
typename VIDX, 
typename SRSRC, 
typename SOFF>
 
  617             SRSRC s_rsrc_desc, SOFF s_offset, 
int inst_offset)
 
  626             std::memcpy((
void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
 
  629             base_addr = rsrc_desc.baseAddr;
 
  631             stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
 
  632                 + rsrc_desc.stride) : rsrc_desc.stride;
 
  635                 if (gpuDynInst->exec_mask[lane]) {
 
  636                     vaddr = base_addr + s_offset.rawData();
 
  642                     buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
 
  644                     buf_off = v_off[lane] + inst_offset;
 
  654                     if (
stride == 0 || !rsrc_desc.swizzleEn) {
 
  655                         if (buf_off + 
stride * buf_idx >=
 
  656                             rsrc_desc.numRecords - s_offset.rawData()) {
 
  657                             DPRINTF(GCN3, 
"mubuf out-of-bounds condition 1: " 
  658                                     "lane = %d, buffer_offset = %llx, " 
  659                                     "const_stride = %llx, " 
  660                                     "const_num_records = %llx\n",
 
  661                                     lane, buf_off + 
stride * buf_idx,
 
  662                                     stride, rsrc_desc.numRecords);
 
  668                     if (
stride != 0 && rsrc_desc.swizzleEn) {
 
  669                         if (buf_idx >= rsrc_desc.numRecords ||
 
  671                             DPRINTF(GCN3, 
"mubuf out-of-bounds condition 2: " 
  672                                     "lane = %d, offset = %llx, " 
  674                                     "const_num_records = %llx\n",
 
  675                                     lane, buf_off, buf_idx,
 
  676                                     rsrc_desc.numRecords);
 
  682                     if (rsrc_desc.swizzleEn) {
 
  683                         Addr idx_stride = 8 << rsrc_desc.idxStride;
 
  684                         Addr elem_size = 2 << rsrc_desc.elemSize;
 
  685                         Addr idx_msb = buf_idx / idx_stride;
 
  686                         Addr idx_lsb = buf_idx % idx_stride;
 
  687                         Addr off_msb = buf_off / elem_size;
 
  688                         Addr off_lsb = buf_off % elem_size;
 
  689                         DPRINTF(GCN3, 
"mubuf swizzled lane %d: " 
  690                                 "idx_stride = %llx, elem_size = %llx, " 
  691                                 "idx_msb = %llx, idx_lsb = %llx, " 
  692                                 "off_msb = %llx, off_lsb = %llx\n",
 
  693                                 lane, idx_stride, elem_size, idx_msb, idx_lsb,
 
  697                             * idx_stride + idx_lsb * elem_size + off_lsb);
 
  702                     DPRINTF(GCN3, 
"Calculating mubuf address for lane %d: " 
  703                             "vaddr = %llx, base_addr = %llx, " 
  704                             "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
 
  707                     gpuDynInst->addr.at(lane) = 
vaddr;
 
  823                 if (gpuDynInst->exec_mask[lane]) {
 
  824                     gpuDynInst->addr.at(lane) = 
addr[lane];
 
  827             gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
 
  837 #endif // __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__ 
  
Inst_SMEM(InFmt_SMEM *, const std::string &opcode)
bool isScalarRegister(int opIdx) override
Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC *, const std::string &opcode)
void calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr, ScalarRegU32 offset)
For normal s_load_dword/s_store_dword instruction addresses.
bool hasSecondDword(InFmt_VOPC *)
Inst_VINTRP(InFmt_VINTRP *, const std::string &opcode)
const int NumVecElemPerVecReg(64)
int instSize() const override
void write(const uint32_t index, const T value)
a write operation
Inst_MTBUF(InFmt_MTBUF *, const std::string &opcode)
bool isVectorRegister(int opIdx) override
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
bool isScalarRegister(int opIdx) override
void initMemWrite(GPUDynInstPtr gpuDynInst)
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
bool isVectorRegister(int opIdx) override
void initMemWrite(GPUDynInstPtr gpuDynInst)
bool isVectorRegister(int opIdx) override
int instSize() const override
int instSize() const override
bool isVectorRegister(int opIdx) override
bool isScalarRegister(int opIdx) override
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
Inst_SOPP(InFmt_SOPP *, const std::string &opcode)
int instSize() const override
std::shared_ptr< Request > RequestPtr
Inst_VOP1(InFmt_VOP1 *, const std::string &opcode)
bool hasSecondDword(InFmt_MTBUF *)
Inst_MUBUF(InFmt_MUBUF *, const std::string &opcode)
bool hasSecondDword(InFmt_SOP2 *)
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
bool hasSecondDword(InFmt_VOP2 *)
int instSize() const override
int instSize() const override
void generateDisassembly() override
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
bool isScalarRegister(int opIdx) override
InFmt_VOP3_SDST_ENC instData
void initMemRead(GPUDynInstPtr gpuDynInst)
initiate a memory read access for N dwords
bool isScalarRegister(int opIdx) override
const std::string & opcode() const
void calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx, SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
MUBUF instructions calculate their addresses as follows:
void initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
void initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
bool hasSecondDword(InFmt_SOP1 *)
void initMemRead(GPUDynInstPtr gpuDynInst)
bool isVectorRegister(int opIdx) override
bool isVectorRegister(int opIdx) override
bool isVectorRegister(int opIdx) override
int instSize() const override
void generateDisassembly() override
int instSize() const override
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
bool isVectorRegister(int opIdx) override
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
Inst_SOPC(InFmt_SOPC *, const std::string &opcode)
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
classes that represent vector/scalar operands in GCN3 ISA.
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
void initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
Inst_SOP2(InFmt_SOP2 *, const std::string &opcode)
bool hasSecondDword(InFmt_VOP3_SDST_ENC *)
int instSize() const override
void generateDisassembly() override
int instSize() const override
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
bool isScalarRegister(int opIdx) override
Inst_SOP1(InFmt_SOP1 *, const std::string &opcode)
int instSize() const override
bool hasSecondDword(InFmt_SOPC *)
Inst_FLAT(InFmt_FLAT *, const std::string &opcode)
bool isVectorRegister(int opIdx) override
bool isScalarRegister(int opIdx) override
Inst_MIMG(InFmt_MIMG *, const std::string &opcode)
bool isScalarRegister(int opIdx) override
bool hasSecondDword(InFmt_VOP3 *)
Inst_SOPK(InFmt_SOPK *, const std::string &opcode)
bool isScalarRegister(int opIdx) override
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
bool isScalarRegister(int opIdx) override
bool isScalarRegister(int opIdx) override
void generateDisassembly() override
int instSize() const override
int instSize() const override
void generateDisassembly() override
Inst_EXP(InFmt_EXP *, const std::string &opcode)
void generateDisassembly() override
bool hasSecondDword(InFmt_VOP1 *)
void initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
bool isVectorRegister(int opIdx) override
Inst_DS(InFmt_DS *, const std::string &opcode)
int instSize() const override
void generateDisassembly() override
T read(const uint32_t index)
a read operation
void initMemWrite(GPUDynInstPtr gpuDynInst)
initiate a memory write access for N dwords
bool hasSecondDword(InFmt_SOPK *)
int instSize() const override
Inst_VOP2(InFmt_VOP2 *, const std::string &opcode)
std::shared_ptr< GPUDynInst > GPUDynInstPtr
const bool sgprDst
the v_cmp and readlane instructions in the VOP3 encoding are unique because they are the only instruc...
Bitfield< 21, 20 > stride
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
bool isVectorRegister(int opIdx) override
int instSize() const override
void generateDisassembly() override
Inst_VOP3(InFmt_VOP3 *, const std::string &opcode, bool sgpr_dst)
void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
int instSize() const override
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
Inst_VOPC(InFmt_VOPC *, const std::string &opcode)
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr)
bool isVectorRegister(int opIdx) override
bool isScalarRegister(int opIdx) override
void initMemRead(GPUDynInstPtr gpuDynInst)
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
void generateDisassembly() override
void initAtomicAccess(GPUDynInstPtr gpuDynInst)
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
void generateDisassembly() override
int instSize() const override
bool isScalarRegister(int opIdx) override
void generateDisassembly() override
void generateDisassembly() override
bool isVectorRegister(int opIdx) override
void generateDisassembly() override
void calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
bool isScalarRegister(int opIdx) override
void generateDisassembly() override
bool isVectorRegister(int opIdx) override
Generated on Thu Mar 18 2021 12:09:13 for gem5 by  doxygen 1.8.17