gem5
v20.1.0.0
|
#include <instructions.hh>
Public Member Functions | |
Inst_DS__DS_BPERMUTE_B32 (InFmt_DS *) | |
~Inst_DS__DS_BPERMUTE_B32 () | |
int | getNumOperands () override |
int | numDstRegOperands () override |
int | numSrcRegOperands () override |
int | getOperandSize (int opIdx) override |
bool | isSrcOperand (int opIdx) override |
bool | isDstOperand (int opIdx) override |
void | execute (GPUDynInstPtr) override |
Public Member Functions inherited from Gcn3ISA::Inst_DS | |
Inst_DS (InFmt_DS *, const std::string &opcode) | |
~Inst_DS () | |
int | instSize () const override |
void | generateDisassembly () override |
bool | isScalarRegister (int opIdx) override |
bool | isVectorRegister (int opIdx) override |
int | getRegisterIndex (int opIdx, GPUDynInstPtr gpuDynInst) override |
Public Member Functions inherited from Gcn3ISA::GCN3GPUStaticInst | |
GCN3GPUStaticInst (const std::string &opcode) | |
~GCN3GPUStaticInst () | |
bool | isFlatScratchRegister (int opIdx) override |
bool | isExecMaskRegister (int opIdx) override |
int | coalescerTokenCount () const override |
Return the number of tokens needed by the coalescer. More... | |
ScalarRegU32 | srcLiteral () const override |
Public Member Functions inherited from GPUStaticInst | |
GPUStaticInst (const std::string &opcode) | |
virtual | ~GPUStaticInst () |
void | instAddr (int inst_addr) |
int | instAddr () const |
int | nextInstAddr () const |
void | instNum (int num) |
int | instNum () |
void | ipdInstNum (int num) |
int | ipdInstNum () const |
const std::string & | disassemble () |
int | numDstVecOperands () |
int | numSrcVecOperands () |
int | numDstVecDWORDs () |
int | numSrcVecDWORDs () |
int | numOpdDWORDs (int operandIdx) |
bool | isALU () const |
bool | isBranch () const |
bool | isCondBranch () const |
bool | isNop () const |
bool | isReturn () const |
bool | isEndOfKernel () const |
bool | isKernelLaunch () const |
bool | isSDWAInst () const |
bool | isDPPInst () const |
bool | isUnconditionalJump () const |
bool | isSpecialOp () const |
bool | isWaitcnt () const |
bool | isBarrier () const |
bool | isMemSync () const |
bool | isMemRef () const |
bool | isFlat () const |
bool | isLoad () const |
bool | isStore () const |
bool | isAtomic () const |
bool | isAtomicNoRet () const |
bool | isAtomicRet () const |
bool | isScalar () const |
bool | readsSCC () const |
bool | writesSCC () const |
bool | readsVCC () const |
bool | writesVCC () const |
bool | readsEXEC () const |
bool | writesEXEC () const |
bool | readsMode () const |
bool | writesMode () const |
bool | ignoreExec () const |
bool | isAtomicAnd () const |
bool | isAtomicOr () const |
bool | isAtomicXor () const |
bool | isAtomicCAS () const |
bool | isAtomicExch () const |
bool | isAtomicAdd () const |
bool | isAtomicSub () const |
bool | isAtomicInc () const |
bool | isAtomicDec () const |
bool | isAtomicMax () const |
bool | isAtomicMin () const |
bool | isArgLoad () const |
bool | isGlobalMem () const |
bool | isLocalMem () const |
bool | isArgSeg () const |
bool | isGlobalSeg () const |
bool | isGroupSeg () const |
bool | isKernArgSeg () const |
bool | isPrivateSeg () const |
bool | isReadOnlySeg () const |
bool | isSpillSeg () const |
bool | isGloballyCoherent () const |
Coherence domain of a memory instruction. More... | |
bool | isSystemCoherent () const |
bool | isF16 () const |
bool | isF32 () const |
bool | isF64 () const |
bool | isFMA () const |
bool | isMAC () const |
bool | isMAD () const |
virtual void | initiateAcc (GPUDynInstPtr gpuDynInst) |
virtual void | completeAcc (GPUDynInstPtr gpuDynInst) |
virtual uint32_t | getTargetPc () |
void | setFlag (Flags flag) |
const std::string & | opcode () const |
Additional Inherited Members | |
Public Attributes inherited from GPUStaticInst | |
Enums::StorageClassType | executed_as |
Static Public Attributes inherited from GPUStaticInst | |
static uint64_t | dynamic_id_count |
Protected Member Functions inherited from Gcn3ISA::Inst_DS | |
template<typename T > | |
void | initMemRead (GPUDynInstPtr gpuDynInst, Addr offset) |
template<typename T > | |
void | initDualMemRead (GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1) |
template<typename T > | |
void | initMemWrite (GPUDynInstPtr gpuDynInst, Addr offset) |
template<typename T > | |
void | initDualMemWrite (GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1) |
void | calcAddr (GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr) |
Protected Member Functions inherited from Gcn3ISA::GCN3GPUStaticInst | |
void | panicUnimplemented () const |
Protected Attributes inherited from Gcn3ISA::Inst_DS | |
InFmt_DS | instData |
InFmt_DS_1 | extData |
Protected Attributes inherited from Gcn3ISA::GCN3GPUStaticInst | |
ScalarRegU32 | _srcLiteral |
If the instruction has a src literal (an immediate value that is part of the instruction stream), it is stored here. More... | |
Protected Attributes inherited from GPUStaticInst | |
const std::string | _opcode |
std::string | disassembly |
int | _instNum |
int | _instAddr |
int | srcVecOperands |
int | dstVecOperands |
int | srcVecDWORDs |
int | dstVecDWORDs |
int | _ipdInstNum |
Identifier of the immediate post-dominator instruction. More... | |
std::bitset< Num_Flags > | _flags |
Definition at line 61958 of file instructions.hh.
Gcn3ISA::Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32 | ( | InFmt_DS * | iFmt | ) |
While this operation doesn't actually use DS storage, we classify it as a load here because it writes back to a VGPR, which fits better with the LDS pipeline logic.
Definition at line 32574 of file instructions.cc.
References GPUStaticInst::setFlag().
Gcn3ISA::Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32 | ( | ) |
Definition at line 32586 of file instructions.cc.
|
override virtual |
One of the offset fields can be used for the index. It is assumed OFFSET0 would be used, as OFFSET1 is typically only used for DS ops that operate on two disparate pieces of data.
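The address provided is a byte address, but VGPRs are 4 bytes, so we must divide by 4 to get the actual VGPR index. Additionally, the index is calculated modulo the WF size, 64 in this case, so we simply extract bits 7-2 of the address: dropping the two low-order bits divides by 4, and keeping the next six bits reduces the result modulo 64.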
If the shuffled index corresponds to a lane that is inactive, then this instruction writes a 0 to the active lane in VDST.
Implements GPUStaticInst.
Definition at line 32592 of file instructions.cc.
References addr, Gcn3ISA::InFmt_DS_1::ADDR, bits(), data, Gcn3ISA::InFmt_DS_1::DATA0, Wavefront::decLGKMInstsIssued(), Wavefront::execMask(), Wavefront::execUnitId, Gcn3ISA::Inst_DS::extData, MipsISA::index, Gcn3ISA::Inst_DS::instData, Gcn3ISA::NumVecElemPerVecReg(), Gcn3ISA::InFmt_DS::OFFSET0, Gcn3ISA::InFmt_DS::OFFSET1, panic_if, Wavefront::rdLmReqsInPipe, Wavefront::validateRequestCounters(), Gcn3ISA::InFmt_DS_1::VDST, and Gcn3ISA::VecOperand< DataType, Const, NumDwords >::write().
|
inline override virtual |
Implements GPUStaticInst.
Definition at line 61965 of file instructions.hh.
References numDstRegOperands(), and numSrcRegOperands().
|
inline override virtual |
Reimplemented from Gcn3ISA::GCN3GPUStaticInst.
Definition at line 61974 of file instructions.hh.
References fatal.
|
inline override virtual |
Reimplemented from Gcn3ISA::GCN3GPUStaticInst.
Definition at line 62006 of file instructions.hh.
References fatal.
|
inline override virtual |
Reimplemented from Gcn3ISA::GCN3GPUStaticInst.
Definition at line 61990 of file instructions.hh.
References fatal.
|
inline override virtual |
Implements GPUStaticInst.
Definition at line 61970 of file instructions.hh.
Referenced by getNumOperands().
|
inline override virtual |
Implements GPUStaticInst.
Definition at line 61971 of file instructions.hh.
Referenced by getNumOperands().