gem5
v20.1.0.0
|
#include <instructions.hh>
Public Member Functions | |
Inst_DS__DS_SWIZZLE_B32 (InFmt_DS *) | |
~Inst_DS__DS_SWIZZLE_B32 () | |
int | getNumOperands () override |
int | numDstRegOperands () override |
int | numSrcRegOperands () override |
int | getOperandSize (int opIdx) override |
bool | isSrcOperand (int opIdx) override |
bool | isDstOperand (int opIdx) override |
void | execute (GPUDynInstPtr) override |
Public Member Functions inherited from Gcn3ISA::Inst_DS | |
Inst_DS (InFmt_DS *, const std::string &opcode) | |
~Inst_DS () | |
int | instSize () const override |
void | generateDisassembly () override |
bool | isScalarRegister (int opIdx) override |
bool | isVectorRegister (int opIdx) override |
int | getRegisterIndex (int opIdx, GPUDynInstPtr gpuDynInst) override |
Public Member Functions inherited from Gcn3ISA::GCN3GPUStaticInst | |
GCN3GPUStaticInst (const std::string &opcode) | |
~GCN3GPUStaticInst () | |
bool | isFlatScratchRegister (int opIdx) override |
bool | isExecMaskRegister (int opIdx) override |
int | coalescerTokenCount () const override |
Return the number of tokens needed by the coalescer. More... | |
ScalarRegU32 | srcLiteral () const override |
Public Member Functions inherited from GPUStaticInst | |
GPUStaticInst (const std::string &opcode) | |
virtual | ~GPUStaticInst () |
void | instAddr (int inst_addr) |
int | instAddr () const |
int | nextInstAddr () const |
void | instNum (int num) |
int | instNum () |
void | ipdInstNum (int num) |
int | ipdInstNum () const |
const std::string & | disassemble () |
int | numDstVecOperands () |
int | numSrcVecOperands () |
int | numDstVecDWORDs () |
int | numSrcVecDWORDs () |
int | numOpdDWORDs (int operandIdx) |
bool | isALU () const |
bool | isBranch () const |
bool | isCondBranch () const |
bool | isNop () const |
bool | isReturn () const |
bool | isEndOfKernel () const |
bool | isKernelLaunch () const |
bool | isSDWAInst () const |
bool | isDPPInst () const |
bool | isUnconditionalJump () const |
bool | isSpecialOp () const |
bool | isWaitcnt () const |
bool | isBarrier () const |
bool | isMemSync () const |
bool | isMemRef () const |
bool | isFlat () const |
bool | isLoad () const |
bool | isStore () const |
bool | isAtomic () const |
bool | isAtomicNoRet () const |
bool | isAtomicRet () const |
bool | isScalar () const |
bool | readsSCC () const |
bool | writesSCC () const |
bool | readsVCC () const |
bool | writesVCC () const |
bool | readsEXEC () const |
bool | writesEXEC () const |
bool | readsMode () const |
bool | writesMode () const |
bool | ignoreExec () const |
bool | isAtomicAnd () const |
bool | isAtomicOr () const |
bool | isAtomicXor () const |
bool | isAtomicCAS () const |
bool | isAtomicExch () const |
bool | isAtomicAdd () const |
bool | isAtomicSub () const |
bool | isAtomicInc () const |
bool | isAtomicDec () const |
bool | isAtomicMax () const |
bool | isAtomicMin () const |
bool | isArgLoad () const |
bool | isGlobalMem () const |
bool | isLocalMem () const |
bool | isArgSeg () const |
bool | isGlobalSeg () const |
bool | isGroupSeg () const |
bool | isKernArgSeg () const |
bool | isPrivateSeg () const |
bool | isReadOnlySeg () const |
bool | isSpillSeg () const |
bool | isGloballyCoherent () const |
Coherence domain of a memory instruction. More... | |
bool | isSystemCoherent () const |
bool | isF16 () const |
bool | isF32 () const |
bool | isF64 () const |
bool | isFMA () const |
bool | isMAC () const |
bool | isMAD () const |
virtual void | initiateAcc (GPUDynInstPtr gpuDynInst) |
virtual void | completeAcc (GPUDynInstPtr gpuDynInst) |
virtual uint32_t | getTargetPc () |
void | setFlag (Flags flag) |
const std::string & | opcode () const |
Additional Inherited Members | |
Public Attributes inherited from GPUStaticInst | |
Enums::StorageClassType | executed_as |
Static Public Attributes inherited from GPUStaticInst | |
static uint64_t | dynamic_id_count |
Protected Member Functions inherited from Gcn3ISA::Inst_DS | |
template<typename T > | |
void | initMemRead (GPUDynInstPtr gpuDynInst, Addr offset) |
template<typename T > | |
void | initDualMemRead (GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1) |
template<typename T > | |
void | initMemWrite (GPUDynInstPtr gpuDynInst, Addr offset) |
template<typename T > | |
void | initDualMemWrite (GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1) |
void | calcAddr (GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr) |
Protected Member Functions inherited from Gcn3ISA::GCN3GPUStaticInst | |
void | panicUnimplemented () const |
Protected Attributes inherited from Gcn3ISA::Inst_DS | |
InFmt_DS | instData |
InFmt_DS_1 | extData |
Protected Attributes inherited from Gcn3ISA::GCN3GPUStaticInst | |
ScalarRegU32 | _srcLiteral |
If the instruction has a src literal (an immediate value that is part of the instruction stream), it is stored here. More... | |
Protected Attributes inherited from GPUStaticInst | |
const std::string | _opcode |
std::string | disassembly |
int | _instNum |
int | _instAddr |
int | srcVecOperands |
int | dstVecOperands |
int | srcVecDWORDs |
int | dstVecDWORDs |
int | _ipdInstNum |
Identifier of the immediate post-dominator instruction. More... | |
std::bitset< Num_Flags > | _flags |
Definition at line 61832 of file instructions.hh.
Gcn3ISA::Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32 | ( | InFmt_DS * | iFmt | ) |
Definition at line 32386 of file instructions.cc.
References GPUStaticInst::setFlag().
Gcn3ISA::Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32 | ( | ) |
Definition at line 32392 of file instructions.cc.
|
overridevirtual |
The "DS pattern" is composed of both offset fields; that is, it is the swizzle pattern between lanes. Bit 15 of the DS pattern dictates which swizzle mode to use. There are two different swizzle modes: 1) QDMode and 2) Bit-masks mode. If bit 15 is set, QDMode is used; otherwise, Bit-masks mode is used. The remaining bits dictate how to swizzle the lanes.
QDMode: Chunks the lanes into groups of 4 and swizzles among them. Bits 7:6 dictate where lane 3 (of the current chunk) gets its data, bits 5:4 do the same for lane 2, etc.
Bit-masks mode: This mode breaks bits 14:0 into 3 equal-sized chunks: bits 14:10 are the xor_mask, bits 9:5 are the or_mask, and bits 4:0 are the and_mask. Each lane is swizzled by performing the appropriate operation using these masks.
This operation allows data sharing between groups of four consecutive threads. Note the increment by 4 in the for loop.
Implements GPUStaticInst.
Definition at line 32399 of file instructions.cc.
References bits(), data, Gcn3ISA::InFmt_DS_1::DATA0, Wavefront::execUnitId, Gcn3ISA::Inst_DS::extData, MipsISA::index, Gcn3ISA::Inst_DS::instData, Gcn3ISA::NumVecElemPerVecReg(), Gcn3ISA::InFmt_DS::OFFSET0, Gcn3ISA::InFmt_DS::OFFSET1, panic_if, Wavefront::rdLmReqsInPipe, Wavefront::validateRequestCounters(), Gcn3ISA::InFmt_DS_1::VDST, and Gcn3ISA::VecOperand< DataType, Const, NumDwords >::write().
|
inlineoverridevirtual |
Implements GPUStaticInst.
Definition at line 61839 of file instructions.hh.
References numDstRegOperands(), and numSrcRegOperands().
|
inlineoverridevirtual |
Reimplemented from Gcn3ISA::GCN3GPUStaticInst.
Definition at line 61848 of file instructions.hh.
References fatal.
|
inlineoverridevirtual |
Reimplemented from Gcn3ISA::GCN3GPUStaticInst.
Definition at line 61876 of file instructions.hh.
References fatal.
|
inlineoverridevirtual |
Reimplemented from Gcn3ISA::GCN3GPUStaticInst.
Definition at line 61862 of file instructions.hh.
References fatal.
|
inlineoverridevirtual |
Implements GPUStaticInst.
Definition at line 61844 of file instructions.hh.
Referenced by getNumOperands().
|
inlineoverridevirtual |
Implements GPUStaticInst.
Definition at line 61845 of file instructions.hh.
Referenced by getNumOperands().