32 #ifndef __GPU_DYN_INST_HH__ 
   33 #define __GPU_DYN_INST_HH__ 
   42 #include "debug/GPUMem.hh" 
   43 #include "enums/StorageClassType.hh" 
  120                uint64_t instSeqNum);
 
  155     bool isOpcode(
const std::string& opcodeStr) 
const;
 
  156     bool isOpcode(
const std::string& opcodeStr,
 
  157                   const std::string& extStr) 
const;
 
  308             return std::make_unique<AtomicOpAnd<c0>>(*reg0);
 
  310             return std::make_unique<AtomicOpOr<c0>>(*reg0);
 
  312             return std::make_unique<AtomicOpXor<c0>>(*reg0);
 
  314             return std::make_unique<AtomicOpCAS<c0>>(*reg0, *reg1, 
cu);
 
  316             return std::make_unique<AtomicOpExch<c0>>(*reg0);
 
  318             return std::make_unique<AtomicOpAdd<c0>>(*reg0);
 
  320             return std::make_unique<AtomicOpSub<c0>>(*reg0);
 
  322             return std::make_unique<AtomicOpInc<c0>>();
 
  324             return std::make_unique<AtomicOpDec<c0>>();
 
  326             return std::make_unique<AtomicOpMax<c0>>(*reg0);
 
  328             return std::make_unique<AtomicOpMin<c0>>(*reg0);
 
  330             fatal(
"Unrecognized atomic operation");
 
  385         assert((newVal >= 0) && (newVal <= 2));
 
  421                 DPRINTF(GPUMem, 
"CU%d: WF[%d][%d]: lane: %d has %d pending " 
  429             DPRINTF(GPUMem, 
"CU%d: WF[%d][%d]: all lanes have no pending" 
  439         std::string statusVec_str = 
"[";
 
  446         statusVec_str += 
"]";
 
  448         return statusVec_str;
 
AtomicOpFunctor * clone()
ComputeUnit * computeUnit
AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
gem5::ComputeUnit::ComputeUnitStats stats
bool isKernelLaunch() const
std::unordered_map< Addr, std::vector< int > > StatusVector
std::vector< Tick > roundTripTime
bool hasDestinationSgpr() const
int numDstScalarRegOperands() const
std::map< Addr, std::vector< Tick > > lineAddressTime
void doApertureCheck(const VectorMask &mask)
void resolveFlatSegment(const VectorMask &mask)
std::vector< int > tlbHitLevel
bool isAtomicExch() const
bool isFlatGlobal() const
GPUStaticInst * _staticInst
bool hasDestinationVgpr() const
std::vector< int > statusVector
void profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
void decrementStatusVector(int lane)
bool isUnconditionalJump() const
int numSrcScalarRegOperands() const
bool isOpcode(const std::string &opcodeStr) const
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum)
bool isALU() const
accessor methods for the attributes of the underlying GPU static instruction
bool isReadOnlySeg() const
bool isSystemCoherent() const
const std::map< Addr, std::vector< Tick > > & getLineAddressTime() const
bool hasSourceVgpr() const
int numDstVecRegOperands() const
StatusVector memStatusVector
bool hasSourceSgpr() const
GPUStaticInst * staticInstruction()
int getLaneStatus(int lane) const
bool readsFlatScratch() const
void initiateAcc(GPUDynInstPtr gpuDynInst)
int getNumOperands() const
enums::StorageClassType executedAs()
const std::vector< OperandInfo > & dstVecRegOperands() const
void profileRoundTripTime(Tick currentTime, int hopId)
void resetStatusVector(int lane)
bool isCondBranch() const
bool writesExecMask() const
bool isPrivateSeg() const
bool isEndOfKernel() const
void resetEntireStatusVector()
const std::vector< OperandInfo > & srcVecRegOperands() const
bool isGloballyCoherent() const
bool readsExecMask() const
std::vector< Tick > getRoundTripTime() const
TheGpuISA::ScalarRegU32 srcLiteral() const
Tick getAccessTime() const
int maxSrcScalarRegOperandSize()
InstSeqNum seqNum() const
const std::vector< OperandInfo > & srcScalarRegOperands() const
const std::vector< OperandInfo > & dstScalarRegOperands() const
int numSrcVecRegOperands() const
void setRequestFlags(RequestPtr req) const
std::string printStatusVector() const
bool writesFlatScratch() const
bool allLanesZero() const
void execute(GPUDynInstPtr gpuDynInst)
AtomicOpFunctorPtr makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
bool isKernArgSeg() const
void setStatusVector(int lane, int newVal)
void setAccessTime(Tick currentTime)
int maxSrcScalarRegOpSize
int maxSrcVecRegOperandSize()
bool isAtomicNoRet() const
const std::string & disassemble() const
void completeAcc(GPUDynInstPtr gpuDynInst)
const std::vector< int > virtIndices
const int opIdx
Index of this operand within the set of its parent instruction's operand list.
const std::vector< int > physIndices
RegisterOperandInfo(int op_idx, int num_dwords, const std::vector< int > &virt_indices, const std::vector< int > &phys_indices)
const int numDWORDs
Size of this operand in DWORDs.
int virtIdx(int reg_num=0) const
We typically only need the first virtual register for the operand regardless of its size.
int numRegisters() const
The number of registers required to store this operand.
RegisterOperandInfo()=delete
@ SLC_BIT
user-policy flags
@ ATOMIC_RETURN_OP
The request is an atomic that returns data.
@ ATOMIC_NO_RETURN_OP
The request is an atomic that does not return data.
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
constexpr uint64_t mask(unsigned nbits)
Generate a 64-bit mask of 'nbits' 1s, right justified.
#define fatal(...)
This implements a cprintf based fatal() function.
constexpr unsigned NumVecElemPerVecReg
const int RegSizeDWords
Size of a single-precision register in DWords.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
const std::string to_string(sc_enc enc)
statistics::Scalar numFailedCASOps
statistics::Scalar numCASOps