#ifndef __GPU_DYN_INST_HH__
#define __GPU_DYN_INST_HH__
#include "debug/GPUMem.hh"
#include "enums/StorageClassType.hh"
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum);
bool isOpcode(const std::string &opcodeStr) const;
bool isOpcode(const std::string &opcodeStr, const std::string &extStr) const;
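A brief usage sketch for these overloads, assuming a GPUDynInstPtr named gpuDynInst is in scope; the opcode strings are illustrative and depend on the target GPU ISA, and the two-argument form is taken here to match an opcode plus an extension suffix:

// Hypothetical opcode checks on a GPUDynInstPtr; the strings are examples only.
if (gpuDynInst->isOpcode("s_endpgm")) {
    // handle an end-of-program instruction
}
if (gpuDynInst->isOpcode("v_add", "_u32")) {
    // handle the 32-bit unsigned variant of a vector add
}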
return std::make_unique<AtomicOpAnd<c0>>(*reg0);
return std::make_unique<AtomicOpOr<c0>>(*reg0);
return std::make_unique<AtomicOpXor<c0>>(*reg0);
return std::make_unique<AtomicOpCAS<c0>>(*reg0, *reg1, cu);
return std::make_unique<AtomicOpExch<c0>>(*reg0);
return std::make_unique<AtomicOpAdd<c0>>(*reg0);
return std::make_unique<AtomicOpSub<c0>>(*reg0);
return std::make_unique<AtomicOpInc<c0>>();
return std::make_unique<AtomicOpDec<c0>>();
return std::make_unique<AtomicOpMax<c0>>(*reg0);
return std::make_unique<AtomicOpMin<c0>>(*reg0);
fatal("Unrecognized atomic operation");
assert(statusVector.size() == TheGpuISA::NumVecElemPerVecReg);
for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
assert((newVal >= 0) && (newVal <= 2));
for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: lane: %d has %d pending "
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: all lanes have no pending"
std::string statusVec_str = "[";
for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
statusVec_str += "]";
return statusVec_str;
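The fragments above maintain a per-lane count of outstanding memory requests. A self-contained sketch of that bookkeeping, with names and the lane count chosen for illustration only:

#include <cassert>
#include <string>
#include <vector>

// Illustrative sketch: each lane records how many memory requests (0..2)
// are still outstanding. Not gem5's actual implementation.
constexpr int NumLanes = 64;

struct LaneStatus
{
    std::vector<int> statusVector = std::vector<int>(NumLanes, 0);

    void set(int lane, int newVal)
    {
        assert(newVal >= 0 && newVal <= 2);
        statusVector[lane] = newVal;
    }

    void decrement(int lane)
    {
        assert(statusVector[lane] > 0);
        --statusVector[lane];
    }

    bool allLanesZero() const
    {
        for (int lane = 0; lane < NumLanes; ++lane)
            if (statusVector[lane] > 0)
                return false;
        return true;
    }

    std::string print() const
    {
        std::string s = "[";
        for (int lane = 0; lane < NumLanes; ++lane)
            s += std::to_string(statusVector[lane]) + " ";
        s += "]";
        return s;
    }
};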
AtomicOpFunctor * clone()
ComputeUnit * computeUnit
AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
gem5::ComputeUnit::ComputeUnitStats stats
bool isFlatScratch() const
bool isKernelLaunch() const
std::unordered_map< Addr, std::vector< int > > StatusVector
std::vector< Tick > roundTripTime
bool hasDestinationSgpr() const
int numDstScalarRegOperands() const
std::map< Addr, std::vector< Tick > > lineAddressTime
void doApertureCheck(const VectorMask &mask)
void resolveFlatSegment(const VectorMask &mask)
std::vector< int > tlbHitLevel
bool isAtomicExch() const
std::vector< Tick > getRoundTripTime() const
bool isFlatGlobal() const
GPUStaticInst * _staticInst
bool hasDestinationVgpr() const
std::vector< int > statusVector
void profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
void decrementStatusVector(int lane)
bool isUnconditionalJump() const
GPUStaticInst * staticInstruction()
int numSrcScalarRegOperands() const
bool isOpcode(const std::string &opcodeStr) const
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum)
const std::map< Addr, std::vector< Tick > > & getLineAddressTime() const
bool isALU() const
Accessor methods for the attributes of the underlying GPU static instruction.
bool isReadOnlySeg() const
bool isSystemCoherent() const
bool hasSourceVgpr() const
int numDstVecRegOperands() const
StatusVector memStatusVector
bool hasSourceSgpr() const
int getLaneStatus(int lane) const
bool readsFlatScratch() const
void initiateAcc(GPUDynInstPtr gpuDynInst)
int getNumOperands() const
enums::StorageClassType executedAs()
const std::vector< OperandInfo > & dstVecRegOperands() const
void profileRoundTripTime(Tick currentTime, int hopId)
void resetStatusVector(int lane)
bool isCondBranch() const
bool writesExecMask() const
bool isPrivateSeg() const
bool isEndOfKernel() const
void resetEntireStatusVector()
const std::vector< OperandInfo > & srcVecRegOperands() const
bool isGloballyCoherent() const
bool readsExecMask() const
TheGpuISA::ScalarRegU32 srcLiteral() const
Tick getAccessTime() const
int maxSrcScalarRegOperandSize()
InstSeqNum seqNum() const
const std::vector< OperandInfo > & srcScalarRegOperands() const
const std::vector< OperandInfo > & dstScalarRegOperands() const
int numSrcVecRegOperands() const
void setRequestFlags(RequestPtr req) const
std::string printStatusVector() const
bool writesFlatScratch() const
bool allLanesZero() const
void execute(GPUDynInstPtr gpuDynInst)
AtomicOpFunctorPtr makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
bool isKernArgSeg() const
void setStatusVector(int lane, int newVal)
void setAccessTime(Tick currentTime)
int maxSrcScalarRegOpSize
int maxSrcVecRegOperandSize()
bool isAtomicNoRet() const
const std::string & disassemble() const
void completeAcc(GPUDynInstPtr gpuDynInst)
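A hedged sketch of how the hooks above are typically sequenced for a memory instruction: execute() performs the functional work and address generation, initiateAcc() issues the resulting memory requests, and completeAcc() finishes the access once responses return. The driver function below is illustrative; the surrounding control flow in gem5 is more involved:

// Illustrative driver only; 'inst' is a GPUDynInstPtr.
void runMemoryInstruction(GPUDynInstPtr inst)
{
    inst->execute(inst);       // functional execution / address generation
    inst->initiateAcc(inst);   // issue memory requests for the active lanes
    // ... later, once all responses have returned from the memory system:
    inst->completeAcc(inst);   // write back results and release resources
}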
const std::vector< int > virtIndices
const int opIdx
Index of this operand within the set of its parent instruction's operand list.
const std::vector< int > physIndices
RegisterOperandInfo(int op_idx, int num_dwords, const std::vector< int > &virt_indices, const std::vector< int > &phys_indices)
const int numDWORDs
Size of this operand in DWORDs.
int virtIdx(int reg_num=0) const
We typically only need the first virtual register for the operand regardless of its size.
int numRegisters() const
The number of registers required to store this operand.
RegisterOperandInfo()=delete
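A hedged usage sketch for RegisterOperandInfo, assuming a populated instance named op; it only illustrates how the virtual register indices backing one operand might be walked:

// Illustrative only: visit each virtual register that backs this operand.
for (int i = 0; i < op.numRegisters(); ++i) {
    int vreg = op.virtIdx(i);   // i-th virtual register index of the operand
    // ... translate vreg to a physical index and access the register file
}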
@ ATOMIC_RETURN_OP
The request is an atomic that returns data.
@ ATOMIC_NO_RETURN_OP
The request is an atomic that does not return data.
@ SLC_BIT
User-policy flags.
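A hedged sketch of how these flags might be applied when building a memory request (for example, inside setRequestFlags()); req is assumed to be a RequestPtr, and isAtomicRet() is assumed as the counterpart of the listed isAtomicNoRet():

// Illustrative only: pick the atomic flag based on whether the atomic
// returns its old value.
if (inst->isAtomicRet()) {
    req->setFlags(Request::ATOMIC_RETURN_OP);     // atomic that returns data
} else if (inst->isAtomicNoRet()) {
    req->setFlags(Request::ATOMIC_NO_RETURN_OP);  // atomic with no return data
}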
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
#define fatal(...)
This implements a cprintf-based fatal() function.
Copyright (c) 2024 Pranith Kumar. Copyright (c) 2020 Inria. All rights reserved.
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
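A small, self-contained sketch of working with the VectorMask typedef above; iterating active lanes like this is how per-lane operations such as doApertureCheck() or resolveFlatSegment() are driven, though the helper function here is only an example:

#include <bitset>
#include <cstddef>
#include <limits>

using VectorMask =
    std::bitset<std::numeric_limits<unsigned long long>::digits>;

// Illustrative helper: visit every active lane in an execution mask.
void forEachActiveLane(const VectorMask &mask)
{
    for (std::size_t lane = 0; lane < mask.size(); ++lane) {
        if (mask[lane]) {
            // lane is active; per-lane work would happen here
        }
    }
}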
statistics::Scalar numFailedCASOps
statistics::Scalar numCASOps
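A hedged sketch of the bookkeeping these counters imply for AtomicOpCAS: every CAS attempt increments numCASOps, and a failed comparison additionally increments numFailedCASOps. The Stats struct below stands in for the ComputeUnit statistics and is an assumption of the example:

#include <cstdint>

// Illustrative stand-in for the compute unit's statistics.
struct Stats { uint64_t numCASOps = 0; uint64_t numFailedCASOps = 0; };

// Illustrative compare-and-swap body, not gem5's exact implementation.
template <typename T>
void casExecute(T *mem, T compare, T swap, Stats &stats)
{
    stats.numCASOps++;
    if (*mem == compare) {
        *mem = swap;             // compare matched: perform the exchange
    } else {
        stats.numFailedCASOps++; // compare failed: leave memory untouched
    }
}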