#include "debug/GPUInst.hh"
#include "debug/GPUMem.hh"
GPUDynInst constructor, member-initializer excerpt (leading entries elided):

      /* ... */ (Addr)0), numScalarReqs(0), isSaveRestore(false),
      _staticInst(static_inst), _seqNum(instSeqNum),
      maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
    for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
    DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
            /* ... */);
bool
GPUDynInst::isOpcode(const std::string &opcodeStr,
                     const std::string &extStr) const

enums::StorageClassType
GPUDynInst::executedAs()
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            /* ... */);

    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector="
            /* ... */);
Operand scans behind writesFlatScratch() and readsFlatScratch():

    if (dstScalarOp.isFlatScratch())
        /* ... */
    if (srcScalarOp.isFlatScratch())
        /* ... */
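These scans back writesFlatScratch() and readsFlatScratch(): walk the scalar
operand list and report whether any operand names the flat-scratch register.
A sketch with a reduced, hypothetical OperandInfo carrying just the one
predicate used here:

    #include <algorithm>
    #include <vector>

    // Reduced stand-in for gem5's OperandInfo.
    struct OperandInfo {
        bool flatScratch = false;
        bool isFlatScratch() const { return flatScratch; }
    };

    // Free-function sketch of the member's logic: true if any scalar
    // destination operand is the flat-scratch register.
    bool writesFlatScratch(const std::vector<OperandInfo> &dstScalarOps)
    {
        return std::any_of(dstScalarOps.begin(), dstScalarOps.end(),
                           [](const OperandInfo &op)
                           { return op.isFlatScratch(); });
    }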
doApertureCheck() excerpt (per-lane checks on flat addresses; a lane whose
address falls in a reserved aperture or is non-canonical is a fatal error):

        fatal("flat access is in GPUVM APE\n");
    } else if (bits(addr[lane], 63, 47) != 0x1FFFF &&
               /* ... */) {
        fatal("flat access at addr %#x has a memory violation\n",
              /* ... */);
    }

resolveFlatSegment() excerpt (an access resolved to the private segment is
re-checked, and unhandled operation types or segments end in panic()):

    } else if (executedAs() == enums::SC_PRIVATE) {
        // ...
        assert(!(bits(addr[lane], 63, 47) != 0x1FFFF
                 /* ... */));
        // ...
        panic("Invalid memory operation!\n");
        // ...
        panic("Invalid memory operation!\n");
    } else if (executedAs() == enums::SC_PRIVATE) {
        // ...
        uint32_t physSgprIdx = /* ... */;
        // ...
        panic("Invalid memory operation!\n");
    // ...
    panic("flat addr %#llx maps to bad segment %d\n", /* ... */);
TheGpuISA::ScalarRegU32
GPUDynInst::srcLiteral() const
Per-page access accounting excerpt (each touched page maps to an access count
and a byte total):

    assert(number_pages_touched);
    // ...
    auto ret = computeUnit()->pageAccesses
        .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                std::make_pair(1, it.second)));
    // ...
    ret.first->second.first++;
    ret.first->second.second += it.second;
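The insert() above is the standard insert-or-update idiom on a std::map:
attempt a first-touch entry of (count = 1, bytes), and if the page key
already exists, insert() hands back the existing iterator, whose counters are
bumped instead. A minimal sketch with a hypothetical recordPageAccess()
helper:

    #include <cstdint>
    #include <map>
    #include <utility>

    using Addr = std::uint64_t;
    // Page address -> (access count, total bytes), as in pageDataStruct.
    using PageData = std::map<Addr, std::pair<int, int>>;

    void recordPageAccess(PageData &pageAccesses, Addr page, int bytes)
    {
        // Attempt to insert a first-touch entry for this page.
        auto ret = pageAccesses.insert(
            PageData::value_type(page, std::make_pair(1, bytes)));
        if (!ret.second) {
            // Page already present: bump access count and byte total.
            ret.first->second.first++;
            ret.first->second.second += bytes;
        }
    }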
profileLineAddressTime() excerpt (a new line address only opens a record at
hop 0):

    } else if (hopId == 0) {
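For profiling, the dynamic instruction keeps one timestamp per network hop,
both for the whole request (roundTripTime) and per cache-line address
(lineAddressTime); the else-if above makes a line open its record only at
hop 0. A sketch of that logic, assuming hops report in order and repeat
sightings of a hop (e.g. from coalesced accesses) are ignored; in gem5 the
map is a GPUDynInst member rather than a global:

    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <vector>

    using Addr = std::uint64_t;
    using Tick = std::uint64_t;

    std::map<Addr, std::vector<Tick>> lineAddressTime;

    void profileLineAddressTime(Addr addr, Tick now, int hopId)
    {
        auto it = lineAddressTime.find(addr);
        if (it != lineAddressTime.end()) {
            // Record each hop at most once per line.
            if (it->second.size() > static_cast<std::size_t>(hopId))
                return;
            it->second.push_back(now);
        } else if (hopId == 0) {
            // A new line only starts its record at the first hop.
            lineAddressTime.emplace(addr, std::vector<Tick>{now});
        }
    }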
Referenced symbols:

pageDataStruct pageAccesses
LdsState & getLds() const
std::map< Addr, int > pagesTouched
std::vector< ScalarRegisterFile * > srf
RegisterManager * registerManager
gem5::ComputeUnit::ComputeUnitStats stats
bool isKernelLaunch() const
std::vector< Tick > roundTripTime
bool hasDestinationSgpr() const
int numDstScalarRegOperands() const
std::map< Addr, std::vector< Tick > > lineAddressTime
void doApertureCheck(const VectorMask &mask)
void resolveFlatSegment(const VectorMask &mask)
std::vector< int > tlbHitLevel
bool isAtomicExch() const
bool isFlatGlobal() const
GPUStaticInst * _staticInst
bool hasDestinationVgpr() const
std::vector< int > statusVector
void profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
bool isUnconditionalJump() const
GPUStaticInst * staticInstruction()
int numSrcScalarRegOperands() const
bool isOpcode(const std::string &opcodeStr) const
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum)
bool isALU() const
Accessor methods for the attributes of the underlying GPU static instruction.
bool isReadOnlySeg() const
bool isSystemCoherent() const
bool hasSourceVgpr() const
int numDstVecRegOperands() const
bool hasSourceSgpr() const
bool readsFlatScratch() const
void initiateAcc(GPUDynInstPtr gpuDynInst)
int getNumOperands() const
enums::StorageClassType executedAs()
const std::vector< OperandInfo > & dstVecRegOperands() const
void profileRoundTripTime(Tick currentTime, int hopId)
bool isCondBranch() const
bool writesExecMask() const
bool isPrivateSeg() const
bool isEndOfKernel() const
const std::vector< OperandInfo > & srcVecRegOperands() const
bool isGloballyCoherent() const
bool readsExecMask() const
TheGpuISA::ScalarRegU32 srcLiteral() const
int maxSrcScalarRegOperandSize()
InstSeqNum seqNum() const
const std::vector< OperandInfo > & srcScalarRegOperands() const
const std::vector< OperandInfo > & dstScalarRegOperands() const
int numSrcVecRegOperands() const
bool writesFlatScratch() const
void execute(GPUDynInstPtr gpuDynInst)
bool isKernArgSeg() const
int maxSrcScalarRegOpSize
int maxSrcVecRegOperandSize()
bool isAtomicNoRet() const
const std::string & disassemble() const
void completeAcc(GPUDynInstPtr gpuDynInst)
ComputeUnit * computeUnit()
virtual int numDstRegOperands()=0
virtual TheGpuISA::ScalarRegU32 srcLiteral() const
bool isGloballyCoherent() const
Coherence domain of a memory instruction.
const std::string & disassemble()
const std::vector< OperandInfo > & dstVecRegOperands() const
const std::vector< OperandInfo > & srcScalarRegOperands() const
bool isReadOnlySeg() const
enums::StorageClassType executed_as
const std::string & opcode() const
bool isPrivateSeg() const
const std::vector< OperandInfo > & srcOperands() const
const std::vector< OperandInfo > & dstScalarRegOperands() const
bool isKernArgSeg() const
bool isEndOfKernel() const
bool isCondBranch() const
const std::vector< OperandInfo > & dstOperands() const
bool isKernelLaunch() const
bool isFlatGlobal() const
bool isSystemCoherent() const
virtual void initOperandInfo()=0
virtual void execute(GPUDynInstPtr gpuDynInst)=0
const std::vector< OperandInfo > & srcVecRegOperands() const
virtual void completeAcc(GPUDynInstPtr gpuDynInst)
int numDstScalarOperands()
virtual int getNumOperands()=0
int numSrcScalarOperands()
bool isAtomicExch() const
bool isAtomicNoRet() const
bool isUnconditionalJump() const
virtual void initiateAcc(GPUDynInstPtr gpuDynInst)
void initDynOperandInfo(Wavefront *wf, ComputeUnit *cu)
virtual int numSrcRegOperands()=0
AddrRange getAddrRange() const
int mapSgpr(Wavefront *w, int sgprIndex)
const ApertureRegister & ldsApe() const
bool isLdsApe(Addr addr) const
Addr getHiddenPrivateBase()
ComputeUnit * computeUnit
void decVMemInstsIssued()
void decLGKMInstsIssued()
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Addr size() const
Get the size of the address range.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
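A worked instance of the extraction the aperture checks above rely on,
restated as compile-time checks (the local bits() mirrors the documented
semantics):

    #include <cstdint>

    constexpr std::uint64_t
    bits(std::uint64_t val, unsigned first, unsigned last)
    {
        return (val >> last)
             & ((std::uint64_t(1) << (first - last + 1)) - 1);
    }

    // Bits 63..47 of a canonical kernel-half address are all ones...
    static_assert(bits(0xFFFF800000001234ULL, 63, 47) == 0x1FFFF,
                  "canonical upper half");
    // ...and all zeros for a canonical user-half address.
    static_assert(bits(0x0000000000001234ULL, 63, 47) == 0,
                  "canonical lower half");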
#define panic(...)
This implements a cprintf-based panic() function.
#define fatal(...)
This implements a cprintf-based fatal() function.
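Both are format-and-terminate macros; by gem5 convention fatal() reports an
unrecoverable user or input error, while panic() marks a condition that
should be impossible, i.e. a simulator bug (the excerpts above use fatal()
for bad guest addresses and panic() for unreachable cases). A rough analogue
with hypothetical my_fatal()/my_panic() macros and fprintf standing in for
cprintf:

    #include <cstdio>
    #include <cstdlib>

    // fatal(): unrecoverable user/input error; exit with a status code.
    #define my_fatal(...)                           \
        do {                                        \
            std::fprintf(stderr, "fatal: ");        \
            std::fprintf(stderr, __VA_ARGS__);      \
            std::exit(1);                           \
        } while (0)

    // panic(): "can't happen" simulator bug; abort for a core dump.
    #define my_panic(...)                           \
        do {                                        \
            std::fprintf(stderr, "panic: ");        \
            std::fprintf(stderr, __VA_ARGS__);      \
            std::abort();                           \
        } while (0)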
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
statistics::Scalar dynamicLMemInstrCnt
statistics::Scalar dynamicGMemInstrCnt
statistics::Distribution pageDivergenceDist
statistics::Scalar dynamicFlatMemInstrCnt