#include "debug/GPUInst.hh"
#include "debug/GPUMem.hh"
47 (
Addr)0), numScalarReqs(0), isSaveRestore(false),
48 _staticInst(static_inst), _seqNum(instSeqNum),
49 maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
    for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
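        // hedged completion of the elided body: scalar loads can return
        // up to 16 dwords (64 bytes), and the staging buffer (assumed to
        // be the uint8_t member scalar_data) is zero-filled here
        scalar_data[i] = 0;
    }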
    DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
            disassemble(), getNumOperands());
bool
GPUDynInst::isOpcode(const std::string& opcodeStr,
                     const std::string& extStr) const
enums::StorageClassType
GPUDynInst::executedAs()
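{
    // hedged sketch of the body: the storage class is recorded on the
    // static instruction (GPUStaticInst::executed_as) when the flat
    // address is resolved, and this accessor simply exposes it
    return _staticInst->executed_as;
}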
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",

    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector="
            "%#x\n",
        if (dstScalarOp.isFlatScratch())
            return true;

        if (srcScalarOp.isFlatScratch())
            return true;
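// For context, a hedged sketch of the predicate the first test belongs
// to (writesFlatScratch(); readsFlatScratch() is the mirror-image loop
// over srcScalarRegOperands()):
bool
GPUDynInst::writesFlatScratch() const
{
    for (const auto& dstScalarOp : dstScalarRegOperands()) {
        if (dstScalarOp.isFlatScratch())
            return true;
    }
    return false;
}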
                fatal("flat access is in GPUVM APE\n");
            } else if (bits(addr[lane], 63, 47) != 0x1FFFF &&
                       bits(addr[lane], 63, 47) != 0) {
                fatal("flat access at addr %#x has a memory violation\n",
                      addr[lane]);
    } else if (executedAs() == enums::SC_PRIVATE) {
                assert(!(bits(addr[lane], 63, 47) != 0x1FFFF &&
                         bits(addr[lane], 63, 47) != 0));
            panic("Invalid memory operation!\n");
            panic("Invalid memory operation!\n");

    } else if (executedAs() == enums::SC_PRIVATE) {
        if (wavefront()->gfxVersion == GfxVersion::gfx942) {

            for (int lane = 0; lane < cu->wfSize(); ++lane) {

        uint32_t physSgprIdx =

        uint32_t size = cu->srf[simdId]->read(physSgprIdx);

        for (int lane = 0; lane < cu->wfSize(); ++lane) {
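            if (mask[lane]) {
                // hedged sketch of the per-lane scratch mapping: each
                // active lane is assumed to own a `size`-byte slot,
                // strided by lane id; `offset` stands in for the wave's
                // scratch offset, read from the SRF like `size` above
                addr[lane] = addr[lane] + lane * size + offset;
            }
        }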
            panic("Invalid memory operation!\n");
        panic("flat addr %#llx maps to bad segment %d\n",
              addr[lane], executedAs());
TheGpuISA::ScalarRegU32
GPUDynInst::srcLiteral() const
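{
    // hedged sketch: delegate to the static instruction's literal
    // operand (GPUStaticInst::srcLiteral() is virtual and assumed to
    // default to 0 for instructions without a literal)
    return _staticInst->srcLiteral();
}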
    assert(number_pages_touched);

        ret = cu->pageAccesses
            .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                    std::make_pair(1, it.second)));

        if (!ret.second) {
            ret.first->second.first++;
            ret.first->second.second += it.second;
        }
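// The insert-then-update sequence above is the standard std::map idiom
// for accumulating per-key counters: try to insert a fresh (count, sum)
// pair, and if the key was already present, bump the existing entry. A
// self-contained sketch of the same pattern (names are illustrative,
// not gem5's):
#include <cstdint>
#include <iostream>
#include <map>
#include <utility>

int
main()
{
    // key: page address; value: (touch count, accumulated access count)
    using PageData = std::map<uint64_t, std::pair<int, int>>;
    PageData pageAccesses;

    auto touch = [&](uint64_t page, int accesses) {
        auto ret = pageAccesses.insert(
            PageData::value_type(page, std::make_pair(1, accesses)));
        if (!ret.second) {
            // the page was already in the table: update its counters
            ret.first->second.first++;
            ret.first->second.second += accesses;
        }
    };

    touch(0x1000, 4);
    touch(0x1000, 8);  // second touch of the same page
    touch(0x2000, 2);

    for (const auto& kv : pageAccesses) {
        std::cout << std::hex << kv.first << std::dec << ": "
                  << kv.second.first << " touches, "
                  << kv.second.second << " accesses\n";
    }
    return 0;
}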
    } else if (hopId == 0) {
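// A hedged sketch of the routine this branch belongs to, given the
// declared members profileLineAddressTime(Addr, Tick, int) and
// std::map<Addr, std::vector<Tick>> lineAddressTime: timestamps are
// recorded per cache-line address, one slot per hop, and a new vector
// is started only when the line is first seen at hop 0.
void
GPUDynInst::profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
{
    if (lineAddressTime.count(addr)) {
        // line already tracked: record this hop's timestamp if a slot
        // for it exists
        if (lineAddressTime[addr].size() > static_cast<size_t>(hopId)) {
            lineAddressTime[addr][hopId] = currentTime;
        }
    } else if (hopId == 0) {
        // first sighting: start the per-hop timestamp vector
        lineAddressTime.insert(
            std::make_pair(addr, std::vector<Tick>(1, currentTime)));
    }
}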