34#include "debug/GPUInst.hh"
35#include "debug/GPUMem.hh"
61 for (
int i = 0;
i < (16 *
sizeof(uint32_t)); ++
i) {
89 DPRINTF(GPUInst,
"%s: generating operand info for %d operands\n",
261 const std::string& extStr)
const
263 return _staticInst->opcode().find(opcodeStr) != std::string::npos &&
264 _staticInst->opcode().find(extStr) != std::string::npos;
270 return _staticInst->opcode().find(opcodeStr) != std::string::npos;
297enums::StorageClassType
307 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
316 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: mempacket status bitvector="
498 for (
const auto& srcOp :
_staticInst->srcOperands())
508 for (
const auto& dstOp :
_staticInst->dstOperands())
548 for (
const auto& dstOp :
_staticInst->dstOperands())
558 for (
const auto& srcOp :
_staticInst->srcOperands())
569 if (dstScalarOp.isFlatScratch())
579 if (srcScalarOp.isFlatScratch())
801 fatal(
"flat access is in GPUVM APE\n");
802 }
else if (
bits(
addr[lane], 63, 47) != 0x1FFFF &&
805 fatal(
"flat access at addr %#x has a memory violation\n",
828 }
else if (
executedAs() == enums::SC_PRIVATE) {
847 assert(!(
bits(
addr[lane], 63, 47) != 0x1FFFF
876 panic(
"Invalid memory operation!\n");
900 panic(
"Invalid memory operation!\n");
902 }
else if (
executedAs() == enums::SC_PRIVATE) {
934 if (
wavefront()->gfxVersion == GfxVersion::gfx942 ||
935 wavefront()->gfxVersion == GfxVersion::gfx950) {
938 for (
int lane = 0; lane <
cu->wfSize(); ++lane) {
942 addr[lane] =
addr[lane] -
cu->shader->getScratchBase()
950 uint32_t physSgprIdx =
951 cu->registerManager->mapSgpr(
wavefront(), numSgprs - 4);
954 cu->registerManager->mapSgpr(
wavefront(), numSgprs - 3);
955 uint32_t size =
cu->srf[
simdId]->read(physSgprIdx);
958 for (
int lane = 0; lane <
cu->wfSize(); ++lane) {
961 cu->shader->getHiddenPrivateBase() -
962 cu->shader->getScratchBase();
983 panic(
"Invalid memory operation!\n");
989 panic(
"flat addr %#llx maps to bad segment %d\n",
996TheGpuISA::ScalarRegU32
1007 cu->stats.dynamicLMemInstrCnt++;
1009 cu->stats.dynamicFlatMemInstrCnt++;
1014 int number_pages_touched =
cu->pagesTouched.size();
1015 assert(number_pages_touched);
1016 cu->stats.pageDivergenceDist.sample(number_pages_touched);
1020 for (
auto it :
cu->pagesTouched) {
1023 ret =
cu->pageAccesses
1024 .insert(ComputeUnit::pageDataStruct::value_type(it.first,
1025 std::make_pair(1, it.second)));
1029 ret.first->second.first++;
1030 ret.first->second.second += it.second;
1034 cu->pagesTouched.clear();
1039 cu->stats.dynamicGMemInstrCnt++;
1062 }
else if (hopId == 0) {
bool isFlatScratch() const
bool isKernelLaunch() const
std::vector< Tick > roundTripTime
bool hasDestinationSgpr() const
int numDstScalarRegOperands() const
std::map< Addr, std::vector< Tick > > lineAddressTime
void doApertureCheck(const VectorMask &mask)
void resolveFlatSegment(const VectorMask &mask)
std::vector< int > tlbHitLevel
bool isAtomicExch() const
bool isFlatGlobal() const
GPUStaticInst * _staticInst
bool hasDestinationVgpr() const
std::vector< int > statusVector
void profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
bool isUnconditionalJump() const
GPUStaticInst * staticInstruction()
int numSrcScalarRegOperands() const
bool isOpcode(const std::string &opcodeStr) const
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum)
bool isALU() const
accessor methods for the attributes of the underlying GPU static instruction
bool isReadOnlySeg() const
bool isSystemCoherent() const
bool hasSourceVgpr() const
int numDstVecRegOperands() const
bool hasSourceSgpr() const
bool readsFlatScratch() const
void initiateAcc(GPUDynInstPtr gpuDynInst)
int getNumOperands() const
enums::StorageClassType executedAs()
const std::vector< OperandInfo > & dstVecRegOperands() const
void profileRoundTripTime(Tick currentTime, int hopId)
bool isCondBranch() const
bool writesExecMask() const
bool isPrivateSeg() const
bool isEndOfKernel() const
const std::vector< OperandInfo > & srcVecRegOperands() const
bool isGloballyCoherent() const
bool readsExecMask() const
TheGpuISA::ScalarRegU32 srcLiteral() const
int maxSrcScalarRegOperandSize()
InstSeqNum seqNum() const
const std::vector< OperandInfo > & srcScalarRegOperands() const
const std::vector< OperandInfo > & dstScalarRegOperands() const
int numSrcVecRegOperands() const
bool writesFlatScratch() const
void execute(GPUDynInstPtr gpuDynInst)
bool isKernArgSeg() const
int maxSrcScalarRegOpSize
bool isAtomicPkAddBF16() const
int maxSrcVecRegOperandSize()
bool isAtomicNoRet() const
const std::string & disassemble() const
void completeAcc(GPUDynInstPtr gpuDynInst)
ComputeUnit * computeUnit()
GPUExecContext(ComputeUnit *_cu, Wavefront *_wf)
enums::StorageClassType executed_as
const ApertureRegister & ldsApe() const
std::set< InstSeqNum > lgkmIssued
ComputeUnit * computeUnit
void decVMemInstsIssued()
void decLGKMInstsIssued()
std::set< InstSeqNum > vmemIssued
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask