34#include "debug/GPUInst.hh"
35#include "debug/GPUMem.hh"
61 for (
int i = 0;
i < (16 *
sizeof(uint32_t)); ++
i) {
89 DPRINTF(GPUInst,
"%s: generating operand info for %d operands\n",
261 const std::string& extStr)
const
263 return _staticInst->opcode().find(opcodeStr) != std::string::npos &&
264 _staticInst->opcode().find(extStr) != std::string::npos;
270 return _staticInst->opcode().find(opcodeStr) != std::string::npos;
297enums::StorageClassType
307 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
316 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: mempacket status bitvector="
498 for (
const auto& srcOp :
_staticInst->srcOperands())
508 for (
const auto& dstOp :
_staticInst->dstOperands())
548 for (
const auto& dstOp :
_staticInst->dstOperands())
558 for (
const auto& srcOp :
_staticInst->srcOperands())
569 if (dstScalarOp.isFlatScratch())
579 if (srcScalarOp.isFlatScratch())
795 fatal(
"flat access is in GPUVM APE\n");
796 }
else if (
bits(
addr[lane], 63, 47) != 0x1FFFF &&
799 fatal(
"flat access at addr %#x has a memory violation\n",
822 }
else if (
executedAs() == enums::SC_PRIVATE) {
841 assert(!(
bits(
addr[lane], 63, 47) != 0x1FFFF
870 panic(
"Invalid memory operation!\n");
894 panic(
"Invalid memory operation!\n");
896 }
else if (
executedAs() == enums::SC_PRIVATE) {
928 if (
wavefront()->gfxVersion == GfxVersion::gfx942) {
931 for (
int lane = 0; lane <
cu->wfSize(); ++lane) {
935 addr[lane] =
addr[lane] -
cu->shader->getScratchBase()
943 uint32_t physSgprIdx =
944 cu->registerManager->mapSgpr(
wavefront(), numSgprs - 4);
947 cu->registerManager->mapSgpr(
wavefront(), numSgprs - 3);
948 uint32_t size =
cu->srf[
simdId]->read(physSgprIdx);
951 for (
int lane = 0; lane <
cu->wfSize(); ++lane) {
954 cu->shader->getHiddenPrivateBase() -
955 cu->shader->getScratchBase();
976 panic(
"Invalid memory operation!\n");
982 panic(
"flat addr %#llx maps to bad segment %d\n",
989TheGpuISA::ScalarRegU32
1000 cu->stats.dynamicLMemInstrCnt++;
1002 cu->stats.dynamicFlatMemInstrCnt++;
1007 int number_pages_touched =
cu->pagesTouched.size();
1008 assert(number_pages_touched);
1009 cu->stats.pageDivergenceDist.sample(number_pages_touched);
1013 for (
auto it :
cu->pagesTouched) {
1016 ret =
cu->pageAccesses
1017 .insert(ComputeUnit::pageDataStruct::value_type(it.first,
1018 std::make_pair(1, it.second)));
1022 ret.first->second.first++;
1023 ret.first->second.second += it.second;
1027 cu->pagesTouched.clear();
1032 cu->stats.dynamicGMemInstrCnt++;
1055 }
else if (hopId == 0) {
bool isFlatScratch() const
bool isKernelLaunch() const
std::vector< Tick > roundTripTime
bool hasDestinationSgpr() const
int numDstScalarRegOperands() const
std::map< Addr, std::vector< Tick > > lineAddressTime
void doApertureCheck(const VectorMask &mask)
void resolveFlatSegment(const VectorMask &mask)
std::vector< int > tlbHitLevel
bool isAtomicExch() const
bool isFlatGlobal() const
GPUStaticInst * _staticInst
bool hasDestinationVgpr() const
std::vector< int > statusVector
void profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
bool isUnconditionalJump() const
GPUStaticInst * staticInstruction()
int numSrcScalarRegOperands() const
bool isOpcode(const std::string &opcodeStr) const
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum)
bool isALU() const
Accessor methods for the attributes of the underlying GPU static instruction.
bool isReadOnlySeg() const
bool isSystemCoherent() const
bool hasSourceVgpr() const
int numDstVecRegOperands() const
bool hasSourceSgpr() const
bool readsFlatScratch() const
void initiateAcc(GPUDynInstPtr gpuDynInst)
int getNumOperands() const
enums::StorageClassType executedAs()
const std::vector< OperandInfo > & dstVecRegOperands() const
void profileRoundTripTime(Tick currentTime, int hopId)
bool isCondBranch() const
bool writesExecMask() const
bool isPrivateSeg() const
bool isEndOfKernel() const
const std::vector< OperandInfo > & srcVecRegOperands() const
bool isGloballyCoherent() const
bool readsExecMask() const
TheGpuISA::ScalarRegU32 srcLiteral() const
int maxSrcScalarRegOperandSize()
InstSeqNum seqNum() const
const std::vector< OperandInfo > & srcScalarRegOperands() const
const std::vector< OperandInfo > & dstScalarRegOperands() const
int numSrcVecRegOperands() const
bool writesFlatScratch() const
void execute(GPUDynInstPtr gpuDynInst)
bool isKernArgSeg() const
int maxSrcScalarRegOpSize
int maxSrcVecRegOperandSize()
bool isAtomicNoRet() const
const std::string & disassemble() const
void completeAcc(GPUDynInstPtr gpuDynInst)
ComputeUnit * computeUnit()
GPUExecContext(ComputeUnit *_cu, Wavefront *_wf)
enums::StorageClassType executed_as
const ApertureRegister & ldsApe() const
std::set< InstSeqNum > lgkmIssued
ComputeUnit * computeUnit
void decVMemInstsIssued()
void decLGKMInstsIssued()
std::set< InstSeqNum > vmemIssued
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask