38#include "arch/gpu_isa.hh"
55#include "params/Shader.hh"
64class GPUCommandProcessor;
92 std::unordered_map<int, uint32_t>
hwRegs;
223 "Currently we support only single process\n");
288 void AccessMem(uint64_t address,
void *ptr, uint32_t size,
int cu_id,
289 MemCmd cmd,
bool suppress_func_errors);
291 void ReadMem(uint64_t address,
void *ptr, uint32_t sz,
int cu_id);
293 void ReadMem(uint64_t address,
void *ptr, uint32_t sz,
int cu_id,
294 bool suppress_func_errors);
296 void WriteMem(uint64_t address,
void *ptr, uint32_t sz,
int cu_id);
298 void WriteMem(uint64_t address,
void *ptr, uint32_t sz,
int cu_id,
299 bool suppress_func_errors);
302 bool suppress_func_errors,
int cu_id);
307 cuList[cu_id] = compute_unit;
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
This class handles reads from the system/host memory space from the shader.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void prepareInvalidate(HSAQueueEntry *task)
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
void setLdsApe(Addr base, Addr limit)
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
ApertureRegister _scratchApe
void setScratchApe(Addr base, Addr limit)
EventFunctionWrapper tickEvent
std::unordered_map< int, uint32_t > hwRegs
std::vector< ComputeUnit * > cuList
ApertureRegister _gpuVmApe
const ApertureRegister & scratchApe() const
void addDeferredDispatch(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
void incNumOutstandingInvL2s()
void registerCU(int cu_id, ComputeUnit *compute_unit)
void ScheduleAdd(int *val, Tick when, int x)
GPUDispatcher & _dispatcher
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick > > &roundTripTime)
std::vector< uint64_t > sa_when
bool processTimingPacket(PacketPtr pkt)
int getNumOutstandingInvL2s() const
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
bool isScratchApe(Addr addr) const
std::vector< int32_t > sa_x
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
gem5::Shader::ShaderStats stats
const ApertureRegister & ldsApe() const
std::vector< std::tuple< void *, uint32_t, Addr > > deferred_dispatches
bool isLdsApe(Addr addr) const
bool isGpuVmApe(Addr addr) const
bool dispatchWorkgroups(HSAQueueEntry *task)
GPUDispatcher & dispatcher()
void decNumOutstandingInvL2s()
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
void incVectorInstDstOperand(int num_operands)
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from CP.
AMDGPUSystemHub * systemHub
void setHwReg(int regIdx, uint32_t val)
void updateContext(int cid)
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
void requestKernelExitEvent(bool is_blit_kernel)
uint32_t getHwReg(int regIdx)
void prepareFlush(GPUDynInstPtr gpuDynInst)
The dispatcher/shader arranges flush requests to the CUs.
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
void sampleLoad(const Tick accessTime)
const ApertureRegister & gpuVmApe() const
void incVectorInstSrcOperand(int num_operands)
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
int num_outstanding_invl2s
void sampleStore(const Tick accessTime)
GPUCommandProcessor & gpuCmdProc
Addr shHiddenPrivateBaseVmid
std::vector< int * > sa_val
Addr getHiddenPrivateBase()
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
ThreadContext is the external interface to all thread state for anything outside of the CPU.
A simple distribution stat.
This is a simple scalar statistic, like a counter.
A vector of scalar stats.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
BitfieldType< SegDescriptorLimit > limit
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
static const int LDS_SIZE
Declarations of a non-full system Page Table.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
Declaration of Statistics objects.
statistics::Vector vectorInstSrcOperand
statistics::Distribution storeLatencyDist
statistics::Distribution initToCoalesceLatency
statistics::Scalar shaderActiveTicks
statistics::Distribution loadLatencyDist
statistics::Distribution allLatencyDist
statistics::Distribution gmToCompleteLatency
ShaderStats(statistics::Group *parent, int wf_size)
statistics::Distribution coalsrLineAddresses
statistics::Vector vectorInstDstOperand
statistics::Distribution rubyNetworkLatency
statistics::Distribution * cacheBlockRoundTrip
statistics::Distribution gmEnqueueLatency