Go to the documentation of this file.
40 #include "arch/isa.hh"
57 #include "params/Shader.hh"
186 "Currently we support only single process\n");
246 void AccessMem(uint64_t address,
void *ptr, uint32_t size,
int cu_id,
247 MemCmd cmd,
bool suppress_func_errors);
249 void ReadMem(uint64_t address,
void *ptr, uint32_t sz,
int cu_id);
251 void ReadMem(uint64_t address,
void *ptr, uint32_t sz,
int cu_id,
252 bool suppress_func_errors);
254 void WriteMem(uint64_t address,
void *ptr, uint32_t sz,
int cu_id);
256 void WriteMem(uint64_t address,
void *ptr, uint32_t sz,
int cu_id,
257 bool suppress_func_errors);
260 bool suppress_func_errors,
int cu_id);
265 cuList[cu_id] = compute_unit;
324 #endif // __SHADER_HH__
bool isGpuVmApe(Addr addr) const
std::vector< int32_t > sa_x
Shader::ShaderStats stats
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode)
void updateContext(int cid)
Stats::Distribution initToCoalesceLatency
EventFunctionWrapper tickEvent
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
void registerCU(int cu_id, ComputeUnit *compute_unit)
Stats::Vector vectorInstDstOperand
const ApertureRegister & scratchApe() const
Stats::Distribution rubyNetworkLatency
void prepareFlush(GPUDynInstPtr gpuDynInst)
The dispatcher/shader arranges flush requests to the CUs.
uint64_t Tick
Tick count type.
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
Addr shHiddenPrivateBaseVmid
std::shared_ptr< Request > RequestPtr
void prepareInvalidate(HSAQueueEntry *task)
Stats::Distribution loadLatencyDist
Stats::Vector vectorInstSrcOperand
A vector of scalar stats.
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to the cycles of the object.
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interface.
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Stats::Scalar shaderActiveTicks
This is a simple scalar statistic, like a counter.
GPUDispatcher & dispatcher()
GPUCommandProcessor & gpuCmdProc
ThreadContext is the external interface to all thread state for anything outside of the CPU.
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Stats::Distribution coalsrLineAddresses
ApertureRegister _gpuVmApe
const ApertureRegister & ldsApe() const
Addr getHiddenPrivateBase()
std::vector< int * > sa_val
Stats::Distribution allLatencyDist
Stats::Distribution * cacheBlockRoundTrip
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
A simple distribution stat.
void incVectorInstDstOperand(int num_operands)
static const int LDS_SIZE
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true, and allows the programmer to specify a diagnostic printout.
std::vector< uint64_t > sa_when
void ScheduleAdd(int *val, Tick when, int x)
ShaderStats(Stats::Group *parent, int wf_size)
void incVectorInstSrcOperand(int num_operands)
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
std::shared_ptr< GPUDynInst > GPUDynInstPtr
bool isScratchApe(Addr addr) const
bool isLdsApe(Addr addr) const
bool processTimingPacket(PacketPtr pkt)
Stats::Distribution gmEnqueueLatency
void sampleLoad(const Tick accessTime)
bool dispatchWorkgroups(HSAQueueEntry *task)
Stats::Distribution gmToCompleteLatency
GPUDispatcher & _dispatcher
void sampleStore(const Tick accessTime)
ApertureRegister _scratchApe
std::vector< ComputeUnit * > cuList
Stats::Distribution storeLatencyDist
const ApertureRegister & gpuVmApe() const
Generated on Tue Jun 22 2021 15:28:29 for gem5 by doxygen 1.8.17