Go to the documentation of this file.
35 #include "debug/GPUAgentDisp.hh"
36 #include "debug/GPUDisp.hh"
37 #include "debug/GPUKernelInfo.hh"
38 #include "debug/GPUWgLatency.hh"
50 :
SimObject(
p), shader(nullptr), gpuCmdProc(nullptr),
51 tickEvent([this]{
exec(); },
53 dispatchActive(
false), stats(
this)
55 schedule(&tickEvent, 0);
118 DPRINTF(GPUDisp,
"launching kernel: %s, dispatch ID: %d\n",
120 DPRINTF(GPUAgentDisp,
"launching kernel: %s, dispatch ID: %d\n",
155 while (
execIds.size() > fail_count) {
158 bool launched(
false);
173 if (!task->isInvDone()){
177 DPRINTF(GPUDisp,
"kernel %d failed to launch, due to [%d] pending"
178 " invalidate requests\n", exec_id, task->outstandingInvs());
186 while (!task->dispComplete()) {
191 DPRINTF(GPUWgLatency,
"Attempt Kernel Launch cycle:%d kernel:%d\n",
200 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", exec_id);
204 }
else if (!launched) {
207 DPRINTF(GPUKernelInfo,
"Launched kernel %d\n", exec_id);
216 DPRINTF(GPUWgLatency,
"Kernel Wgs dispatched: %d | %d failures\n",
217 disp_count, fail_count);
231 assert(task->dispatchId() == kern_id);
237 return (task->numWgCompleted() + 1 == task->numWgTotal());
247 assert(
val == -1 ||
val == 1);
250 task->updateOutstandingInvs(
val);
267 assert(
val == -1 ||
val == 1);
270 task->updateOutstandingWbs(
val);
273 return (task->outstandingWbs() == 0);
283 return task->outstandingWbs();
300 assert(task->dispatchId() == kern_id);
301 task->notifyWgCompleted();
303 DPRINTF(GPUWgLatency,
"WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
306 if (task->numWgCompleted() == task->numWgTotal()) {
309 .
finishPkt(task->dispPktPtr(), task->queueId());
310 if (task->completionSignal()) {
317 uint64_t signal_value =
320 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete with completion "
321 "signal! Addr: %d\n", task->completionSignal());
326 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete! No completion "
330 DPRINTF(GPUWgLatency,
"Kernel Complete ticks:%d kernel:%d\n",
332 DPRINTF(GPUKernelInfo,
"Completed kernel %d\n", kern_id);
350 : statistics::
Group(parent),
351 ADD_STAT(numKernelLaunched,
"number of kernel launched"),
352 ADD_STAT(cyclesWaitingForDispatch,
"number of cycles with outstanding "
353 "wavefronts that are waiting to be dispatched")
Tick curTick()
The universal simulation clock.
Tick when() const
Get the time that the event is scheduled.
std::queue< int > execIds
void serialize(CheckpointOut &cp) const override
Serialize an object.
#define UNSERIALIZE_SCALAR(scalar)
void updateContext(int cid)
GPUCommandProcessor * gpuCmdProc
HSAQueueEntry * hsaTask(int disp_id)
void schedule(Event &event, Tick when)
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
HSAPacketProcessor & hsaPacketProc()
bool isReachingKernelEnd(Wavefront *wf)
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void finishPkt(void *pkt, uint32_t rl_idx)
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
uint64_t Tick
Tick count type.
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
bool dispatchWorkgroups(HSAQueueEntry *task)
Abstract superclass for simulation objects.
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
ComputeUnit * computeUnit
void prepareInvalidate(HSAQueueEntry *task)
#define SERIALIZE_SCALAR(scalar)
void deschedule(Event &event)
const std::string & kernelName() const
statistics::Scalar cyclesWaitingForDispatch
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
gem5::GPUDispatcher::GPUDispatcherStats stats
GPUDispatcherParams Params
GPUDispatcherStats(statistics::Group *parent)
void setShader(Shader *new_shader)
GPUDispatcher(const Params &p)
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
uint64_t functionalReadHsaSignal(Addr signal_handle)
EventFunctionWrapper tickEvent
std::ostream CheckpointOut
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
statistics::Scalar numKernelLaunched
bool scheduled() const
Determine if the current event is scheduled.
std::queue< int > doneIds
Generated on Wed May 4 2022 12:13:58 for gem5 by doxygen 1.8.17