35#include "debug/GPUAgentDisp.hh"
36#include "debug/GPUDisp.hh"
37#include "debug/GPUKernelInfo.hh"
38#include "debug/GPUWgLatency.hh"
51 :
SimObject(
p), shader(nullptr), gpuCmdProc(nullptr),
52 tickEvent([this]{
exec(); },
54 dispatchActive(
false), kernelExitEvents(
p.kernel_exit_events),
57 schedule(&tickEvent, 0);
120 DPRINTF(GPUDisp,
"launching kernel: %s, dispatch ID: %d\n",
122 DPRINTF(GPUAgentDisp,
"launching kernel: %s, dispatch ID: %d\n",
157 while (
execIds.size() > fail_count) {
160 bool launched(
false);
175 if (!task->isInvDone()){
179 DPRINTF(GPUDisp,
"kernel %d failed to launch, due to [%d] pending"
180 " invalidate requests\n", exec_id, task->outstandingInvs());
188 while (!task->dispComplete()) {
193 DPRINTF(GPUWgLatency,
"Attempt Kernel Launch cycle:%d kernel:%d\n",
202 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", exec_id);
206 }
else if (!launched) {
209 DPRINTF(GPUKernelInfo,
"Launched kernel %d for WG %d\n",
210 exec_id, disp_count);
219 DPRINTF(GPUWgLatency,
"Kernel Wgs dispatched: %d | %d failures\n",
220 disp_count, fail_count);
234 assert(task->dispatchId() == kern_id);
240 return (task->numWgCompleted() + 1 == task->numWgTotal());
250 assert(
val == -1 ||
val == 1);
253 task->updateOutstandingInvs(
val);
270 assert(
val == -1 ||
val == 1);
273 task->updateOutstandingWbs(
val);
276 return (task->outstandingWbs() == 0);
286 return task->outstandingWbs();
303 assert(task->dispatchId() == kern_id);
304 task->notifyWgCompleted();
306 DPRINTF(GPUWgLatency,
"WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
309 if (task->numWgCompleted() == task->numWgTotal()) {
312 .
finishPkt(task->dispPktPtr(), task->queueId());
313 if (task->completionSignal()) {
314 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete with completion "
315 "signal! Addr: %d\n", task->completionSignal());
319 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete! No completion "
323 DPRINTF(GPUWgLatency,
"Kernel Complete ticks:%d kernel:%d\n",
325 DPRINTF(GPUKernelInfo,
"Completed kernel %d\n", kern_id);
347 : statistics::
Group(parent),
348 ADD_STAT(numKernelLaunched,
"number of kernel launched"),
349 ADD_STAT(cyclesWaitingForDispatch,
"number of cycles with outstanding "
350 "wavefronts that are waiting to be dispatched")
void sendCompletionSignal(Addr signal_handle)
HSAPacketProcessor & hsaPacketProc()
void serialize(CheckpointOut &cp) const override
Serialize an object.
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
EventFunctionWrapper tickEvent
bool isReachingKernelEnd(Wavefront *wf)
GPUDispatcherParams Params
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
gem5::GPUDispatcher::GPUDispatcherStats stats
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
HSAQueueEntry * hsaTask(int disp_id)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
GPUCommandProcessor * gpuCmdProc
std::queue< int > execIds
GPUDispatcher(const Params &p)
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void setShader(Shader *new_shader)
std::queue< int > doneIds
void finishPkt(void *pkt, uint32_t rl_idx)
const std::string & kernelName() const
void prepareInvalidate(HSAQueueEntry *task)
bool dispatchWorkgroups(HSAQueueEntry *task)
void updateContext(int cid)
void requestKernelExitEvent(bool is_blit_kernel)
Abstract superclass for simulation objects.
ComputeUnit * computeUnit
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
void deschedule(Event &event)
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Tick when() const
Get the time that the event is scheduled.
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Copyright (c) 2024 Arm Limited All rights reserved.
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Tick
Tick count type.
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
statistics::Scalar numKernelLaunched
GPUDispatcherStats(statistics::Group *parent)
statistics::Scalar cyclesWaitingForDispatch
This file defines buffer classes used to handle pointer arguments in emulated syscalls.