35#include "debug/GPUAgentDisp.hh"
36#include "debug/GPUDisp.hh"
37#include "debug/GPUKernelInfo.hh"
38#include "debug/GPUWgLatency.hh"
51 :
SimObject(
p), shader(nullptr), gpuCmdProc(nullptr),
52 tickEvent([this]{
exec(); },
54 dispatchActive(
false), kernelExitEvents(
p.kernel_exit_events),
57 schedule(&tickEvent, 0);
120 DPRINTF(GPUDisp,
"launching kernel: %s, dispatch ID: %d\n",
122 DPRINTF(GPUAgentDisp,
"launching kernel: %s, dispatch ID: %d\n",
157 while (
execIds.size() > fail_count) {
160 bool launched(
false);
175 if (!task->isInvDone()){
179 DPRINTF(GPUDisp,
"kernel %d failed to launch, due to [%d] pending"
180 " invalidate requests\n", exec_id, task->outstandingInvs());
188 while (!task->dispComplete()) {
193 DPRINTF(GPUWgLatency,
"Attempt Kernel Launch cycle:%d kernel:%d\n",
202 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", exec_id);
206 }
else if (!launched) {
209 DPRINTF(GPUKernelInfo,
"Launched kernel %d\n", exec_id);
218 DPRINTF(GPUWgLatency,
"Kernel Wgs dispatched: %d | %d failures\n",
219 disp_count, fail_count);
233 assert(task->dispatchId() == kern_id);
239 return (task->numWgCompleted() + 1 == task->numWgTotal());
249 assert(
val == -1 ||
val == 1);
252 task->updateOutstandingInvs(
val);
269 assert(
val == -1 ||
val == 1);
272 task->updateOutstandingWbs(
val);
275 return (task->outstandingWbs() == 0);
285 return task->outstandingWbs();
302 assert(task->dispatchId() == kern_id);
303 task->notifyWgCompleted();
305 DPRINTF(GPUWgLatency,
"WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
308 if (task->numWgCompleted() == task->numWgTotal()) {
311 .
finishPkt(task->dispPktPtr(), task->queueId());
312 if (task->completionSignal()) {
313 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete with completion "
314 "signal! Addr: %d\n", task->completionSignal());
318 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete! No completion "
322 DPRINTF(GPUWgLatency,
"Kernel Complete ticks:%d kernel:%d\n",
324 DPRINTF(GPUKernelInfo,
"Completed kernel %d\n", kern_id);
346 : statistics::
Group(parent),
347 ADD_STAT(numKernelLaunched,
"number of kernel launched"),
348 ADD_STAT(cyclesWaitingForDispatch,
"number of cycles with outstanding "
349 "wavefronts that are waiting to be dispatched")
void sendCompletionSignal(Addr signal_handle)
HSAPacketProcessor & hsaPacketProc()
void serialize(CheckpointOut &cp) const override
Serialize an object.
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
EventFunctionWrapper tickEvent
bool isReachingKernelEnd(Wavefront *wf)
GPUDispatcherParams Params
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
gem5::GPUDispatcher::GPUDispatcherStats stats
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
HSAQueueEntry * hsaTask(int disp_id)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
GPUCommandProcessor * gpuCmdProc
std::queue< int > execIds
GPUDispatcher(const Params &p)
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void setShader(Shader *new_shader)
std::queue< int > doneIds
void finishPkt(void *pkt, uint32_t rl_idx)
const std::string & kernelName() const
void prepareInvalidate(HSAQueueEntry *task)
bool dispatchWorkgroups(HSAQueueEntry *task)
void updateContext(int cid)
void requestKernelExitEvent(bool is_blit_kernel)
Abstract superclass for simulation objects.
ComputeUnit * computeUnit
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
void deschedule(Event &event)
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Tick when() const
Get the time that the event is scheduled.
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Tick
Tick count type.
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
statistics::Scalar numKernelLaunched
GPUDispatcherStats(statistics::Group *parent)
statistics::Scalar cyclesWaitingForDispatch
This file defines buffer classes used to handle pointer arguments in emulated syscalls.