35#include "debug/GPUAgentDisp.hh"
36#include "debug/GPUDisp.hh"
37#include "debug/GPUKernelInfo.hh"
38#include "debug/GPUWgLatency.hh"
50 :
SimObject(
p), shader(nullptr), gpuCmdProc(nullptr),
51 tickEvent([this]{
exec(); },
53 dispatchActive(
false), kernelExitEvents(
p.kernel_exit_events),
56 schedule(&tickEvent, 0);
119 DPRINTF(GPUDisp,
"launching kernel: %s, dispatch ID: %d\n",
121 DPRINTF(GPUAgentDisp,
"launching kernel: %s, dispatch ID: %d\n",
156 while (
execIds.size() > fail_count) {
159 bool launched(
false);
174 if (!task->isInvDone()){
178 DPRINTF(GPUDisp,
"kernel %d failed to launch, due to [%d] pending"
179 " invalidate requests\n", exec_id, task->outstandingInvs());
187 while (!task->dispComplete()) {
192 DPRINTF(GPUWgLatency,
"Attempt Kernel Launch cycle:%d kernel:%d\n",
201 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", exec_id);
205 }
else if (!launched) {
208 DPRINTF(GPUKernelInfo,
"Launched kernel %d\n", exec_id);
217 DPRINTF(GPUWgLatency,
"Kernel Wgs dispatched: %d | %d failures\n",
218 disp_count, fail_count);
232 assert(task->dispatchId() == kern_id);
238 return (task->numWgCompleted() + 1 == task->numWgTotal());
248 assert(
val == -1 ||
val == 1);
251 task->updateOutstandingInvs(
val);
268 assert(
val == -1 ||
val == 1);
271 task->updateOutstandingWbs(
val);
274 return (task->outstandingWbs() == 0);
284 return task->outstandingWbs();
301 assert(task->dispatchId() == kern_id);
302 task->notifyWgCompleted();
304 DPRINTF(GPUWgLatency,
"WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
307 if (task->numWgCompleted() == task->numWgTotal()) {
310 .
finishPkt(task->dispPktPtr(), task->queueId());
311 if (task->completionSignal()) {
318 uint64_t signal_value =
321 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete with completion "
322 "signal! Addr: %d\n", task->completionSignal());
327 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete! No completion "
331 DPRINTF(GPUWgLatency,
"Kernel Complete ticks:%d kernel:%d\n",
333 DPRINTF(GPUKernelInfo,
"Completed kernel %d\n", kern_id);
355 : statistics::
Group(parent),
356 ADD_STAT(numKernelLaunched,
"number of kernel launched"),
357 ADD_STAT(cyclesWaitingForDispatch,
"number of cycles with outstanding "
358 "wavefronts that are waiting to be dispatched")
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
HSAPacketProcessor & hsaPacketProc()
uint64_t functionalReadHsaSignal(Addr signal_handle)
void serialize(CheckpointOut &cp) const override
Serialize an object.
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
EventFunctionWrapper tickEvent
bool isReachingKernelEnd(Wavefront *wf)
GPUDispatcherParams Params
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
gem5::GPUDispatcher::GPUDispatcherStats stats
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
HSAQueueEntry * hsaTask(int disp_id)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
GPUCommandProcessor * gpuCmdProc
std::queue< int > execIds
GPUDispatcher(const Params &p)
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void setShader(Shader *new_shader)
std::queue< int > doneIds
void finishPkt(void *pkt, uint32_t rl_idx)
const std::string & kernelName() const
void prepareInvalidate(HSAQueueEntry *task)
bool dispatchWorkgroups(HSAQueueEntry *task)
void updateContext(int cid)
Abstract superclass for simulation objects.
ComputeUnit * computeUnit
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
void deschedule(Event &event)
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Tick when() const
Get the time that the event is scheduled.
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Tick
Tick count type.
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
statistics::Scalar numKernelLaunched
GPUDispatcherStats(statistics::Group *parent)
statistics::Scalar cyclesWaitingForDispatch
This file defines buffer classes used to handle pointer arguments in emulated syscalls.