35 #include "debug/GPUAgentDisp.hh"
36 #include "debug/GPUDisp.hh"
37 #include "debug/GPUKernelInfo.hh"
38 #include "debug/GPUWgLatency.hh"
50 :
SimObject(
p), shader(nullptr), gpuCmdProc(nullptr),
51 tickEvent([this]{
exec(); },
53 dispatchActive(
false), stats(
this)
55 schedule(&tickEvent, 0);
118 DPRINTF(GPUDisp,
"launching kernel: %s, dispatch ID: %d\n",
120 DPRINTF(GPUAgentDisp,
"launching kernel: %s, dispatch ID: %d\n",
155 while (
execIds.size() > fail_count) {
158 bool launched(
false);
173 if (!task->isInvDone()){
177 DPRINTF(GPUDisp,
"kernel %d failed to launch, due to [%d] pending"
178 " invalidate requests\n", exec_id, task->outstandingInvs());
186 while (!task->dispComplete()) {
191 DPRINTF(GPUWgLatency,
"Attempt Kernel Launch cycle:%d kernel:%d\n",
200 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", exec_id);
204 }
else if (!launched) {
207 DPRINTF(GPUKernelInfo,
"Launched kernel %d\n", exec_id);
216 DPRINTF(GPUWgLatency,
"Kernel Wgs dispatched: %d | %d failures\n",
217 disp_count, fail_count);
231 assert(task->dispatchId() == kern_id);
237 return (task->numWgCompleted() + 1 == task->numWgTotal());
247 assert(
val == -1 ||
val == 1);
250 task->updateOutstandingInvs(
val);
267 assert(
val == -1 ||
val == 1);
270 task->updateOutstandingWbs(
val);
273 return (task->outstandingWbs() == 0);
283 return task->outstandingWbs();
300 assert(task->dispatchId() == kern_id);
301 task->notifyWgCompleted();
303 DPRINTF(GPUWgLatency,
"WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
306 if (task->numWgCompleted() == task->numWgTotal()) {
309 .
finishPkt(task->dispPktPtr(), task->queueId());
310 if (task->completionSignal()) {
317 uint64_t signal_value =
320 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete with completion "
321 "signal! Addr: %d\n", task->completionSignal());
326 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete! No completion "
330 DPRINTF(GPUWgLatency,
"Kernel Complete ticks:%d kernel:%d\n",
332 DPRINTF(GPUKernelInfo,
"Completed kernel %d\n", kern_id);
350 : statistics::
Group(parent),
351 ADD_STAT(numKernelLaunched,
"number of kernel launched"),
352 ADD_STAT(cyclesWaitingForDispatch,
"number of cycles with outstanding "
353 "wavefronts that are waiting to be dispatched")
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
HSAPacketProcessor & hsaPacketProc()
uint64_t functionalReadHsaSignal(Addr signal_handle)
void serialize(CheckpointOut &cp) const override
Serialize an object.
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
EventFunctionWrapper tickEvent
bool isReachingKernelEnd(Wavefront *wf)
GPUDispatcherParams Params
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
gem5::GPUDispatcher::GPUDispatcherStats stats
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
HSAQueueEntry * hsaTask(int disp_id)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
GPUCommandProcessor * gpuCmdProc
std::queue< int > execIds
GPUDispatcher(const Params &p)
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void setShader(Shader *new_shader)
std::queue< int > doneIds
void finishPkt(void *pkt, uint32_t rl_idx)
const std::string & kernelName() const
void prepareInvalidate(HSAQueueEntry *task)
bool dispatchWorkgroups(HSAQueueEntry *task)
void updateContext(int cid)
Abstract superclass for simulation objects.
ComputeUnit * computeUnit
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
void deschedule(Event &event)
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Tick when() const
Get the time that the event is scheduled.
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Tick
Tick count type.
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
statistics::Scalar numKernelLaunched
GPUDispatcherStats(statistics::Group *parent)
statistics::Scalar cyclesWaitingForDispatch
This file defines buffer classes used to handle pointer arguments in emulated syscalls.