Go to the documentation of this file.
37 #include "debug/GPUAgentDisp.hh"
38 #include "debug/GPUDisp.hh"
39 #include "debug/GPUKernelInfo.hh"
40 #include "debug/GPUWgLatency.hh"
49 :
SimObject(
p), shader(nullptr), gpuCmdProc(nullptr),
50 tickEvent([this]{
exec(); },
52 dispatchActive(
false), stats(
this)
54 schedule(&tickEvent, 0);
117 DPRINTF(GPUDisp,
"launching kernel: %s, dispatch ID: %d\n",
119 DPRINTF(GPUAgentDisp,
"launching kernel: %s, dispatch ID: %d\n",
154 while (
execIds.size() > fail_count) {
157 bool launched(
false);
172 if (!task->isInvDone()){
176 DPRINTF(GPUDisp,
"kernel %d failed to launch, due to [%d] pending"
177 " invalidate requests\n", exec_id, task->outstandingInvs());
185 while (!task->dispComplete()) {
190 DPRINTF(GPUWgLatency,
"Attempt Kernel Launch cycle:%d kernel:%d\n",
199 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", exec_id);
203 }
else if (!launched) {
206 DPRINTF(GPUKernelInfo,
"Launched kernel %d\n", exec_id);
215 DPRINTF(GPUWgLatency,
"Kernel Wgs dispatched: %d | %d failures\n",
216 disp_count, fail_count);
230 assert(task->dispatchId() == kern_id);
236 return (task->numWgCompleted() + 1 == task->numWgTotal());
246 assert(
val == -1 ||
val == 1);
249 task->updateOutstandingInvs(
val);
266 assert(
val == -1 ||
val == 1);
269 task->updateOutstandingWbs(
val);
272 return (task->outstandingWbs() == 0);
282 return task->outstandingWbs();
299 assert(task->dispatchId() == kern_id);
300 task->notifyWgCompleted();
302 DPRINTF(GPUWgLatency,
"WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
305 if (task->numWgCompleted() == task->numWgTotal()) {
308 .
finishPkt(task->dispPktPtr(), task->queueId());
309 if (task->completionSignal()) {
316 uint64_t signal_value =
319 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete with completion "
320 "signal! Addr: %d\n", task->completionSignal());
325 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete! No completion "
329 DPRINTF(GPUWgLatency,
"Kernel Complete ticks:%d kernel:%d\n",
331 DPRINTF(GPUKernelInfo,
"Completed kernel %d\n", kern_id);
348 :
Stats::Group(parent),
349 ADD_STAT(numKernelLaunched,
"number of kernel launched"),
350 ADD_STAT(cyclesWaitingForDispatch,
"number of cycles with outstanding "
351 "wavefronts that are waiting to be dispatched")
bool scheduled() const
Determine if the current event is scheduled.
GPUDispatcher(const Params &p)
uint64_t functionalReadHsaSignal(Addr signal_handle) override
GPUCommandProcessor * gpuCmdProc
void updateContext(int cid)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define UNSERIALIZE_SCALAR(scalar)
std::queue< int > execIds
const std::string & kernelName() const
uint64_t Tick
Tick count type.
void finishPkt(void *pkt, uint32_t rl_idx)
bool isReachingKernelEnd(Wavefront *wf)
HSAQueueEntry * hsaTask(int disp_id)
void deschedule(Event &event)
void prepareInvalidate(HSAQueueEntry *task)
Tick when() const
Get the time that the event is scheduled.
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed, it will call this method...
ComputeUnit * computeUnit
void updateHsaSignal(Addr signal_handle, uint64_t signal_value) override
void schedule(Event &event, Tick when)
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel; kern_id: kernel id; val: +1/-1,...
Stats::Scalar cyclesWaitingForDispatch
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
std::queue< int > doneIds
#define SERIALIZE_SCALAR(scalar)
Stats::Scalar numKernelLaunched
EventFunctionWrapper tickEvent
GPUDispatcherParams Params
HSAPacketProcessor & hsaPacketProc()
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
GPUDispatcherStats(Stats::Group *parent)
GPUDispatcher::GPUDispatcherStats stats
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
void setShader(Shader *new_shader)
std::ostream CheckpointOut
void serialize(CheckpointOut &cp) const override
Serialize an object.
Tick curTick()
The universal simulation clock.
bool dispatchWorkgroups(HSAQueueEntry *task)
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel; kern_id: kernel id; val: +1/-1,...
Abstract superclass for simulation objects.
Generated on Tue Mar 23 2021 19:41:27 for gem5 by doxygen 1.8.17