Go to the documentation of this file.
37 #include "debug/GPUDisp.hh"
38 #include "debug/GPUKernelInfo.hh"
39 #include "debug/GPUWgLatency.hh"
48 :
SimObject(
p), shader(nullptr), gpuCmdProc(nullptr),
49 tickEvent([this]{
exec(); },
64 .
name(
name() +
".num_kernel_launched")
65 .
desc(
"number of kernel launched")
69 .
name(
name() +
".cycles_wait_dispatch")
70 .
desc(
"number of cycles with outstanding wavefronts "
71 "that are waiting to be dispatched")
131 DPRINTF(GPUDisp,
"launching kernel: %s, dispatch ID: %d\n",
164 while (
execIds.size() > fail_count) {
167 bool launched(
false);
182 if (!task->isInvDone()){
186 DPRINTF(GPUDisp,
"kernel %d failed to launch, due to [%d] pending"
187 " invalidate requests\n", exec_id, task->outstandingInvs());
195 while (!task->dispComplete()) {
200 DPRINTF(GPUWgLatency,
"Attempt Kernel Launch cycle:%d kernel:%d\n",
209 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", exec_id);
213 }
else if (!launched) {
215 DPRINTF(GPUKernelInfo,
"Launched kernel %d\n", exec_id);
237 assert(task->dispatchId() == kern_id);
243 return (task->numWgCompleted() + 1 == task->numWgTotal());
253 assert(
val == -1 ||
val == 1);
256 task->updateOutstandingInvs(
val);
273 assert(
val == -1 ||
val == 1);
276 task->updateOutstandingWbs(
val);
279 return (task->outstandingWbs() == 0);
289 return task->outstandingWbs();
306 assert(task->dispatchId() == kern_id);
307 task->notifyWgCompleted();
309 DPRINTF(GPUWgLatency,
"WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
312 if (task->numWgCompleted() == task->numWgTotal()) {
315 .
finishPkt(task->dispPktPtr(), task->queueId());
316 if (task->completionSignal()) {
319 Addr signal_addr = task->completionSignal() +
sizeof(
Addr);
320 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete! Triggering "
321 "completion signal: %x!\n", signal_addr);
332 auto &virt_proxy = tc->getVirtProxy();
334 prev_signal.
copyIn(virt_proxy);
337 *new_signal = (
Addr)*prev_signal - 1;
342 DPRINTF(GPUDisp,
"HSA AQL Kernel Complete! No completion "
346 DPRINTF(GPUWgLatency,
"Kernel Complete ticks:%d kernel:%d\n",
348 DPRINTF(GPUKernelInfo,
"Completed kernel %d\n", kern_id);
Stats::Scalar numKernelLaunched
bool scheduled() const
Determine if the current event is scheduled.
GPUCommandProcessor * gpuCmdProc
void regStats() override
Callback to set stat parameters.
void updateContext(int cid)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define UNSERIALIZE_SCALAR(scalar)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
std::queue< int > execIds
const std::string & kernelName() const
uint64_t Tick
Tick count type.
void finishPkt(void *pkt, uint32_t rl_idx)
bool isReachingKernelEnd(Wavefront *wf)
HSAQueueEntry * hsaTask(int disp_id)
void deschedule(Event &event)
void prepareInvalidate(HSAQueueEntry *task)
Tick when() const
Get the time that the event is scheduled.
Stats::Scalar cyclesWaitingForDispatch
GPUDispatcher(const Params *p)
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
ComputeUnit * computeUnit
void schedule(Event &event, Tick when)
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
std::queue< int > doneIds
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
#define SERIALIZE_SCALAR(scalar)
bool copyIn(PortProxy &memproxy)
copy data into simulator space (read from target memory)
EventFunctionWrapper tickEvent
virtual const std::string name() const
GPUDispatcherParams Params
HSAPacketProcessor & hsaPacketProc()
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
void setShader(Shader *new_shader)
std::ostream CheckpointOut
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
void serialize(CheckpointOut &cp) const override
Serialize an object.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
bool dispatchWorkgroups(HSAQueueEntry *task)
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Tick curTick()
The current simulated tick.
Abstract superclass for simulation objects.
Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17