Go to the documentation of this file.
   37 #include "debug/GPUAgentDisp.hh" 
   38 #include "debug/GPUDisp.hh" 
   39 #include "debug/GPUKernelInfo.hh" 
   40 #include "debug/GPUWgLatency.hh" 
   49     : 
SimObject(
p), shader(nullptr), gpuCmdProc(nullptr),
 
   50       tickEvent([this]{ 
exec(); },
 
   52       dispatchActive(
false), stats(
this)
 
   54     schedule(&tickEvent, 0);
 
  117     DPRINTF(GPUDisp, 
"launching kernel: %s, dispatch ID: %d\n",
 
  119     DPRINTF(GPUAgentDisp, 
"launching kernel: %s, dispatch ID: %d\n",
 
  154     while (
execIds.size() > fail_count) {
 
  157         bool launched(
false);
 
  172         if (!task->isInvDone()){
 
  176             DPRINTF(GPUDisp, 
"kernel %d failed to launch, due to [%d] pending" 
  177                 " invalidate requests\n", exec_id, task->outstandingInvs());
 
  185         while (!task->dispComplete()) {
 
  190             DPRINTF(GPUWgLatency, 
"Attempt Kernel Launch cycle:%d kernel:%d\n",
 
  199                 DPRINTF(GPUDisp, 
"kernel %d failed to launch\n", exec_id);
 
  203             } 
else if (!launched) {
 
  206                 DPRINTF(GPUKernelInfo, 
"Launched kernel %d\n", exec_id);
 
  215     DPRINTF(GPUWgLatency, 
"Kernel Wgs dispatched: %d | %d failures\n",
 
  216             disp_count, fail_count);
 
  230     assert(task->dispatchId() == kern_id);
 
  236     return (task->numWgCompleted() + 1 == task->numWgTotal());
 
  246     assert(
val == -1 || 
val == 1);
 
  249     task->updateOutstandingInvs(
val);
 
  266     assert(
val == -1 || 
val == 1);
 
  269     task->updateOutstandingWbs(
val);
 
  272     return (task->outstandingWbs() == 0);
 
  282     return task->outstandingWbs();
 
  299     assert(task->dispatchId() == kern_id);
 
  300     task->notifyWgCompleted();
 
  302     DPRINTF(GPUWgLatency, 
"WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
 
  305     if (task->numWgCompleted() == task->numWgTotal()) {
 
  308             .
finishPkt(task->dispPktPtr(), task->queueId());
 
  309         if (task->completionSignal()) {
 
  316             uint64_t signal_value =
 
  319             DPRINTF(GPUDisp, 
"HSA AQL Kernel Complete with completion " 
  320                     "signal! Addr: %d\n", task->completionSignal());
 
  325             DPRINTF(GPUDisp, 
"HSA AQL Kernel Complete! No completion " 
  329         DPRINTF(GPUWgLatency, 
"Kernel Complete ticks:%d kernel:%d\n",
 
  331         DPRINTF(GPUKernelInfo, 
"Completed kernel %d\n", kern_id);
 
  348     : 
Stats::Group(parent),
 
  349       ADD_STAT(numKernelLaunched, 
"number of kernel launched"),
 
  350       ADD_STAT(cyclesWaitingForDispatch, 
"number of cycles with outstanding " 
  351                "wavefronts that are waiting to be dispatched")
 
  
bool scheduled() const
Determine if the current event is scheduled.
GPUDispatcher(const Params &p)
uint64_t functionalReadHsaSignal(Addr signal_handle) override
GPUCommandProcessor * gpuCmdProc
void updateContext(int cid)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define UNSERIALIZE_SCALAR(scalar)
std::queue< int > execIds
const std::string & kernelName() const
uint64_t Tick
Tick count type.
void finishPkt(void *pkt, uint32_t rl_idx)
bool isReachingKernelEnd(Wavefront *wf)
HSAQueueEntry * hsaTask(int disp_id)
void deschedule(Event &event)
void prepareInvalidate(HSAQueueEntry *task)
Tick when() const
Get the time that the event is scheduled.
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
ComputeUnit * computeUnit
void updateHsaSignal(Addr signal_handle, uint64_t signal_value) override
void schedule(Event &event, Tick when)
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
Stats::Scalar cyclesWaitingForDispatch
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
std::queue< int > doneIds
#define SERIALIZE_SCALAR(scalar)
Stats::Scalar numKernelLaunched
EventFunctionWrapper tickEvent
GPUDispatcherParams Params
HSAPacketProcessor & hsaPacketProc()
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
GPUDispatcherStats(Stats::Group *parent)
GPUDispatcher::GPUDispatcherStats stats
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
void setShader(Shader *new_shader)
std::ostream CheckpointOut
void serialize(CheckpointOut &cp) const override
Serialize an object.
Tick curTick()
The universal simulation clock.
bool dispatchWorkgroups(HSAQueueEntry *task)
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Abstract superclass for simulation objects.
Generated on Tue Jun 22 2021 15:28:28 for gem5 by  doxygen 1.8.17