gem5  v22.1.0.0
dispatcher.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 
34 
35 #include "debug/GPUAgentDisp.hh"
36 #include "debug/GPUDisp.hh"
37 #include "debug/GPUKernelInfo.hh"
38 #include "debug/GPUWgLatency.hh"
41 #include "gpu-compute/shader.hh"
42 #include "gpu-compute/wavefront.hh"
43 #include "sim/syscall_emul_buf.hh"
44 #include "sim/system.hh"
45 
46 namespace gem5
47 {
48 
// GPUDispatcher constructor. The signature (listing line 49) is missing from
// this extract; the cross-reference index gives it as
// GPUDispatcher(const Params &p).
// The dispatcher starts with no shader and no command processor attached,
// with dispatchActive cleared, and with tickEvent bound to exec() at
// Event::CPU_Tick_Pri.
50  : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
51  tickEvent([this]{ exec(); },
52  "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
53  dispatchActive(false), stats(this)
54 {
// Schedule the first dispatcher tick at tick 0.
55  schedule(&tickEvent, 0);
56 }
57 
// Empty function body. NOTE(review): its signature (listing line 58) is
// missing from this extract; presumably this is the GPUDispatcher
// destructor -- TODO confirm against the original file.
59 {
60 }
61 
// Body of hsaTask. The signature (listing line 63) is missing from this
// extract; the index gives it as HSAQueueEntry *hsaTask(int disp_id).
// Returns the queue entry registered in hsaQueueEntries under disp_id.
64 {
// The ID must already be registered: operator[] on an unordered_map would
// otherwise insert a default (null) entry for an unknown key.
65  assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
66  return hsaQueueEntries[disp_id];
67 }
68 
// setCommandProcessor. The signature (listing line 70) is missing from this
// extract; the index gives it as
// void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc).
// Stores the given command processor pointer in gpuCmdProc; no ownership
// transfer is visible here.
69 void
71 {
72  gpuCmdProc = gpu_cmd_proc;
73 }
74 
// setShader. The signature (listing line 76) is missing from this extract;
// the index gives it as void setShader(Shader *new_shader).
// Stores the given shader pointer in the shader member.
75 void
77 {
78  shader = new_shader;
79 }
80 
// serialize. The signature (listing line 82) is missing from this extract;
// the index gives it as void serialize(CheckpointOut &cp) const override.
// Checkpoints the tick at which tickEvent is scheduled, as "event_tick".
81 void
83 {
// 0 is written when no tick event is currently scheduled.
84  Tick event_tick = 0;
85 
86  if (tickEvent.scheduled())
87  event_tick = tickEvent.when();
88 
89  SERIALIZE_SCALAR(event_tick);
90 }
91 
// unserialize. The signature (listing line 93) is missing from this extract;
// the index gives it as void unserialize(CheckpointIn &cp) override.
// Restores the tickEvent schedule from the checkpointed "event_tick" value.
92 void
94 {
// Left uninitialized here; UNSERIALIZE_SCALAR below fills it in.
95  Tick event_tick;
96 
// NOTE(review): listing line 98, the statement guarded by this check, is
// missing from this extract. Presumably it deschedules tickEvent so the
// checkpointed time can be applied -- TODO confirm against the original.
97  if (tickEvent.scheduled())
99 
100  UNSERIALIZE_SCALAR(event_tick);
101 
// serialize() writes 0 when no tick was scheduled, so 0 means "nothing to
// reschedule".
102  if (event_tick) {
103  schedule(&tickEvent, event_tick);
104  }
105 }
106 
// dispatch. The signature (listing line 114) is missing from this extract;
// the index gives it as void dispatch(HSAQueueEntry *task).
// Registers a new kernel task with the dispatcher: its dispatch ID is queued
// on execIds, the task is recorded in hsaQueueEntries, and dispatchActive is
// set.
// NOTE(review): listing line 116 (first statement of the body) is also
// missing from this extract.
113 void
115 {
117 
118  DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
119  task->kernelName(), task->dispatchId());
120  DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
121  task->kernelName(), task->dispatchId());
122 
123  execIds.push(task->dispatchId());
124  dispatchActive = true;
// emplace() leaves an existing entry untouched if this dispatch ID is
// already present in the map.
125  hsaQueueEntries.emplace(task->dispatchId(), task);
126 
// NOTE(review): listing line 128, the statement run when no tick is pending,
// is missing from this extract; presumably it schedules tickEvent -- TODO
// confirm.
127  if (!tickEvent.scheduled()) {
129  }
130 }
131 
// Dispatcher tick handler. The signature (listing line 133) is missing from
// this extract; the constructor binds tickEvent to exec(), so this is
// presumably GPUDispatcher::exec() -- TODO confirm.
// Makes one pass over the kernels queued in execIds, attempting to dispatch
// workgroups for each; kernels that cannot make progress are re-queued at
// the back. Afterwards the doneIds queue is drained.
132 void
134 {
// fail_count: kernels re-queued during this pass.
// disp_count: kernels for which at least one dispatchWorkgroups() call
// succeeded during this pass.
135  int fail_count(0);
136  int disp_count(0);
137 
143  DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
144  DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());
145 
// NOTE(review): listing line 147, the body of this check, is missing from
// this extract.
146  if (execIds.size() > 0) {
148  }
149 
// Every failure pushes the kernel to the back and bumps fail_count, so the
// loop ends once all entries still queued have already failed in this pass.
// NOTE(review): this compares an unsigned size() with a signed int.
155  while (execIds.size() > fail_count) {
156  int exec_id = execIds.front();
157  auto task = hsaQueueEntries[exec_id];
158  bool launched(false);
159 
// NOTE(review): listing line 161, the `if` that opens the branch closed by
// the `} else {` below, is missing from this extract. Presumably it tests
// shader->impl_kern_launch_acq -- TODO confirm.
160  // acq is needed before starting dispatch
162  // try to invalidate cache
163  shader->prepareInvalidate(task);
164  } else {
165  // kern launch acquire is not set, skip invalidate
166  task->markInvDone();
167  }
168 
// Invalidate not finished yet: move this kernel from the front to the back
// of the queue and go on to the next one.
173  if (!task->isInvDone()){
174  execIds.push(exec_id);
175  ++fail_count;
176 
177  DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
178  " invalidate requests\n", exec_id, task->outstandingInvs());
179 
180  // try the next kernel_id
181  execIds.pop();
182  continue;
183  }
184 
185  // kernel invalidate is done, start workgroup dispatch
186  while (!task->dispComplete()) {
187  // update the thread context
188  shader->updateContext(task->contextId());
189 
190  // attempt to dispatch workgroup
191  DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
192  curTick(), exec_id);
193 
// Dispatch failed: re-queue the kernel at the back and stop trying it in
// this pass. (Listing lines 195-199 are missing from this extract.)
194  if (!shader->dispatchWorkgroups(task)) {
200  DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
201  execIds.push(exec_id);
202  ++fail_count;
203  break;
204  } else if (!launched) {
// Count and log each kernel only once per pass, on its first successful
// dispatchWorkgroups() call.
205  launched = true;
206  disp_count++;
207  DPRINTF(GPUKernelInfo, "Launched kernel %d\n", exec_id);
208  }
209  }
210 
// Remove the front entry; if the kernel failed above, a copy of its ID was
// already pushed to the back.
211  // try the next kernel_id
212  execIds.pop();
213  }
214 
215  DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
216  DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
217  disp_count, fail_count);
218 
// Drain doneIds, logging each completed kernel ID.
219  while (doneIds.size()) {
220  DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
221  doneIds.pop();
222  }
223 }
224 
// isReachingKernelEnd. The signature (listing line 226) is missing from this
// extract; the index gives it as bool isReachingKernelEnd(Wavefront *wf).
// Returns true when the kernel that wf belongs to has exactly one workgroup
// left to complete (completed count + 1 equals the total).
225 bool
227 {
228  int kern_id = wf->kernId;
// The kernel must already be registered, so operator[] below cannot insert
// a default (null) entry.
229  assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
230  auto task = hsaQueueEntries[kern_id];
231  assert(task->dispatchId() == kern_id);
232 
237  return (task->numWgCompleted() + 1 == task->numWgTotal());
238 }
239 
// updateInvCounter. The signature line (listing line 246, which also carries
// the opening brace) is missing from this extract; the index gives it as
// void updateInvCounter(int kern_id, int val=-1).
// Adjusts the kernel's count of outstanding invalidate requests by val,
// which must be +1 or -1.
245 void
247  assert(val == -1 || val == 1);
248 
// NOTE(review): unlike hsaTask(), kern_id is not checked for presence before
// operator[]; an unknown ID would yield a null task here.
249  auto task = hsaQueueEntries[kern_id];
250  task->updateOutstandingInvs(val);
251 
// NOTE(review): listing line 254, the statement run in this branch, is
// missing from this extract; presumably it schedules tickEvent -- TODO
// confirm.
252  // kernel invalidate is done, schedule dispatch work
253  if (task->isInvDone() && !tickEvent.scheduled()) {
255  }
256 }
257 
// updateWbCounter. The signature line (listing line 266, which also carries
// the opening brace) is missing from this extract; the index gives it as
// bool updateWbCounter(int kern_id, int val=-1).
// Adjusts the kernel's count of outstanding writeback requests by val
// (+1 or -1) and reports whether none remain afterwards.
265 bool
267  assert(val == -1 || val == 1);
268 
// NOTE(review): kern_id is not checked for presence before operator[].
269  auto task = hsaQueueEntries[kern_id];
270  task->updateOutstandingWbs(val);
271 
272  // true: WB is done, false: WB is still ongoing
273  return (task->outstandingWbs() == 0);
274 }
275 
// getOutstandingWbs. The signature line (listing line 280, which also
// carries the opening brace) is missing from this extract; the index gives
// it as int getOutstandingWbs(int kern_id).
// Returns the kernel's current count of outstanding writeback requests.
// NOTE(review): the body uses the name kernId while the index shows the
// parameter as kern_id -- confirm against the original definition.
279 int
281  auto task = hsaQueueEntries[kernId];
282 
283  return task->outstandingWbs();
284 }
285 
// notifyWgCompl. The signature (listing line 295) is missing from this
// extract; the index gives it as void notifyWgCompl(Wavefront *wf).
// Records that one workgroup of wf's kernel has completed. When that was the
// kernel's last workgroup, the packet is finished via finishPkt() and, if
// the task has a completion signal, the signal value is decremented by one.
294 void
296 {
297  int kern_id = wf->kernId;
298  DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
// NOTE(review): kern_id is not checked for presence before operator[].
299  auto task = hsaQueueEntries[kern_id];
300  assert(task->dispatchId() == kern_id);
301  task->notifyWgCompleted();
302 
303  DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
304  curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
305 
// All workgroups of this kernel have now completed.
306  if (task->numWgCompleted() == task->numWgTotal()) {
// NOTE(review): listing line 308, the object expression that .finishPkt() is
// called on, is missing from this extract; presumably
// gpuCmdProc->hsaPacketProc() -- TODO confirm.
307  // Notify the HSA PP that this kernel is complete
309  .finishPkt(task->dispPktPtr(), task->queueId());
310  if (task->completionSignal()) {
// Read the current signal value, then write it back decremented by one.
317  uint64_t signal_value =
318  gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
319 
320  DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
321  "signal! Addr: %d\n", task->completionSignal());
322 
323  gpuCmdProc->updateHsaSignal(task->completionSignal(),
324  signal_value - 1);
325  } else {
326  DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
327  "signal\n");
328  }
329 
330  DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
331  curTick(), kern_id);
332  DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
333  }
334 
// NOTE(review): listing line 336, the statement run when no tick is pending,
// is missing from this extract; presumably it schedules tickEvent -- TODO
// confirm.
335  if (!tickEvent.scheduled()) {
337  }
338 }
339 
// NOTE(review): both the signature (listing line 341) and the statement
// guarded below (listing line 344) are missing from this extract, and the
// cross-reference index does not name this function. All that is visible is
// that it acts only when tickEvent is not already scheduled; presumably it
// schedules a dispatcher tick -- TODO confirm against the original file.
340 void
342 {
343  if (!tickEvent.scheduled()) {
345  }
346 }
347 
// GPUDispatcherStats constructor. The first signature line (listing line
// 348) is missing from this extract; the index gives the declaration as
// GPUDispatcherStats(statistics::Group *parent).
// Registers the two dispatcher statistics, numKernelLaunched and
// cyclesWaitingForDispatch, in a stats group under the given parent.
349  statistics::Group *parent)
350  : statistics::Group(parent),
351  ADD_STAT(numKernelLaunched, "number of kernel launched"),
352  ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
353  "wavefronts that are waiting to be dispatched")
354 {
355 }
356 
357 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
Tick clockPeriod() const
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
HSAPacketProcessor & hsaPacketProc()
uint64_t functionalReadHsaSignal(Addr signal_handle)
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: dispatcher.cc:82
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:114
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:246
EventFunctionWrapper tickEvent
Definition: dispatcher.hh:87
bool isReachingKernelEnd(Wavefront *wf)
Definition: dispatcher.cc:226
GPUDispatcherParams Params
Definition: dispatcher.hh:65
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
Definition: dispatcher.cc:280
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition: dispatcher.hh:88
gem5::GPUDispatcher::GPUDispatcherStats stats
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:266
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:63
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: dispatcher.cc:93
GPUCommandProcessor * gpuCmdProc
Definition: dispatcher.hh:86
std::queue< int > execIds
Definition: dispatcher.hh:90
GPUDispatcher(const Params &p)
Definition: dispatcher.cc:49
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
Definition: dispatcher.cc:295
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:70
void setShader(Shader *new_shader)
Definition: dispatcher.cc:76
std::queue< int > doneIds
Definition: dispatcher.hh:92
void finishPkt(void *pkt, uint32_t rl_idx)
const std::string & kernelName() const
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:191
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:235
int impl_kern_launch_acq
Definition: shader.hh:225
void updateContext(int cid)
Definition: shader.cc:153
Abstract superclass for simulation objects.
Definition: sim_object.hh:148
uint32_t wgId
Definition: wavefront.hh:160
ComputeUnit * computeUnit
Definition: wavefront.hh:106
Statistics container.
Definition: group.hh:94
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
void deschedule(Event &event)
Definition: eventq.hh:1028
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition: eventq.hh:204
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 63 > val
Definition: misc.hh:776
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
std::ostream CheckpointOut
Definition: serialize.hh:66
uint64_t Tick
Tick count type.
Definition: types.hh:58
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
GPUDispatcherStats(statistics::Group *parent)
Definition: dispatcher.cc:348
statistics::Scalar cyclesWaitingForDispatch
Definition: dispatcher.hh:102
This file defines buffer classes used to handle pointer arguments in emulated syscalls.

Generated on Wed Dec 21 2022 10:22:35 for gem5 by doxygen 1.9.1