gem5  v21.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
dispatcher.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 
36 
37 #include "debug/GPUAgentDisp.hh"
38 #include "debug/GPUDisp.hh"
39 #include "debug/GPUKernelInfo.hh"
40 #include "debug/GPUWgLatency.hh"
43 #include "gpu-compute/shader.hh"
44 #include "gpu-compute/wavefront.hh"
45 #include "sim/syscall_emul_buf.hh"
46 #include "sim/system.hh"
47 
// GPUDispatcher constructor; its signature (listing line 48) is not present in
// this extract. Starts with no shader or command processor attached, binds
// tickEvent to exec() at CPU_Tick_Pri, and schedules that event for tick 0.
49  : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
50  tickEvent([this]{ exec(); },
51  "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
52  dispatchActive(false), stats(this)
53 {
54  schedule(&tickEvent, 0);
55 }
56 
// GPUDispatcher destructor (signature at listing line 57 is not present in
// this extract). The body is empty.
58 {
59 }
60 
// GPUDispatcher::hsaTask(int disp_id) (signature at listing line 62 is not
// present in this extract). Returns the HSAQueueEntry stored in
// hsaQueueEntries under disp_id; asserts that the ID is present first.
63 {
64  assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
65  return hsaQueueEntries[disp_id];
66 }
67 
// GPUDispatcher::setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
// (signature at listing line 69 is not present in this extract).
// Stores the pointer in gpuCmdProc; no other state is touched.
68 void
70 {
71  gpuCmdProc = gpu_cmd_proc;
72 }
73 
// GPUDispatcher::setShader(Shader *new_shader) (signature at listing line 75
// is not present in this extract).
// Stores the pointer in shader; no other state is touched.
74 void
76 {
77  shader = new_shader;
78 }
79 
// GPUDispatcher::serialize(CheckpointOut &cp) const (signature at listing
// line 81 is not present in this extract).
// Writes a single scalar, event_tick: the tick tickEvent is scheduled for, or
// 0 when tickEvent is not currently scheduled.
80 void
82 {
83  Tick event_tick = 0;
84 
85  if (tickEvent.scheduled())
86  event_tick = tickEvent.when();
87 
88  SERIALIZE_SCALAR(event_tick);
89 }
90 
// GPUDispatcher::unserialize(CheckpointIn &cp) (signature at listing line 92
// is not present in this extract).
// Reads event_tick back and, when it is non-zero, schedules tickEvent at that
// tick. serialize() stores 0 when no tick was pending, so 0 leaves tickEvent
// unscheduled here.
91 void
93 {
    // Declared without an initializer; presumably filled in by
    // UNSERIALIZE_SCALAR below before it is read -- confirm against the macro.
94  Tick event_tick;
95 
    // NOTE(review): listing line 97, the statement controlled by this if, is
    // missing from this extract. EventManager::deschedule appears in the
    // page's cross-reference index, so it presumably deschedules tickEvent --
    // confirm against the real source.
96  if (tickEvent.scheduled())
98 
99  UNSERIALIZE_SCALAR(event_tick);
100 
101  if (event_tick) {
102  schedule(&tickEvent, event_tick);
103  }
104 }
105 
// GPUDispatcher::dispatch(HSAQueueEntry *task) (signature at listing line 113
// is not present in this extract; line 115 is missing as well).
// Registers a new kernel with the dispatcher: logs the launch, queues the
// task's dispatch ID on execIds, sets dispatchActive, and records the task in
// hsaQueueEntries keyed by that ID.
112 void
114 {
116 
117  DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
118  task->kernelName(), task->dispatchId());
119  DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
120  task->kernelName(), task->dispatchId());
121 
122  execIds.push(task->dispatchId());
123  dispatchActive = true;
    // emplace() leaves an existing entry untouched if the ID is already
    // present in the map.
124  hsaQueueEntries.emplace(task->dispatchId(), task);
125 
    // NOTE(review): listing line 127, the body of this if, is missing from
    // this extract; presumably it schedules tickEvent -- confirm against the
    // real source.
126  if (!tickEvent.scheduled()) {
128  }
129 }
130 
// GPUDispatcher::exec() (signature at listing line 132 is not present in this
// extract). This is the callback bound to tickEvent in the constructor.
// Makes one pass over the queue of pending dispatch IDs and tries to launch
// each kernel's workgroups; kernels that cannot make progress are put back on
// the queue. Finally drains doneIds, logging each entry.
// NOTE(review): several listing lines inside this function (137-141, 146,
// 149-153, 160, 168-171, 194-198) are missing from this extract.
131 void
133 {
    // fail_count: kernels re-queued during this call.
    // disp_count: kernels with at least one successful dispatchWorkgroups()
    // call during this call.
134  int fail_count(0);
135  int disp_count(0);
136 
142  DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
143  DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());
144 
    // NOTE(review): listing line 146, the body of this if, is missing from
    // this extract.
145  if (execIds.size() > 0) {
147  }
148 
    // Every iteration pops the front ID, and every failure pushes that ID to
    // the back and bumps fail_count, so the loop ends once only IDs that
    // failed during this call remain in the queue.
154  while (execIds.size() > fail_count) {
155  int exec_id = execIds.front();
156  auto task = hsaQueueEntries[exec_id];
    // launched: set on this kernel's first successful dispatchWorkgroups()
    // call so disp_count is bumped at most once per kernel.
157  bool launched(false);
158 
159  // acq is needed before starting dispatch
    // NOTE(review): listing line 160, the `if (...) {` that the `} else {`
    // below closes, is missing from this extract, so the condition that
    // selects between the two branches is not visible here.
161  // try to invalidate cache
162  shader->prepareInvalidate(task);
163  } else {
164  // kern launch acquire is not set, skip invalidate
165  task->markInvDone();
166  }
167 
    // Invalidate requests still pending: rotate this ID to the back of the
    // queue, count it as a failure, and move on to the next ID.
172  if (!task->isInvDone()){
173  execIds.push(exec_id);
174  ++fail_count;
175 
176  DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
177  " invalidate requests\n", exec_id, task->outstandingInvs());
178 
179  // try the next kernel_id
180  execIds.pop();
181  continue;
182  }
183 
184  // kernel invalidate is done, start workgroup dispatch
185  while (!task->dispComplete()) {
186  // update the thread context
187  shader->updateContext(task->contextId());
188 
189  // attempt to dispatch workgroup
190  DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
191  curTick(), exec_id);
192 
    // A false return from dispatchWorkgroups() ends the attempt for this
    // kernel: the ID is pushed to the back, counted as a failure, and the
    // inner loop is left.
193  if (!shader->dispatchWorkgroups(task)) {
199  DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
200  execIds.push(exec_id);
201  ++fail_count;
202  break;
203  } else if (!launched) {
204  launched = true;
205  disp_count++;
206  DPRINTF(GPUKernelInfo, "Launched kernel %d\n", exec_id);
207  }
208  }
209 
    // Drops the front entry in both outcomes: after a break the same ID was
    // already pushed to the back, otherwise dispComplete() is true and the ID
    // leaves the queue.
210  // try the next kernel_id
211  execIds.pop();
212  }
213 
214  DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
215  DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
216  disp_count, fail_count);
217 
    // Empty doneIds, emitting one trace line per entry.
218  while (doneIds.size()) {
219  DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
220  doneIds.pop();
221  }
222 }
223 
// GPUDispatcher::isReachingKernelEnd(Wavefront *wf) (signature at listing
// line 225 is not present in this extract; lines 232-235 are missing too).
// Looks up the kernel wf belongs to (wf->kernId) and returns true when that
// kernel's completed-workgroup count is exactly one short of its total.
// Asserts that the kernel is registered and that its dispatch ID matches.
224 bool
226 {
227  int kern_id = wf->kernId;
228  assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
229  auto task = hsaQueueEntries[kern_id];
230  assert(task->dispatchId() == kern_id);
231 
236  return (task->numWgCompleted() + 1 == task->numWgTotal());
237 }
238 
// GPUDispatcher::updateInvCounter(int kern_id, int val=-1) (signature and
// opening brace at listing line 245 are not present in this extract).
// Applies val, which must be +1 or -1, to the kernel's outstanding-invalidate
// counter via HSAQueueEntry::updateOutstandingInvs().
244 void
246  assert(val == -1 || val == 1);
247 
    // NOTE(review): unlike hsaTask(), no assert guards this lookup;
    // operator[] on an absent key would insert a null entry.
248  auto task = hsaQueueEntries[kern_id];
249  task->updateOutstandingInvs(val);
250 
251  // kernel invalidate is done, schedule dispatch work
    // NOTE(review): listing line 253, the body of this if, is missing from
    // this extract.
252  if (task->isInvDone() && !tickEvent.scheduled()) {
254  }
255 }
256 
// GPUDispatcher::updateWbCounter(int kern_id, int val=-1) (signature and
// opening brace at listing line 265 are not present in this extract).
// Applies val, which must be +1 or -1, to the kernel's outstanding-writeback
// counter and returns true when that counter is zero afterwards.
264 bool
266  assert(val == -1 || val == 1);
267 
    // NOTE(review): unlike hsaTask(), no assert guards this lookup;
    // operator[] on an absent key would insert a null entry.
268  auto task = hsaQueueEntries[kern_id];
269  task->updateOutstandingWbs(val);
270 
271  // true: WB is done, false: WB is still ongoing
272  return (task->outstandingWbs() == 0);
273 }
274 
// GPUDispatcher::getOutstandingWbs(int kern_id) (signature and opening brace
// at listing line 279 are not present in this extract; the body refers to the
// parameter as kernId).
// Returns the outstanding-writeback count of the kernel's HSAQueueEntry.
278 int
    // NOTE(review): no presence check; operator[] on an absent key would
    // insert a null entry.
280  auto task = hsaQueueEntries[kernId];
281 
282  return task->outstandingWbs();
283 }
284 
// GPUDispatcher::notifyWgCompl(Wavefront *wf) (signature at listing line 294
// is not present in this extract).
// Records one completed workgroup for the kernel wf belongs to. When the
// completed count reaches the kernel's total, the packet is finished via
// finishPkt() and, if the task has a completion signal, that signal's value
// is decremented by one.
293 void
295 {
296  int kern_id = wf->kernId;
297  DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
    // NOTE(review): no presence check before this lookup; operator[] on an
    // absent key would insert a null entry.
298  auto task = hsaQueueEntries[kern_id];
299  assert(task->dispatchId() == kern_id);
300  task->notifyWgCompleted();
301 
302  DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
303  curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
304 
    // All workgroups of this kernel have now been counted as completed.
305  if (task->numWgCompleted() == task->numWgTotal()) {
306  // Notify the HSA PP that this kernel is complete
    // NOTE(review): listing line 307, the expression that .finishPkt() is
    // called on, is missing from this extract.
308  .finishPkt(task->dispPktPtr(), task->queueId());
309  if (task->completionSignal()) {
    // Read the signal's current value, then write back that value minus one.
    // (Listing lines 310-315 are missing from this extract.)
316  uint64_t signal_value =
317  gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
318 
319  DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
320  "signal! Addr: %d\n", task->completionSignal());
321 
322  gpuCmdProc->updateHsaSignal(task->completionSignal(),
323  signal_value - 1);
324  } else {
325  DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
326  "signal\n");
327  }
328 
329  DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
330  curTick(), kern_id);
331  DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
332  }
333 
    // NOTE(review): listing line 335, the body of this if, is missing from
    // this extract; presumably it schedules tickEvent -- confirm against the
    // real source.
334  if (!tickEvent.scheduled()) {
336  }
337 }
338 
// GPUDispatcher::scheduleDispatch() (signature at listing line 340 is not
// present in this extract).
// Acts only when tickEvent is not already scheduled.
339 void
341 {
    // NOTE(review): listing line 343, the body of this if, is missing from
    // this extract; presumably it schedules tickEvent -- confirm against the
    // real source.
342  if (!tickEvent.scheduled()) {
344  }
345 }
346 
// GPUDispatcherStats constructor (signature at listing line 347 is not
// present in this extract; per the index it takes a Stats::Group *parent).
// Attaches the group to parent and registers the two scalar stats,
// numKernelLaunched and cyclesWaitingForDispatch, with their descriptions.
348  : Stats::Group(parent),
349  ADD_STAT(numKernelLaunched, "number of kernel launched"),
350  ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
351  "wavefronts that are waiting to be dispatched")
352 {
353 }
hsa_queue_entry.hh
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:462
GPUDispatcher::GPUDispatcher
GPUDispatcher(const Params &p)
Definition: dispatcher.cc:48
GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle) override
Definition: gpu_command_processor.cc:151
GPUDispatcher::gpuCmdProc
GPUCommandProcessor * gpuCmdProc
Definition: dispatcher.hh:85
GPUCommandProcessor
Definition: gpu_command_processor.hh:57
system.hh
Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:152
Shader
Definition: shader.hh:87
EventBase::CPU_Tick_Pri
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition: eventq.hh:201
shader.hh
UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:591
GPUDispatcher::~GPUDispatcher
~GPUDispatcher()
Definition: dispatcher.cc:57
GPUDispatcher::execIds
std::queue< int > execIds
Definition: dispatcher.hh:89
HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition: hsa_queue_entry.hh:114
GPUDispatcher::exec
void exec()
Definition: dispatcher.cc:132
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:59
ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:291
HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:661
GPUDispatcher::isReachingKernelEnd
bool isReachingKernelEnd(Wavefront *wf)
Definition: dispatcher.cc:225
GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:62
EventManager::deschedule
void deschedule(Event &event)
Definition: eventq.hh:1025
Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:190
syscall_emul_buf.hh
Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:505
HSAQueueEntry
Definition: hsa_queue_entry.hh:58
wavefront.hh
GPUDispatcher::notifyWgCompl
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
Definition: dispatcher.cc:294
Wavefront::computeUnit
ComputeUnit * computeUnit
Definition: wavefront.hh:104
cp
Definition: cprintf.cc:37
GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value) override
Definition: gpu_command_processor.cc:160
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1016
HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:152
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:237
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:71
GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:113
GPUDispatcher::updateInvCounter
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:245
GPUDispatcher::dispatchActive
bool dispatchActive
Definition: dispatcher.hh:93
GPUDispatcher::GPUDispatcherStats::cyclesWaitingForDispatch
Stats::Scalar cyclesWaitingForDispatch
Definition: dispatcher.hh:101
GPUDispatcher::scheduleDispatch
void scheduleDispatch()
Definition: dispatcher.cc:340
GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:69
GPUDispatcher::shader
Shader * shader
Definition: dispatcher.hh:84
GPUDispatcher::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: dispatcher.cc:92
gpu_command_processor.hh
GPUDispatcher::doneIds
std::queue< int > doneIds
Definition: dispatcher.hh:91
X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:769
SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:584
Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:214
GPUDispatcher::GPUDispatcherStats::numKernelLaunched
Stats::Scalar numKernelLaunched
Definition: dispatcher.hh:100
GPUDispatcher::tickEvent
EventFunctionWrapper tickEvent
Definition: dispatcher.hh:86
GPUDispatcher::Params
GPUDispatcherParams Params
Definition: dispatcher.hh:64
HSADevice::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: hsa_device.cc:40
GPUDispatcher::hsaQueueEntries
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition: dispatcher.hh:87
GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats
GPUDispatcherStats(Stats::Group *parent)
Definition: dispatcher.cc:347
GPUDispatcher::stats
GPUDispatcher::GPUDispatcherStats stats
Wavefront
Definition: wavefront.hh:59
GPUDispatcher::getOutstandingWbs
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
Definition: dispatcher.cc:279
Stats::Group
Statistics container.
Definition: group.hh:87
Wavefront::wgId
uint32_t wgId
Definition: wavefront.hh:158
GPUDispatcher::setShader
void setShader(Shader *new_shader)
Definition: dispatcher.cc:75
CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:64
Wavefront::kernId
int kernId
Definition: wavefront.hh:95
Stats
Definition: statistics.cc:53
GPUDispatcher::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: dispatcher.cc:81
curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:43
dispatcher.hh
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:198
CheckpointIn
Definition: serialize.hh:68
Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:234
GPUDispatcher::updateWbCounter
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:265
SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:141

Generated on Tue Mar 23 2021 19:41:27 for gem5 by doxygen 1.8.17