gem5  v21.1.0.2
dispatcher.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 
36 
37 #include "debug/GPUAgentDisp.hh"
38 #include "debug/GPUDisp.hh"
39 #include "debug/GPUKernelInfo.hh"
40 #include "debug/GPUWgLatency.hh"
43 #include "gpu-compute/shader.hh"
44 #include "gpu-compute/wavefront.hh"
45 #include "sim/syscall_emul_buf.hh"
46 #include "sim/system.hh"
47 
48 namespace gem5
49 {
50 
// GPUDispatcher::GPUDispatcher(const Params &p)
// NOTE(review): the signature line (listing line 51) is absent from this
// extract; the name and parameter are taken from the cross-reference index
// at the end of the listing.
//
// Starts with no shader and no command processor attached (both nullptr),
// wraps exec() in tickEvent, clears dispatchActive, and schedules tickEvent
// for tick 0.
52  : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
53  tickEvent([this]{ exec(); },
54  "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
55  dispatchActive(false), stats(this)
56 {
57  schedule(&tickEvent, 0);
58 }
59 
// GPUDispatcher::~GPUDispatcher()
// NOTE(review): the signature line (listing line 60) is absent from this
// extract; the name is taken from the cross-reference index.
// The destructor body is empty.
61 {
62 }
63 
// HSAQueueEntry *GPUDispatcher::hsaTask(int disp_id)
// NOTE(review): the return-type and signature lines (listing lines 64-65)
// are absent from this extract; they are taken from the cross-reference
// index.
//
// Returns the queue entry stored in hsaQueueEntries under disp_id. The
// assert requires that an entry for disp_id already exists.
66 {
67  assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
68  return hsaQueueEntries[disp_id];
69 }
70 
// void GPUDispatcher::setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
// NOTE(review): the signature line (listing line 72) is absent from this
// extract; it is taken from the cross-reference index.
//
// Stores gpu_cmd_proc in gpuCmdProc.
71 void
73 {
74  gpuCmdProc = gpu_cmd_proc;
75 }
76 
// void GPUDispatcher::setShader(Shader *new_shader)
// NOTE(review): the signature line (listing line 78) is absent from this
// extract; it is taken from the cross-reference index.
//
// Stores new_shader in shader.
77 void
79 {
80  shader = new_shader;
81 }
82 
// void GPUDispatcher::serialize(CheckpointOut &cp) const
// NOTE(review): the signature line (listing line 84) is absent from this
// extract; it is taken from the cross-reference index.
//
// Serializes one scalar, event_tick: the tick at which tickEvent is
// scheduled, or 0 when tickEvent is not scheduled.
83 void
85 {
86  Tick event_tick = 0;
87 
88  if (tickEvent.scheduled())
89  event_tick = tickEvent.when();
90 
91  SERIALIZE_SCALAR(event_tick);
92 }
93 
// void GPUDispatcher::unserialize(CheckpointIn &cp)
// NOTE(review): the signature line (listing line 95) is absent from this
// extract; it is taken from the cross-reference index.
//
// Reads event_tick back from the checkpoint and, when it is non-zero,
// schedules tickEvent at that tick. A value of 0 leaves tickEvent
// unscheduled.
94 void
96 {
97  Tick event_tick;
98 
// NOTE(review): listing line 100 (the statement guarded by this `if`) is
// absent from this extract. The cross-reference index lists
// EventManager::deschedule, so this is presumably
// `deschedule(&tickEvent);` -- confirm against the upstream file. As
// printed here, the `if` would instead guard UNSERIALIZE_SCALAR.
99  if (tickEvent.scheduled())
101 
102  UNSERIALIZE_SCALAR(event_tick);
103 
104  if (event_tick) {
105  schedule(&tickEvent, event_tick);
106  }
107 }
108 
// void GPUDispatcher::dispatch(HSAQueueEntry *task)
// NOTE(review): the signature line (listing line 116) is absent from this
// extract; it is taken from the cross-reference index.
//
// Queues a kernel for launch: pushes the task's dispatch ID onto execIds,
// sets dispatchActive, and records the task in hsaQueueEntries keyed by
// that ID.
115 void
117 {
// NOTE(review): listing line 118 is absent from this extract. The index
// lists the numKernelLaunched stat, so it is presumably incremented here
// -- confirm against the upstream file.
119 
120  DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
121  task->kernelName(), task->dispatchId());
122  DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
123  task->kernelName(), task->dispatchId());
124 
125  execIds.push(task->dispatchId());
126  dispatchActive = true;
// emplace() does not overwrite: an entry already stored under this
// dispatch ID is kept.
127  hsaQueueEntries.emplace(task->dispatchId(), task);
128 
// NOTE(review): listing line 130 (the body of this `if`) is absent from
// this extract; presumably it schedules tickEvent -- confirm upstream.
129  if (!tickEvent.scheduled()) {
131  }
132 }
133 
// void GPUDispatcher::exec()
// NOTE(review): the signature line (listing line 135) is absent from this
// extract; it is taken from the cross-reference index.
//
// Body of tickEvent. Walks the queue of pending kernel IDs (execIds) and,
// for each, tries to dispatch workgroups through the shader. A kernel that
// cannot make progress is pushed back onto the queue and counted in
// fail_count. Afterwards doneIds is drained, logging each ID.
134 void
136 {
// fail_count: kernels re-queued during this call.
// disp_count: kernels for which at least one dispatchWorkgroups() call
// succeeded during this call.
137  int fail_count(0);
138  int disp_count(0);
139 
145  DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
146  DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());
147 
// NOTE(review): listing line 149 (the body of this `if`) is absent from
// this extract. The index lists the cyclesWaitingForDispatch stat, so it
// is presumably incremented here -- confirm against the upstream file.
148  if (execIds.size() > 0) {
150  }
151 
// Keep going while the queue holds more IDs than have been re-queued as
// failures in this call; re-queued IDs sit at the back of the queue.
157  while (execIds.size() > fail_count) {
158  int exec_id = execIds.front();
159  auto task = hsaQueueEntries[exec_id];
160  bool launched(false);
161 
162  // acq is needed before starting dispatch
// NOTE(review): listing line 163 (the `if` that opens this branch) is
// absent from this extract. The index lists Shader::impl_kern_launch_acq,
// which is presumably the condition -- confirm against the upstream file.
164  // try to invalidate cache
165  shader->prepareInvalidate(task);
166  } else {
167  // kern launch acquire is not set, skip invalidate
168  task->markInvDone();
169  }
170 
// Invalidate still pending: move this ID to the back of the queue
// (push then pop), count it as a failure, and go on to the next ID.
175  if (!task->isInvDone()){
176  execIds.push(exec_id);
177  ++fail_count;
178 
179  DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
180  " invalidate requests\n", exec_id, task->outstandingInvs());
181 
182  // try the next kernel_id
183  execIds.pop();
184  continue;
185  }
186 
187  // kernel invalidate is done, start workgroup dispatch
188  while (!task->dispComplete()) {
189  // update the thread context
190  shader->updateContext(task->contextId());
191 
192  // attempt to dispatch workgroup
193  DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
194  curTick(), exec_id);
195 
// A failed dispatchWorkgroups() re-queues the kernel and leaves the inner
// loop; the pop below then removes the entry at the front of the queue.
196  if (!shader->dispatchWorkgroups(task)) {
202  DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
203  execIds.push(exec_id);
204  ++fail_count;
205  break;
206  } else if (!launched) {
// Count each kernel at most once per call, on its first success.
207  launched = true;
208  disp_count++;
209  DPRINTF(GPUKernelInfo, "Launched kernel %d\n", exec_id);
210  }
211  }
212 
213  // try the next kernel_id
214  execIds.pop();
215  }
216 
217  DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
218  DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
219  disp_count, fail_count);
220 
// Drain doneIds; each ID is only logged before being removed.
221  while (doneIds.size()) {
222  DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
223  doneIds.pop();
224  }
225 }
226 
// bool GPUDispatcher::isReachingKernelEnd(Wavefront *wf)
// NOTE(review): the signature line (listing line 228) is absent from this
// extract; it is taken from the cross-reference index.
//
// Looks up the task for the wavefront's kernel (wf->kernId) and returns
// true when the number of completed workgroups is exactly one short of the
// task's total. Asserts that the task exists and that its dispatch ID
// matches the kernel ID.
227 bool
229 {
230  int kern_id = wf->kernId;
231  assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
232  auto task = hsaQueueEntries[kern_id];
233  assert(task->dispatchId() == kern_id);
234 
239  return (task->numWgCompleted() + 1 == task->numWgTotal());
240 }
241 
// void GPUDispatcher::updateInvCounter(int kern_id, int val=-1)
// NOTE(review): the signature line (listing line 248, which evidently also
// carried the opening brace) is absent from this extract; the signature is
// taken from the cross-reference index, where the default for val appears
// on the declaration.
//
// Adjusts the kernel's outstanding-invalidate counter by val, which must be
// +1 or -1.
247 void
249  assert(val == -1 || val == 1);
250 
// NOTE(review): no existence check here. operator[] on the unordered_map
// inserts a null entry for an unknown kern_id, which the next line would
// then dereference.
251  auto task = hsaQueueEntries[kern_id];
252  task->updateOutstandingInvs(val);
253 
254  // kernel invalidate is done, schedule dispatch work
// NOTE(review): listing line 256 (the body of this `if`) is absent from
// this extract; presumably it schedules tickEvent -- confirm upstream.
255  if (task->isInvDone() && !tickEvent.scheduled()) {
257  }
258 }
259 
// bool GPUDispatcher::updateWbCounter(int kern_id, int val=-1)
// NOTE(review): the signature line (listing line 268, which evidently also
// carried the opening brace) is absent from this extract; the signature is
// taken from the cross-reference index, where the default for val appears
// on the declaration.
//
// Adjusts the kernel's outstanding-writeback counter by val, which must be
// +1 or -1, and returns true when that counter has reached zero.
267 bool
269  assert(val == -1 || val == 1);
270 
// NOTE(review): no existence check here. operator[] on the unordered_map
// inserts a null entry for an unknown kern_id, which the next line would
// then dereference.
271  auto task = hsaQueueEntries[kern_id];
272  task->updateOutstandingWbs(val);
273 
274  // true: WB is done, false: WB is still ongoing
275  return (task->outstandingWbs() == 0);
276 }
277 
// int GPUDispatcher::getOutstandingWbs(...)
// NOTE(review): the signature line (listing line 282, which evidently also
// carried the opening brace) is absent from this extract. The index shows
// the parameter as `int kern_id`, but the body below uses `kernId`, so the
// definition presumably names it kernId -- confirm against upstream.
//
// Returns the number of outstanding writeback requests recorded on the
// kernel's task.
281 int
// NOTE(review): no existence check here. operator[] on the unordered_map
// inserts a null entry for an unknown id, which the return would then
// dereference.
283  auto task = hsaQueueEntries[kernId];
284 
285  return task->outstandingWbs();
286 }
287 
// void GPUDispatcher::notifyWgCompl(Wavefront *wf)
// NOTE(review): the signature line (listing line 297) is absent from this
// extract; it is taken from the cross-reference index.
//
// Records one completed workgroup on the task that owns wf's kernel. When
// that brings the completed count up to the task's total, the packet is
// finished and, if the task has a completion signal, the signal value is
// decremented by one.
296 void
298 {
299  int kern_id = wf->kernId;
300  DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
301  auto task = hsaQueueEntries[kern_id];
302  assert(task->dispatchId() == kern_id);
303  task->notifyWgCompleted();
304 
305  DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
306  curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
307 
308  if (task->numWgCompleted() == task->numWgTotal()) {
309  // Notify the HSA PP that this kernel is complete
// NOTE(review): listing line 310 (the receiver of .finishPkt) is absent
// from this extract. The index lists GPUCommandProcessor::hsaPacketProc(),
// so it is presumably `gpuCmdProc->hsaPacketProc()` -- confirm upstream.
311  .finishPkt(task->dispPktPtr(), task->queueId());
312  if (task->completionSignal()) {
// Read the current signal value functionally, then write back value - 1.
319  uint64_t signal_value =
320  gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
321 
322  DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
323  "signal! Addr: %d\n", task->completionSignal());
324 
325  gpuCmdProc->updateHsaSignal(task->completionSignal(),
326  signal_value - 1);
327  } else {
328  DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
329  "signal\n");
330  }
331 
332  DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
333  curTick(), kern_id);
334  DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
335  }
336 
// NOTE(review): listing line 338 (the body of this `if`) is absent from
// this extract; presumably it schedules tickEvent -- confirm upstream.
337  if (!tickEvent.scheduled()) {
339  }
340 }
341 
// void GPUDispatcher::scheduleDispatch()
// NOTE(review): the signature line (listing line 343) is absent from this
// extract; it is taken from the cross-reference index.
//
// Acts only when tickEvent is not already scheduled.
342 void
344 {
// NOTE(review): listing line 346 (the body of this `if`) is absent from
// this extract. The index lists EventManager::schedule and
// Clocked::clockPeriod, so it presumably schedules tickEvent one clock
// period ahead -- confirm against the upstream file.
345  if (!tickEvent.scheduled()) {
347  }
348 }
349 
// GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats(
//     statistics::Group *parent)
// NOTE(review): the first signature line (listing line 350) is absent from
// this extract; the name is taken from the cross-reference index.
//
// Registers the dispatcher's two statistics, numKernelLaunched and
// cyclesWaitingForDispatch, in a stats group under `parent`.
351  statistics::Group *parent)
352  : statistics::Group(parent),
353  ADD_STAT(numKernelLaunched, "number of kernel launched"),
354  ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
355  "wavefronts that are waiting to be dispatched")
356 {
357 }
358 
359 } // namespace gem5
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
hsa_queue_entry.hh
gem5::Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
gem5::GPUDispatcher::execIds
std::queue< int > execIds
Definition: dispatcher.hh:92
system.hh
gem5::GPUDispatcher::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: dispatcher.cc:84
shader.hh
UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
gem5::GPUDispatcher::~GPUDispatcher
~GPUDispatcher()
Definition: dispatcher.cc:60
gem5::Wavefront
Definition: wavefront.hh:62
gem5::CheckpointIn
Definition: serialize.hh:68
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:61
gem5::Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:195
gem5::Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:155
gem5::GPUDispatcher::gpuCmdProc
GPUCommandProcessor * gpuCmdProc
Definition: dispatcher.hh:88
gem5::Wavefront::kernId
int kernId
Definition: wavefront.hh:99
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:775
gem5::GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:65
gem5::GPUDispatcher::shader
Shader * shader
Definition: dispatcher.hh:87
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
gem5::GPUDispatcher::getOutstandingWbs
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
Definition: dispatcher.cc:282
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:60
gem5::GPUDispatcher::isReachingKernelEnd
bool isReachingKernelEnd(Wavefront *wf)
Definition: dispatcher.cc:228
syscall_emul_buf.hh
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:294
wavefront.hh
gem5::GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
Definition: gpu_command_processor.cc:186
gem5::Wavefront::wgId
uint32_t wgId
Definition: wavefront.hh:162
gem5::GPUDispatcher::updateWbCounter
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:268
gem5::GPUDispatcher::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: dispatcher.cc:95
gem5::GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:116
gem5::GPUCommandProcessor
Definition: gpu_command_processor.hh:71
gem5::HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:630
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::GPUDispatcher::hsaQueueEntries
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition: dispatcher.hh:90
gpu_command_processor.hh
gem5::Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:237
gem5::SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:146
gem5::GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:72
gem5::Wavefront::computeUnit
ComputeUnit * computeUnit
Definition: wavefront.hh:108
gem5::Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:193
SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
gem5::EventManager::deschedule
void deschedule(Event &event)
Definition: eventq.hh:1028
gem5::HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition: hsa_queue_entry.hh:117
gem5::GPUDispatcher::GPUDispatcherStats::cyclesWaitingForDispatch
statistics::Scalar cyclesWaitingForDispatch
Definition: dispatcher.hh:104
gem5::GPUDispatcher::notifyWgCompl
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
Definition: dispatcher.cc:297
gem5::GPUDispatcher::stats
gem5::GPUDispatcher::GPUDispatcherStats stats
gem5::GPUDispatcher::Params
GPUDispatcherParams Params
Definition: dispatcher.hh:67
gem5::GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats
GPUDispatcherStats(statistics::Group *parent)
Definition: dispatcher.cc:350
gem5::GPUDispatcher::setShader
void setShader(Shader *new_shader)
Definition: dispatcher.cc:78
gem5::GPUDispatcher::GPUDispatcher
GPUDispatcher(const Params &p)
Definition: dispatcher.cc:51
gem5::GPUDispatcher::updateInvCounter
void updateInvCounter(int kern_id, int val=-1)
update the counter of oustanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:248
gem5::GPUDispatcher::exec
void exec()
Definition: dispatcher.cc:135
gem5::GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle)
Definition: gpu_command_processor.cc:177
gem5::GPUDispatcher::tickEvent
EventFunctionWrapper tickEvent
Definition: dispatcher.hh:89
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:66
dispatcher.hh
gem5::EventBase::CPU_Tick_Pri
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition: eventq.hh:204
gem5::GPUDispatcher::scheduleDispatch
void scheduleDispatch()
Definition: dispatcher.cc:343
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::GPUDispatcher::GPUDispatcherStats::numKernelLaunched
statistics::Scalar numKernelLaunched
Definition: dispatcher.hh:103
gem5::GPUDispatcher::dispatchActive
bool dispatchActive
Definition: dispatcher.hh:96
gem5::HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:155
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
gem5::GPUDispatcher::doneIds
std::queue< int > doneIds
Definition: dispatcher.hh:94
gem5::Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:217
gem5::Shader
Definition: shader.hh:84

Generated on Tue Sep 21 2021 12:25:23 for gem5 by doxygen 1.8.17