gem5  [DEVELOP-FOR-23.0]
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
dispatcher.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 
#include "gpu-compute/dispatcher.hh"

#include "debug/GPUAgentDisp.hh"
#include "debug/GPUDisp.hh"
#include "debug/GPUKernelInfo.hh"
#include "debug/GPUWgLatency.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/hsa_queue_entry.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"
#include "sim/sim_exit.hh"
#include "sim/syscall_emul_buf.hh"
#include "sim/system.hh"
46 
47 namespace gem5
48 {
49 
51  : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
52  tickEvent([this]{ exec(); },
53  "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
54  dispatchActive(false), kernelExitEvents(p.kernel_exit_events),
55  stats(this)
56 {
57  schedule(&tickEvent, 0);
58 }
59 
61 {
62 }
63 
66 {
67  assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
68  return hsaQueueEntries[disp_id];
69 }
70 
71 void
73 {
74  gpuCmdProc = gpu_cmd_proc;
75 }
76 
77 void
79 {
80  shader = new_shader;
81 }
82 
83 void
85 {
86  Tick event_tick = 0;
87 
88  if (tickEvent.scheduled())
89  event_tick = tickEvent.when();
90 
91  SERIALIZE_SCALAR(event_tick);
92 }
93 
94 void
96 {
97  Tick event_tick;
98 
99  if (tickEvent.scheduled())
101 
102  UNSERIALIZE_SCALAR(event_tick);
103 
104  if (event_tick) {
105  schedule(&tickEvent, event_tick);
106  }
107 }
108 
115 void
117 {
119 
120  DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
121  task->kernelName(), task->dispatchId());
122  DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
123  task->kernelName(), task->dispatchId());
124 
125  execIds.push(task->dispatchId());
126  dispatchActive = true;
127  hsaQueueEntries.emplace(task->dispatchId(), task);
128 
129  if (!tickEvent.scheduled()) {
131  }
132 }
133 
134 void
136 {
137  int fail_count(0);
138  int disp_count(0);
139 
145  DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
146  DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());
147 
148  if (execIds.size() > 0) {
150  }
151 
157  while (execIds.size() > fail_count) {
158  int exec_id = execIds.front();
159  auto task = hsaQueueEntries[exec_id];
160  bool launched(false);
161 
162  // acq is needed before starting dispatch
164  // try to invalidate cache
165  shader->prepareInvalidate(task);
166  } else {
167  // kern launch acquire is not set, skip invalidate
168  task->markInvDone();
169  }
170 
175  if (!task->isInvDone()){
176  execIds.push(exec_id);
177  ++fail_count;
178 
179  DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
180  " invalidate requests\n", exec_id, task->outstandingInvs());
181 
182  // try the next kernel_id
183  execIds.pop();
184  continue;
185  }
186 
187  // kernel invalidate is done, start workgroup dispatch
188  while (!task->dispComplete()) {
189  // update the thread context
190  shader->updateContext(task->contextId());
191 
192  // attempt to dispatch workgroup
193  DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
194  curTick(), exec_id);
195 
196  if (!shader->dispatchWorkgroups(task)) {
202  DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
203  execIds.push(exec_id);
204  ++fail_count;
205  break;
206  } else if (!launched) {
207  launched = true;
208  disp_count++;
209  DPRINTF(GPUKernelInfo, "Launched kernel %d\n", exec_id);
210  }
211  }
212 
213  // try the next kernel_id
214  execIds.pop();
215  }
216 
217  DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
218  DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
219  disp_count, fail_count);
220 
221  while (doneIds.size()) {
222  DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
223  doneIds.pop();
224  }
225 }
226 
227 bool
229 {
230  int kern_id = wf->kernId;
231  assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
232  auto task = hsaQueueEntries[kern_id];
233  assert(task->dispatchId() == kern_id);
234 
239  return (task->numWgCompleted() + 1 == task->numWgTotal());
240 }
241 
247 void
249  assert(val == -1 || val == 1);
250 
251  auto task = hsaQueueEntries[kern_id];
252  task->updateOutstandingInvs(val);
253 
254  // kernel invalidate is done, schedule dispatch work
255  if (task->isInvDone() && !tickEvent.scheduled()) {
257  }
258 }
259 
267 bool
269  assert(val == -1 || val == 1);
270 
271  auto task = hsaQueueEntries[kern_id];
272  task->updateOutstandingWbs(val);
273 
274  // true: WB is done, false: WB is still ongoing
275  return (task->outstandingWbs() == 0);
276 }
277 
281 int
283  auto task = hsaQueueEntries[kernId];
284 
285  return task->outstandingWbs();
286 }
287 
296 void
298 {
299  int kern_id = wf->kernId;
300  DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
301  auto task = hsaQueueEntries[kern_id];
302  assert(task->dispatchId() == kern_id);
303  task->notifyWgCompleted();
304 
305  DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
306  curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
307 
308  if (task->numWgCompleted() == task->numWgTotal()) {
309  // Notify the HSA PP that this kernel is complete
311  .finishPkt(task->dispPktPtr(), task->queueId());
312  if (task->completionSignal()) {
319  uint64_t signal_value =
320  gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
321 
322  DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
323  "signal! Addr: %d\n", task->completionSignal());
324 
325  gpuCmdProc->updateHsaSignal(task->completionSignal(),
326  signal_value - 1);
327  } else {
328  DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
329  "signal\n");
330  }
331 
332  DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
333  curTick(), kern_id);
334  DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
335 
336  if (kernelExitEvents) {
337  exitSimLoop("GPU Kernel Completed");
338  }
339  }
340 
341  if (!tickEvent.scheduled()) {
343  }
344 }
345 
346 void
348 {
349  if (!tickEvent.scheduled()) {
351  }
352 }
353 
355  statistics::Group *parent)
356  : statistics::Group(parent),
357  ADD_STAT(numKernelLaunched, "number of kernel launched"),
358  ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
359  "wavefronts that are waiting to be dispatched")
360 {
361 }
362 
363 } // namespace gem5
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
hsa_queue_entry.hh
gem5::Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:501
gem5::GPUDispatcher::execIds
std::queue< int > execIds
Definition: dispatcher.hh:90
system.hh
gem5::GPUDispatcher::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: dispatcher.cc:84
shader.hh
UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
gem5::GPUDispatcher::~GPUDispatcher
~GPUDispatcher()
Definition: dispatcher.cc:60
gem5::Wavefront
Definition: wavefront.hh:60
gem5::CheckpointIn
Definition: serialize.hh:68
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:60
gem5::Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:225
gem5::Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:153
gem5::GPUDispatcher::gpuCmdProc
GPUCommandProcessor * gpuCmdProc
Definition: dispatcher.hh:86
gem5::Wavefront::kernId
int kernId
Definition: wavefront.hh:97
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:776
gem5::GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:65
gem5::GPUDispatcher::shader
Shader * shader
Definition: dispatcher.hh:85
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1012
gem5::GPUDispatcher::getOutstandingWbs
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
Definition: dispatcher.cc:282
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:65
gem5::GPUDispatcher::isReachingKernelEnd
bool isReachingKernelEnd(Wavefront *wf)
Definition: dispatcher.cc:228
syscall_emul_buf.hh
gem5::GPUDispatcher::kernelExitEvents
bool kernelExitEvents
Definition: dispatcher.hh:96
sim_exit.hh
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:292
wavefront.hh
gem5::exitSimLoop
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition: sim_events.cc:88
gem5::GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
Definition: gpu_command_processor.cc:263
gem5::Wavefront::wgId
uint32_t wgId
Definition: wavefront.hh:160
gem5::GPUDispatcher::updateWbCounter
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:268
gem5::GPUDispatcher::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: dispatcher.cc:95
gem5::GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:116
gem5::GPUCommandProcessor
Definition: gpu_command_processor.hh:70
gem5::HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:671
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:210
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::GPUDispatcher::hsaQueueEntries
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition: dispatcher.hh:88
gpu_command_processor.hh
gem5::Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:235
gem5::SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:146
gem5::GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:72
gem5::Wavefront::computeUnit
ComputeUnit * computeUnit
Definition: wavefront.hh:106
gem5::Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:191
SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
gem5::EventManager::deschedule
void deschedule(Event &event)
Definition: eventq.hh:1021
gem5::HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition: hsa_queue_entry.hh:139
gem5::GPUDispatcher::GPUDispatcherStats::cyclesWaitingForDispatch
statistics::Scalar cyclesWaitingForDispatch
Definition: dispatcher.hh:104
gem5::GPUDispatcher::notifyWgCompl
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
Definition: dispatcher.cc:297
gem5::GPUDispatcher::stats
gem5::GPUDispatcher::GPUDispatcherStats stats
gem5::GPUDispatcher::Params
GPUDispatcherParams Params
Definition: dispatcher.hh:65
gem5::GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats
GPUDispatcherStats(statistics::Group *parent)
Definition: dispatcher.cc:354
gem5::GPUDispatcher::setShader
void setShader(Shader *new_shader)
Definition: dispatcher.cc:78
gem5::GPUDispatcher::GPUDispatcher
GPUDispatcher(const Params &p)
Definition: dispatcher.cc:50
gem5::GPUDispatcher::updateInvCounter
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:248
gem5::GPUDispatcher::exec
void exec()
Definition: dispatcher.cc:135
gem5::GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle)
Definition: gpu_command_processor.cc:254
gem5::GPUDispatcher::tickEvent
EventFunctionWrapper tickEvent
Definition: dispatcher.hh:87
gem5::statistics::Group
Statistics container.
Definition: group.hh:92
gem5::CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:66
dispatcher.hh
gem5::EventBase::CPU_Tick_Pri
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition: eventq.hh:207
gem5::GPUDispatcher::scheduleDispatch
void scheduleDispatch()
Definition: dispatcher.cc:347
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::GPUDispatcher::GPUDispatcherStats::numKernelLaunched
statistics::Scalar numKernelLaunched
Definition: dispatcher.hh:103
gem5::GPUDispatcher::dispatchActive
bool dispatchActive
Definition: dispatcher.hh:94
gem5::HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:177
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:458
gem5::GPUDispatcher::doneIds
std::queue< int > doneIds
Definition: dispatcher.hh:92
gem5::Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:217
gem5::Shader
Definition: shader.hh:83

Generated on Sun Jul 30 2023 01:56:56 for gem5 by doxygen 1.8.17