gem5  v21.2.1.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
dispatcher.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 
34 
35 #include "debug/GPUAgentDisp.hh"
36 #include "debug/GPUDisp.hh"
37 #include "debug/GPUKernelInfo.hh"
38 #include "debug/GPUWgLatency.hh"
41 #include "gpu-compute/shader.hh"
42 #include "gpu-compute/wavefront.hh"
43 #include "sim/syscall_emul_buf.hh"
44 #include "sim/system.hh"
45 
46 namespace gem5
47 {
48 
50  : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
51  tickEvent([this]{ exec(); },
52  "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
53  dispatchActive(false), stats(this)
54 {
55  schedule(&tickEvent, 0);
56 }
57 
59 {
60 }
61 
64 {
65  assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
66  return hsaQueueEntries[disp_id];
67 }
68 
69 void
71 {
72  gpuCmdProc = gpu_cmd_proc;
73 }
74 
75 void
77 {
78  shader = new_shader;
79 }
80 
81 void
83 {
84  Tick event_tick = 0;
85 
86  if (tickEvent.scheduled())
87  event_tick = tickEvent.when();
88 
89  SERIALIZE_SCALAR(event_tick);
90 }
91 
92 void
94 {
95  Tick event_tick;
96 
97  if (tickEvent.scheduled())
99 
100  UNSERIALIZE_SCALAR(event_tick);
101 
102  if (event_tick) {
103  schedule(&tickEvent, event_tick);
104  }
105 }
106 
113 void
115 {
117 
118  DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
119  task->kernelName(), task->dispatchId());
120  DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
121  task->kernelName(), task->dispatchId());
122 
123  execIds.push(task->dispatchId());
124  dispatchActive = true;
125  hsaQueueEntries.emplace(task->dispatchId(), task);
126 
127  if (!tickEvent.scheduled()) {
129  }
130 }
131 
132 void
134 {
135  int fail_count(0);
136  int disp_count(0);
137 
143  DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
144  DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());
145 
146  if (execIds.size() > 0) {
148  }
149 
155  while (execIds.size() > fail_count) {
156  int exec_id = execIds.front();
157  auto task = hsaQueueEntries[exec_id];
158  bool launched(false);
159 
160  // acq is needed before starting dispatch
162  // try to invalidate cache
163  shader->prepareInvalidate(task);
164  } else {
165  // kern launch acquire is not set, skip invalidate
166  task->markInvDone();
167  }
168 
173  if (!task->isInvDone()){
174  execIds.push(exec_id);
175  ++fail_count;
176 
177  DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
178  " invalidate requests\n", exec_id, task->outstandingInvs());
179 
180  // try the next kernel_id
181  execIds.pop();
182  continue;
183  }
184 
185  // kernel invalidate is done, start workgroup dispatch
186  while (!task->dispComplete()) {
187  // update the thread context
188  shader->updateContext(task->contextId());
189 
190  // attempt to dispatch workgroup
191  DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
192  curTick(), exec_id);
193 
194  if (!shader->dispatchWorkgroups(task)) {
200  DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
201  execIds.push(exec_id);
202  ++fail_count;
203  break;
204  } else if (!launched) {
205  launched = true;
206  disp_count++;
207  DPRINTF(GPUKernelInfo, "Launched kernel %d\n", exec_id);
208  }
209  }
210 
211  // try the next kernel_id
212  execIds.pop();
213  }
214 
215  DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
216  DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
217  disp_count, fail_count);
218 
219  while (doneIds.size()) {
220  DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
221  doneIds.pop();
222  }
223 }
224 
225 bool
227 {
228  int kern_id = wf->kernId;
229  assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
230  auto task = hsaQueueEntries[kern_id];
231  assert(task->dispatchId() == kern_id);
232 
237  return (task->numWgCompleted() + 1 == task->numWgTotal());
238 }
239 
245 void
247  assert(val == -1 || val == 1);
248 
249  auto task = hsaQueueEntries[kern_id];
250  task->updateOutstandingInvs(val);
251 
252  // kernel invalidate is done, schedule dispatch work
253  if (task->isInvDone() && !tickEvent.scheduled()) {
255  }
256 }
257 
265 bool
267  assert(val == -1 || val == 1);
268 
269  auto task = hsaQueueEntries[kern_id];
270  task->updateOutstandingWbs(val);
271 
272  // true: WB is done, false: WB is still ongoing
273  return (task->outstandingWbs() == 0);
274 }
275 
279 int
281  auto task = hsaQueueEntries[kernId];
282 
283  return task->outstandingWbs();
284 }
285 
294 void
296 {
297  int kern_id = wf->kernId;
298  DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
299  auto task = hsaQueueEntries[kern_id];
300  assert(task->dispatchId() == kern_id);
301  task->notifyWgCompleted();
302 
303  DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
304  curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
305 
306  if (task->numWgCompleted() == task->numWgTotal()) {
307  // Notify the HSA PP that this kernel is complete
309  .finishPkt(task->dispPktPtr(), task->queueId());
310  if (task->completionSignal()) {
317  uint64_t signal_value =
318  gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
319 
320  DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
321  "signal! Addr: %d\n", task->completionSignal());
322 
323  gpuCmdProc->updateHsaSignal(task->completionSignal(),
324  signal_value - 1);
325  } else {
326  DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
327  "signal\n");
328  }
329 
330  DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
331  curTick(), kern_id);
332  DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
333  }
334 
335  if (!tickEvent.scheduled()) {
337  }
338 }
339 
340 void
342 {
343  if (!tickEvent.scheduled()) {
345  }
346 }
347 
349  statistics::Group *parent)
350  : statistics::Group(parent),
351  ADD_STAT(numKernelLaunched, "number of kernel launched"),
352  ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
353  "wavefronts that are waiting to be dispatched")
354 {
355 }
356 
357 } // namespace gem5
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
hsa_queue_entry.hh
gem5::Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
gem5::GPUDispatcher::execIds
std::queue< int > execIds
Definition: dispatcher.hh:90
system.hh
gem5::GPUDispatcher::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: dispatcher.cc:82
shader.hh
UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
gem5::GPUDispatcher::~GPUDispatcher
~GPUDispatcher()
Definition: dispatcher.cc:58
gem5::Wavefront
Definition: wavefront.hh:60
gem5::CheckpointIn
Definition: serialize.hh:68
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:59
gem5::Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:193
gem5::Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:152
gem5::GPUDispatcher::gpuCmdProc
GPUCommandProcessor * gpuCmdProc
Definition: dispatcher.hh:86
gem5::Wavefront::kernId
int kernId
Definition: wavefront.hh:97
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:775
gem5::GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:63
gem5::GPUDispatcher::shader
Shader * shader
Definition: dispatcher.hh:85
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
gem5::GPUDispatcher::getOutstandingWbs
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
Definition: dispatcher.cc:280
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:61
gem5::GPUDispatcher::isReachingKernelEnd
bool isReachingKernelEnd(Wavefront *wf)
Definition: dispatcher.cc:226
syscall_emul_buf.hh
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:292
wavefront.hh
gem5::GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
Definition: gpu_command_processor.cc:186
gem5::Wavefront::wgId
uint32_t wgId
Definition: wavefront.hh:160
gem5::GPUDispatcher::updateWbCounter
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:266
gem5::GPUDispatcher::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: dispatcher.cc:93
gem5::GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:114
gem5::GPUCommandProcessor
Definition: gpu_command_processor.hh:69
gem5::HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:631
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::GPUDispatcher::hsaQueueEntries
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition: dispatcher.hh:88
gpu_command_processor.hh
gem5::Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:234
gem5::SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:146
gem5::GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:70
gem5::Wavefront::computeUnit
ComputeUnit * computeUnit
Definition: wavefront.hh:106
gem5::Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:190
SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
gem5::EventManager::deschedule
void deschedule(Event &event)
Definition: eventq.hh:1028
gem5::HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition: hsa_queue_entry.hh:115
gem5::GPUDispatcher::GPUDispatcherStats::cyclesWaitingForDispatch
statistics::Scalar cyclesWaitingForDispatch
Definition: dispatcher.hh:102
gem5::GPUDispatcher::notifyWgCompl
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
Definition: dispatcher.cc:295
gem5::GPUDispatcher::stats
gem5::GPUDispatcher::GPUDispatcherStats stats
gem5::GPUDispatcher::Params
GPUDispatcherParams Params
Definition: dispatcher.hh:65
gem5::GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats
GPUDispatcherStats(statistics::Group *parent)
Definition: dispatcher.cc:348
gem5::GPUDispatcher::setShader
void setShader(Shader *new_shader)
Definition: dispatcher.cc:76
gem5::GPUDispatcher::GPUDispatcher
GPUDispatcher(const Params &p)
Definition: dispatcher.cc:49
gem5::GPUDispatcher::updateInvCounter
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:246
gem5::GPUDispatcher::exec
void exec()
Definition: dispatcher.cc:133
gem5::GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle)
Definition: gpu_command_processor.cc:177
gem5::GPUDispatcher::tickEvent
EventFunctionWrapper tickEvent
Definition: dispatcher.hh:87
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:66
dispatcher.hh
gem5::EventBase::CPU_Tick_Pri
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition: eventq.hh:204
gem5::GPUDispatcher::scheduleDispatch
void scheduleDispatch()
Definition: dispatcher.cc:341
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: tlb.cc:60
gem5::GPUDispatcher::GPUDispatcherStats::numKernelLaunched
statistics::Scalar numKernelLaunched
Definition: dispatcher.hh:101
gem5::GPUDispatcher::dispatchActive
bool dispatchActive
Definition: dispatcher.hh:94
gem5::HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:153
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
gem5::GPUDispatcher::doneIds
std::queue< int > doneIds
Definition: dispatcher.hh:92
gem5::Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:217
gem5::Shader
Definition: shader.hh:82

Generated on Wed May 4 2022 12:13:58 for gem5 by doxygen 1.8.17