gem5  v20.1.0.0
dispatcher.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 
36 
37 #include "debug/GPUDisp.hh"
38 #include "debug/GPUKernelInfo.hh"
39 #include "debug/GPUWgLatency.hh"
42 #include "gpu-compute/shader.hh"
43 #include "gpu-compute/wavefront.hh"
44 #include "sim/syscall_emul_buf.hh"
45 #include "sim/system.hh"
46 
// GPUDispatcher constructor; its signature line is not present in this
// listing. The initializer list starts the dispatcher with no shader and no
// command processor attached, binds tickEvent to exec() with priority
// Event::CPU_Tick_Pri, and marks dispatch as inactive.
48  : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
49  tickEvent([this]{ exec(); },
50  "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
51  dispatchActive(false)
52 {
// Queue the first tick event at tick 0.
53  schedule(&tickEvent, 0);
54 }
55 
// GPUDispatcher destructor; its signature line is not present in this
// listing. The body is empty.
57 {
58 }
59 
// GPUDispatcher::regStats: registers the name and description of two
// statistics. The signature line and the two lines naming the stat objects
// are not present in this listing.
// NOTE(review): the stats are presumably numKernelLaunched and
// cyclesWaitingForDispatch -- confirm against dispatcher.hh.
60 void
62 {
// First stat: published as "<object name>.num_kernel_launched".
64  .name(name() + ".num_kernel_launched")
65  .desc("number of kernel launched")
66  ;
67 
// Second stat: published as "<object name>.cycles_wait_dispatch".
69  .name(name() + ".cycles_wait_dispatch")
70  .desc("number of cycles with outstanding wavefronts "
71  "that are waiting to be dispatched")
72  ;
73 }
74 
// GPUDispatcher::hsaTask: returns the queue entry stored in hsaQueueEntries
// under disp_id. The return-type and signature lines are not present in this
// listing.
77 {
// The id must already be registered; asserting first keeps the operator[]
// lookup below from inserting a new, empty entry for an unknown id.
78  assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
79  return hsaQueueEntries[disp_id];
80 }
81 
// GPUDispatcher::setCommandProcessor: stores the given command-processor
// pointer in gpuCmdProc. The signature line is not present in this listing.
82 void
84 {
85  gpuCmdProc = gpu_cmd_proc;
86 }
87 
// GPUDispatcher::setShader: stores the given shader pointer in the shader
// member. The signature line is not present in this listing.
88 void
90 {
91  shader = new_shader;
92 }
93 
// GPUDispatcher::serialize: checkpoints the tick at which tickEvent is due.
// The signature line is not present in this listing.
94 void
96 {
// 0 is written when no tick event is pending.
97  Tick event_tick = 0;
98 
99  if (tickEvent.scheduled())
100  event_tick = tickEvent.when();
101 
102  SERIALIZE_SCALAR(event_tick);
103 }
104 
// GPUDispatcher::unserialize: restores the pending tick event from a
// checkpoint. The signature line is not present in this listing.
105 void
107 {
108  Tick event_tick;
109 
// The statement guarded by this `if` is missing from this listing; as
// printed, the `if` would bind to the UNSERIALIZE_SCALAR line below.
// NOTE(review): presumably the missing statement deschedules tickEvent --
// confirm against the real source before relying on this text.
110  if (tickEvent.scheduled())
112 
113  UNSERIALIZE_SCALAR(event_tick);
114 
// A restored value of 0 means no event is re-scheduled.
115  if (event_tick) {
116  schedule(&tickEvent, event_tick);
117  }
118 }
119 
// GPUDispatcher::dispatch: accepts a new task for execution. The signature
// line, the first body statement and the body of the final `if` are not
// present in this listing.
126 void
128 {
130 
131  DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
132  task->kernelName(), task->dispatchId());
133 
// Queue the dispatch id for exec() and record the task under that id.
// emplace leaves an existing entry with the same id untouched.
134  execIds.push(task->dispatchId());
135  dispatchActive = true;
136  hsaQueueEntries.emplace(task->dispatchId(), task);
137 
// NOTE(review): the guarded statement is missing here; presumably it
// schedules tickEvent -- confirm against the real source.
138  if (!tickEvent.scheduled()) {
140  }
141 }
142 
// GPUDispatcher::exec: one dispatch pass over the queued kernels; this is
// the function tickEvent is bound to. The signature line and several body
// statements are not present in this listing.
//
// Each pass takes ids from the front of execIds. A kernel that cannot make
// progress is pushed to the back again and counted in fail_count, so the
// main loop ends once only such re-queued ids remain in the queue.
143 void
145 {
146  int fail_count(0);
147 
153  DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
154 
// The body of this `if` is missing from this listing.
155  if (execIds.size() > 0) {
157  }
158 
164  while (execIds.size() > fail_count) {
165  int exec_id = execIds.front();
166  auto task = hsaQueueEntries[exec_id];
167  bool launched(false);
168 
169  // acq is needed before starting dispatch
// The `if (...) {` line that opens this branch is missing from this listing.
// NOTE(review): the condition presumably tests the shader's kernel-launch
// acquire setting -- confirm against the real source.
171  // try to invalidate cache
172  shader->prepareInvalidate(task);
173  } else {
174  // kern launch acquire is not set, skip invalidate
175  task->markInvDone();
176  }
177 
// Invalidation still pending: re-queue the id at the back first, then drop
// the front copy, so the kernel is retried on a later pass.
182  if (!task->isInvDone()){
183  execIds.push(exec_id);
184  ++fail_count;
185 
186  DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
187  " invalidate requests\n", exec_id, task->outstandingInvs());
188 
189  // try the next kernel_id
190  execIds.pop();
191  continue;
192  }
193 
194  // kernel invalidate is done, start workgroup dispatch
195  while (!task->dispComplete()) {
196  // update the thread context
197  shader->updateContext(task->contextId());
198 
199  // attempt to dispatch workgroup
200  DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
201  curTick(), exec_id);
202 
// A failed workgroup dispatch re-queues the kernel and leaves the inner
// loop; `launched` limits the "Launched kernel" trace to once per pass.
203  if (!shader->dispatchWorkgroups(task)) {
209  DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
210  execIds.push(exec_id);
211  ++fail_count;
212  break;
213  } else if (!launched) {
214  launched = true;
215  DPRINTF(GPUKernelInfo, "Launched kernel %d\n", exec_id);
216  }
217  }
218 
219  // try the next kernel_id
220  execIds.pop();
221  }
222 
223  DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
224 
// Drain doneIds, tracing each completed kernel id.
225  while (doneIds.size()) {
226  DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
227  doneIds.pop();
228  }
229 }
230 
// GPUDispatcher::isReachingKernelEnd: reports whether the kernel that
// wavefront `wf` belongs to has exactly one workgroup left to complete.
// The signature line is not present in this listing.
231 bool
233 {
234  int kern_id = wf->kernId;
// The kernel must be registered, and its entry must carry the same id.
235  assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
236  auto task = hsaQueueEntries[kern_id];
237  assert(task->dispatchId() == kern_id);
238 
// True when completing one more workgroup would bring the completed count
// up to the kernel's total.
243  return (task->numWgCompleted() + 1 == task->numWgTotal());
244 }
245 
// GPUDispatcher::updateInvCounter: adjusts a kernel's count of outstanding
// invalidate requests by `val`, which must be +1 or -1. The signature line
// and the body of the final `if` are not present in this listing.
251 void
253  assert(val == -1 || val == 1);
254 
// operator[] is used without a presence check, so kern_id is expected to be
// registered already.
255  auto task = hsaQueueEntries[kern_id];
256  task->updateOutstandingInvs(val);
257 
258  // kernel invalidate is done, schedule dispatch work
// NOTE(review): the guarded statement is missing here; presumably it
// schedules tickEvent -- confirm against the real source.
259  if (task->isInvDone() && !tickEvent.scheduled()) {
261  }
262 }
263 
// GPUDispatcher::updateWbCounter: adjusts a kernel's count of outstanding
// writeback requests by `val`, which must be +1 or -1, and returns whether
// none remain afterwards. The signature line is not present in this listing.
271 bool
273  assert(val == -1 || val == 1);
274 
// operator[] is used without a presence check, so kern_id is expected to be
// registered already.
275  auto task = hsaQueueEntries[kern_id];
276  task->updateOutstandingWbs(val);
277 
278  // true: WB is done, false: WB is still ongoing
279  return (task->outstandingWbs() == 0);
280 }
281 
// GPUDispatcher::getOutstandingWbs: returns the number of outstanding
// writeback requests recorded for the kernel `kernId`. The signature line is
// not present in this listing.
285 int
// operator[] is used without a presence check, so kernId is expected to be
// registered already.
287  auto task = hsaQueueEntries[kernId];
288 
289  return task->outstandingWbs();
290 }
291 
// GPUDispatcher::notifyWgCompl: records that a workgroup of wf's kernel has
// completed and, once every workgroup of that kernel is done, finishes the
// packet and decrements the task's completion signal if it has one. The
// signature line and a few body statements are not present in this listing.
300 void
302 {
303  int kern_id = wf->kernId;
304  DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
305  auto task = hsaQueueEntries[kern_id];
306  assert(task->dispatchId() == kern_id);
307  task->notifyWgCompleted();
308 
309  DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
310  curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
311 
// Everything inside this `if` runs only for the kernel's last workgroup.
312  if (task->numWgCompleted() == task->numWgTotal()) {
313  // Notify the HSA PP that this kernel is complete
// The line naming the object finishPkt is called on is missing from this
// listing.
// NOTE(review): presumably it is the command processor's HSA packet
// processor -- confirm against the real source.
315  .finishPkt(task->dispPktPtr(), task->queueId());
316  if (task->completionSignal()) {
317  // The signal value is aligned 8 bytes from
318  // the actual handle in the runtime
319  Addr signal_addr = task->completionSignal() + sizeof(Addr);
320  DPRINTF(GPUDisp, "HSA AQL Kernel Complete! Triggering "
321  "completion signal: %x!\n", signal_addr);
322 
// Read the current signal value through thread 0's virtual-memory proxy.
331  auto *tc = gpuCmdProc->system()->threads[0];
332  auto &virt_proxy = tc->getVirtProxy();
333  TypedBufferArg<Addr> prev_signal(signal_addr);
334  prev_signal.copyIn(virt_proxy);
335 
// The decremented value is heap-allocated and passed to dmaWriteVirt with a
// null callback; nothing in this function frees it.
// NOTE(review): confirm who owns new_signal after the DMA completes.
336  Addr *new_signal = new Addr;
337  *new_signal = (Addr)*prev_signal - 1;
338 
339  gpuCmdProc->dmaWriteVirt(signal_addr, sizeof(Addr), nullptr,
340  new_signal, 0);
341  } else {
342  DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
343  "signal\n");
344  }
345 
346  DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
347  curTick(), kern_id);
348  DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
349  }
350 
// NOTE(review): the guarded statement is missing here; presumably it
// schedules tickEvent -- confirm against the real source.
351  if (!tickEvent.scheduled()) {
353  }
354 }
355 
// GPUDispatcher::scheduleDispatch: acts only when tickEvent is not already
// scheduled. The signature line and the guarded statement are not present in
// this listing.
// NOTE(review): presumably the missing statement schedules tickEvent --
// confirm against the real source.
356 void
358 {
359  if (!tickEvent.scheduled()) {
361  }
362 }
363 
// Factory hook on the parameter object: builds a new GPUDispatcher from
// these parameters and returns it as a raw pointer.
// NOTE(review): nothing here shows who deletes the returned object --
// confirm the ownership convention with the caller.
364 GPUDispatcher *GPUDispatcherParams::create()
365 {
366  return new GPUDispatcher(this);
367 }
GPUDispatcher::numKernelLaunched
Stats::Scalar numKernelLaunched
Definition: dispatcher.hh:95
hsa_queue_entry.hh
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:460
GPUDispatcher::gpuCmdProc
GPUCommandProcessor * gpuCmdProc
Definition: dispatcher.hh:85
GPUCommandProcessor
Definition: gpu_command_processor.hh:57
system.hh
GPUDispatcher::regStats
void regStats() override
Callback to set stat parameters.
Definition: dispatcher.cc:61
Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:150
Shader
Definition: shader.hh:87
EventBase::CPU_Tick_Pri
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition: eventq.hh:199
shader.hh
UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:797
GPUDispatcher::~GPUDispatcher
~GPUDispatcher()
Definition: dispatcher.cc:56
HSADevice::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Definition: hsa_device.cc:58
GPUDispatcher::execIds
std::queue< int > execIds
Definition: dispatcher.hh:89
HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition: hsa_queue_entry.hh:116
GPUDispatcher::exec
void exec()
Definition: dispatcher.cc:144
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:289
HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:667
GPUDispatcher::isReachingKernelEnd
bool isReachingKernelEnd(Wavefront *wf)
Definition: dispatcher.cc:232
GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:76
EventManager::deschedule
void deschedule(Event &event)
Definition: eventq.hh:1014
Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:194
syscall_emul_buf.hh
Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:503
HSAQueueEntry
Definition: hsa_queue_entry.hh:60
GPUDispatcher::cyclesWaitingForDispatch
Stats::Scalar cyclesWaitingForDispatch
Definition: dispatcher.hh:96
GPUDispatcher::GPUDispatcher
GPUDispatcher(const Params *p)
Definition: dispatcher.cc:47
GPUDispatcher
Definition: dispatcher.hh:60
wavefront.hh
GPUDispatcher::notifyWgCompl
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed, it will call this method.
Definition: dispatcher.cc:301
Wavefront::computeUnit
ComputeUnit * computeUnit
Definition: wavefront.hh:99
cp
Definition: cprintf.cc:40
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1005
HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:154
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:127
GPUDispatcher::updateInvCounter
void updateInvCounter(int kern_id, int val=-1)
Update the counter of outstanding inv requests for the kernel. kern_id: kernel id; val: +1/-1,...
Definition: dispatcher.cc:252
GPUDispatcher::dispatchActive
bool dispatchActive
Definition: dispatcher.hh:93
GPUDispatcher::scheduleDispatch
void scheduleDispatch()
Definition: dispatcher.cc:357
GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:83
GPUDispatcher::shader
Shader * shader
Definition: dispatcher.hh:84
GPUDispatcher::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: dispatcher.cc:106
gpu_command_processor.hh
GPUDispatcher::doneIds
std::queue< int > doneIds
Definition: dispatcher.hh:91
X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:769
Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:142
Stats::DataWrap::name
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:274
SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:790
Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:214
BaseBufferArg::copyIn
bool copyIn(PortProxy &memproxy)
copy data into simulator space (read from target memory)
Definition: syscall_emul_buf.hh:77
GPUDispatcher::tickEvent
EventFunctionWrapper tickEvent
Definition: dispatcher.hh:86
SimObject::name
virtual const std::string name() const
Definition: sim_object.hh:133
GPUDispatcher::Params
GPUDispatcherParams Params
Definition: dispatcher.hh:63
System::threads
Threads threads
Definition: system.hh:309
HSADevice::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: hsa_device.cc:45
GPUDispatcher::hsaQueueEntries
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition: dispatcher.hh:87
GPUCommandProcessor::system
System * system()
Definition: gpu_command_processor.cc:201
Wavefront
Definition: wavefront.hh:57
GPUDispatcher::getOutstandingWbs
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
Definition: dispatcher.cc:286
Wavefront::wgId
uint32_t wgId
Definition: wavefront.hh:152
GPUDispatcher::setShader
void setShader(Shader *new_shader)
Definition: dispatcher.cc:89
CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:63
Wavefront::kernId
int kernId
Definition: wavefront.hh:90
TypedBufferArg
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
Definition: syscall_emul_buf.hh:128
GPUDispatcher::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: dispatcher.cc:95
dispatcher.hh
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:218
CheckpointIn
Definition: serialize.hh:67
Stats::DataWrap::desc
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:307
Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:235
GPUDispatcher::updateWbCounter
bool updateWbCounter(int kern_id, int val=-1)
Update the counter of outstanding wb requests for the kernel. kern_id: kernel id; val: +1/-1,...
Definition: dispatcher.cc:272
curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:45
SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:92

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17