gem5 v23.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
dispatcher.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32
34
#include "gpu-compute/dispatcher.hh"

#include "debug/GPUAgentDisp.hh"
#include "debug/GPUDisp.hh"
#include "debug/GPUKernelInfo.hh"
#include "debug/GPUWgLatency.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/hsa_queue_entry.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"
#include "sim/syscall_emul_buf.hh"
#include "sim/system.hh"
45
46namespace gem5
47{
48
50 : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
51 tickEvent([this]{ exec(); },
52 "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
53 dispatchActive(false), kernelExitEvents(p.kernel_exit_events),
54 stats(this)
55{
56 schedule(&tickEvent, 0);
57}
58
60{
61}
62
65{
66 assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
67 return hsaQueueEntries[disp_id];
68}
69
70void
72{
73 gpuCmdProc = gpu_cmd_proc;
74}
75
76void
78{
79 shader = new_shader;
80}
81
82void
84{
85 Tick event_tick = 0;
86
87 if (tickEvent.scheduled())
88 event_tick = tickEvent.when();
89
90 SERIALIZE_SCALAR(event_tick);
91}
92
93void
95{
96 Tick event_tick;
97
98 if (tickEvent.scheduled())
100
101 UNSERIALIZE_SCALAR(event_tick);
102
103 if (event_tick) {
104 schedule(&tickEvent, event_tick);
105 }
106}
107
114void
116{
118
119 DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
120 task->kernelName(), task->dispatchId());
121 DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
122 task->kernelName(), task->dispatchId());
123
124 execIds.push(task->dispatchId());
125 dispatchActive = true;
126 hsaQueueEntries.emplace(task->dispatchId(), task);
127
128 if (!tickEvent.scheduled()) {
130 }
131}
132
133void
135{
136 int fail_count(0);
137 int disp_count(0);
138
144 DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
145 DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());
146
147 if (execIds.size() > 0) {
149 }
150
156 while (execIds.size() > fail_count) {
157 int exec_id = execIds.front();
158 auto task = hsaQueueEntries[exec_id];
159 bool launched(false);
160
161 // acq is needed before starting dispatch
163 // try to invalidate cache
165 } else {
166 // kern launch acquire is not set, skip invalidate
167 task->markInvDone();
168 }
169
174 if (!task->isInvDone()){
175 execIds.push(exec_id);
176 ++fail_count;
177
178 DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
179 " invalidate requests\n", exec_id, task->outstandingInvs());
180
181 // try the next kernel_id
182 execIds.pop();
183 continue;
184 }
185
186 // kernel invalidate is done, start workgroup dispatch
187 while (!task->dispComplete()) {
188 // update the thread context
189 shader->updateContext(task->contextId());
190
191 // attempt to dispatch workgroup
192 DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
193 curTick(), exec_id);
194
195 if (!shader->dispatchWorkgroups(task)) {
201 DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
202 execIds.push(exec_id);
203 ++fail_count;
204 break;
205 } else if (!launched) {
206 launched = true;
207 disp_count++;
208 DPRINTF(GPUKernelInfo, "Launched kernel %d\n", exec_id);
209 }
210 }
211
212 // try the next kernel_id
213 execIds.pop();
214 }
215
216 DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
217 DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
218 disp_count, fail_count);
219
220 while (doneIds.size()) {
221 DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
222 doneIds.pop();
223 }
224}
225
226bool
228{
229 int kern_id = wf->kernId;
230 assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
231 auto task = hsaQueueEntries[kern_id];
232 assert(task->dispatchId() == kern_id);
233
238 return (task->numWgCompleted() + 1 == task->numWgTotal());
239}
240
246void
248 assert(val == -1 || val == 1);
249
250 auto task = hsaQueueEntries[kern_id];
251 task->updateOutstandingInvs(val);
252
253 // kernel invalidate is done, schedule dispatch work
254 if (task->isInvDone() && !tickEvent.scheduled()) {
256 }
257}
258
266bool
268 assert(val == -1 || val == 1);
269
270 auto task = hsaQueueEntries[kern_id];
271 task->updateOutstandingWbs(val);
272
273 // true: WB is done, false: WB is still ongoing
274 return (task->outstandingWbs() == 0);
275}
276
280int
282 auto task = hsaQueueEntries[kernId];
283
284 return task->outstandingWbs();
285}
286
295void
297{
298 int kern_id = wf->kernId;
299 DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
300 auto task = hsaQueueEntries[kern_id];
301 assert(task->dispatchId() == kern_id);
302 task->notifyWgCompleted();
303
304 DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
305 curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
306
307 if (task->numWgCompleted() == task->numWgTotal()) {
308 // Notify the HSA PP that this kernel is complete
310 .finishPkt(task->dispPktPtr(), task->queueId());
311 if (task->completionSignal()) {
318 uint64_t signal_value =
319 gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
320
321 DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
322 "signal! Addr: %d\n", task->completionSignal());
323
324 gpuCmdProc->updateHsaSignal(task->completionSignal(),
325 signal_value - 1);
326 } else {
327 DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
328 "signal\n");
329 }
330
331 DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
332 curTick(), kern_id);
333 DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
334
335 if (kernelExitEvents) {
336 exitSimLoop("GPU Kernel Completed");
337 }
338 }
339
340 if (!tickEvent.scheduled()) {
342 }
343}
344
345void
347{
348 if (!tickEvent.scheduled()) {
350 }
351}
352
354 statistics::Group *parent)
355 : statistics::Group(parent),
356 ADD_STAT(numKernelLaunched, "number of kernel launched"),
357 ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
358 "wavefronts that are waiting to be dispatched")
359{
360}
361
362} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
Tick clockPeriod() const
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
HSAPacketProcessor & hsaPacketProc()
uint64_t functionalReadHsaSignal(Addr signal_handle)
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition dispatcher.cc:83
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of oustanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
EventFunctionWrapper tickEvent
Definition dispatcher.hh:87
bool isReachingKernelEnd(Wavefront *wf)
GPUDispatcherParams Params
Definition dispatcher.hh:65
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition dispatcher.hh:88
gem5::GPUDispatcher::GPUDispatcherStats stats
bool updateWbCounter(int kern_id, int val=-1)
update the counter of oustanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
HSAQueueEntry * hsaTask(int disp_id)
Definition dispatcher.cc:64
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition dispatcher.cc:94
GPUCommandProcessor * gpuCmdProc
Definition dispatcher.hh:86
std::queue< int > execIds
Definition dispatcher.hh:90
GPUDispatcher(const Params &p)
Definition dispatcher.cc:49
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition dispatcher.cc:71
void setShader(Shader *new_shader)
Definition dispatcher.cc:77
std::queue< int > doneIds
Definition dispatcher.hh:92
void finishPkt(void *pkt, uint32_t rl_idx)
const std::string & kernelName() const
void prepareInvalidate(HSAQueueEntry *task)
Definition shader.cc:191
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition shader.cc:235
int impl_kern_launch_acq
Definition shader.hh:225
void updateContext(int cid)
Definition shader.cc:153
Abstract superclass for simulation objects.
ComputeUnit * computeUnit
Definition wavefront.hh:106
Statistics container.
Definition group.hh:93
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
void deschedule(Event &event)
Definition eventq.hh:1021
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition eventq.hh:207
Tick when() const
Get the time that the event is scheduled.
Definition eventq.hh:501
HSAQueuEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 0 > p
Bitfield< 63 > val
Definition misc.hh:776
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
std::ostream CheckpointOut
Definition serialize.hh:66
uint64_t Tick
Tick count type.
Definition types.hh:58
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition sim_events.cc:88
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
GPUDispatcherStats(statistics::Group *parent)
This file defines buffer classes used to handle pointer arguments in emulated syscalls.

Generated on Mon Jul 10 2023 14:24:31 for gem5 by doxygen 1.9.7