gem5 v24.1.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
dispatcher.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32
34
35#include "debug/GPUAgentDisp.hh"
36#include "debug/GPUDisp.hh"
37#include "debug/GPUKernelInfo.hh"
38#include "debug/GPUWgLatency.hh"
41#include "gpu-compute/shader.hh"
43#include "sim/sim_exit.hh"
45#include "sim/system.hh"
46
47namespace gem5
48{
49
51 : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
52 tickEvent([this]{ exec(); },
53 "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
54 dispatchActive(false), kernelExitEvents(p.kernel_exit_events),
55 stats(this)
56{
57 schedule(&tickEvent, 0);
58}
59
63
66{
67 assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
68 return hsaQueueEntries[disp_id];
69}
70
71void
73{
74 gpuCmdProc = gpu_cmd_proc;
75}
76
77void
79{
80 shader = new_shader;
81}
82
83void
85{
86 Tick event_tick = 0;
87
88 if (tickEvent.scheduled())
89 event_tick = tickEvent.when();
90
91 SERIALIZE_SCALAR(event_tick);
92}
93
94void
96{
97 Tick event_tick;
98
99 if (tickEvent.scheduled())
101
102 UNSERIALIZE_SCALAR(event_tick);
103
104 if (event_tick) {
105 schedule(&tickEvent, event_tick);
106 }
107}
108
115void
117{
119
120 DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
121 task->kernelName(), task->dispatchId());
122 DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
123 task->kernelName(), task->dispatchId());
124
125 execIds.push(task->dispatchId());
126 dispatchActive = true;
127 hsaQueueEntries.emplace(task->dispatchId(), task);
128
129 if (!tickEvent.scheduled()) {
131 }
132}
133
134void
136{
137 int fail_count(0);
138 int disp_count(0);
139
145 DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
146 DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());
147
148 if (execIds.size() > 0) {
150 }
151
157 while (execIds.size() > fail_count) {
158 int exec_id = execIds.front();
159 auto task = hsaQueueEntries[exec_id];
160 bool launched(false);
161
162 // acq is needed before starting dispatch
164 // try to invalidate cache
166 } else {
167 // kern launch acquire is not set, skip invalidate
168 task->markInvDone();
169 }
170
175 if (!task->isInvDone()){
176 execIds.push(exec_id);
177 ++fail_count;
178
179 DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
180 " invalidate requests\n", exec_id, task->outstandingInvs());
181
182 // try the next kernel_id
183 execIds.pop();
184 continue;
185 }
186
187 // kernel invalidate is done, start workgroup dispatch
188 while (!task->dispComplete()) {
189 // update the thread context
190 shader->updateContext(task->contextId());
191
192 // attempt to dispatch workgroup
193 DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
194 curTick(), exec_id);
195
196 if (!shader->dispatchWorkgroups(task)) {
202 DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
203 execIds.push(exec_id);
204 ++fail_count;
205 break;
206 } else if (!launched) {
207 launched = true;
208 disp_count++;
209 DPRINTF(GPUKernelInfo, "Launched kernel %d for WG %d\n",
210 exec_id, disp_count);
211 }
212 }
213
214 // try the next kernel_id
215 execIds.pop();
216 }
217
218 DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
219 DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
220 disp_count, fail_count);
221
222 while (doneIds.size()) {
223 DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
224 doneIds.pop();
225 }
226}
227
228bool
230{
231 int kern_id = wf->kernId;
232 assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
233 auto task = hsaQueueEntries[kern_id];
234 assert(task->dispatchId() == kern_id);
235
240 return (task->numWgCompleted() + 1 == task->numWgTotal());
241}
242
248void
250 assert(val == -1 || val == 1);
251
252 auto task = hsaQueueEntries[kern_id];
253 task->updateOutstandingInvs(val);
254
255 // kernel invalidate is done, schedule dispatch work
256 if (task->isInvDone() && !tickEvent.scheduled()) {
258 }
259}
260
268bool
270 assert(val == -1 || val == 1);
271
272 auto task = hsaQueueEntries[kern_id];
273 task->updateOutstandingWbs(val);
274
275 // true: WB is done, false: WB is still ongoing
276 return (task->outstandingWbs() == 0);
277}
278
282int
284 auto task = hsaQueueEntries[kernId];
285
286 return task->outstandingWbs();
287}
288
297void
299{
300 int kern_id = wf->kernId;
301 DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
302 auto task = hsaQueueEntries[kern_id];
303 assert(task->dispatchId() == kern_id);
304 task->notifyWgCompleted();
305
306 DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
307 curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
308
309 if (task->numWgCompleted() == task->numWgTotal()) {
310 // Notify the HSA PP that this kernel is complete
312 .finishPkt(task->dispPktPtr(), task->queueId());
313 if (task->completionSignal()) {
314 DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
315 "signal! Addr: %d\n", task->completionSignal());
316
317 gpuCmdProc->sendCompletionSignal(task->completionSignal());
318 } else {
319 DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
320 "signal\n");
321 }
322
323 DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
324 curTick(), kern_id);
325 DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
326
327 if (kernelExitEvents) {
328 shader->requestKernelExitEvent(task->completionSignal());
329 }
330 }
331
332 if (!tickEvent.scheduled()) {
334 }
335}
336
337void
344
346 statistics::Group *parent)
347 : statistics::Group(parent),
348 ADD_STAT(numKernelLaunched, "number of kernel launched"),
349 ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
350 "wavefronts that are waiting to be dispatched")
351{
352}
353
354} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
Tick clockPeriod() const
void sendCompletionSignal(Addr signal_handle)
HSAPacketProcessor & hsaPacketProc()
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition dispatcher.cc:84
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
EventFunctionWrapper tickEvent
Definition dispatcher.hh:87
bool isReachingKernelEnd(Wavefront *wf)
GPUDispatcherParams Params
Definition dispatcher.hh:65
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition dispatcher.hh:88
gem5::GPUDispatcher::GPUDispatcherStats stats
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
HSAQueueEntry * hsaTask(int disp_id)
Definition dispatcher.cc:65
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition dispatcher.cc:95
GPUCommandProcessor * gpuCmdProc
Definition dispatcher.hh:86
std::queue< int > execIds
Definition dispatcher.hh:90
GPUDispatcher(const Params &p)
Definition dispatcher.cc:50
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition dispatcher.cc:72
void setShader(Shader *new_shader)
Definition dispatcher.cc:78
std::queue< int > doneIds
Definition dispatcher.hh:92
void finishPkt(void *pkt, uint32_t rl_idx)
const std::string & kernelName() const
void prepareInvalidate(HSAQueueEntry *task)
Definition shader.cc:203
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition shader.cc:258
int impl_kern_launch_acq
Definition shader.hh:237
void updateContext(int cid)
Definition shader.cc:165
void requestKernelExitEvent(bool is_blit_kernel)
Definition shader.hh:332
Abstract superclass for simulation objects.
ComputeUnit * computeUnit
Definition wavefront.hh:108
Statistics container.
Definition group.hh:93
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
void deschedule(Event &event)
Definition eventq.hh:1021
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition eventq.hh:207
Tick when() const
Get the time that the event is scheduled.
Definition eventq.hh:501
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 0 > p
Bitfield< 63 > val
Definition misc.hh:804
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
std::ostream CheckpointOut
Definition serialize.hh:66
uint64_t Tick
Tick count type.
Definition types.hh:58
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
GPUDispatcherStats(statistics::Group *parent)
This file defines buffer classes used to handle pointer arguments in emulated syscalls.

Generated on Mon Jan 13 2025 04:28:36 for gem5 by doxygen 1.9.8