gem5 v24.0.0.0
Loading...
Searching...
No Matches
dispatcher.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2015,2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32
34
35#include "debug/GPUAgentDisp.hh"
36#include "debug/GPUDisp.hh"
37#include "debug/GPUKernelInfo.hh"
38#include "debug/GPUWgLatency.hh"
41#include "gpu-compute/shader.hh"
43#include "sim/sim_exit.hh"
45#include "sim/system.hh"
46
47namespace gem5
48{
49
// Constructor initializer list and body. The signature line is absent from
// this listing; the symbol index further down gives it as
// GPUDispatcher(const Params &p), defined at dispatcher.cc:50.
// shader and gpuCmdProc start out null, tickEvent is bound to exec(), and
// kernelExitEvents is copied from the kernel_exit_events parameter.
51 : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
52 tickEvent([this]{ exec(); },
53 "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
54 dispatchActive(false), kernelExitEvents(p.kernel_exit_events),
55 stats(this)
56{
// Schedule the first dispatcher tick at tick 0.
57 schedule(&tickEvent, 0);
58}
59
63
// Body of hsaTask(int disp_id) (listed at dispatcher.cc:65 in the symbol
// index; the return-type and signature lines are absent from this listing).
// Returns the HSAQueueEntry pointer stored in hsaQueueEntries under disp_id.
66{
// The id must already be registered; operator[] on the map below would
// otherwise insert a null entry for it.
67 assert(hsaQueueEntries.find(disp_id) != hsaQueueEntries.end());
68 return hsaQueueEntries[disp_id];
69}
70
// setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc), per the symbol
// index (dispatcher.cc:72); the signature line is absent from this listing.
// Stores the given command processor pointer in gpuCmdProc.
71void
73{
74 gpuCmdProc = gpu_cmd_proc;
75}
76
// setShader(Shader *new_shader), per the symbol index (dispatcher.cc:78);
// the signature line is absent from this listing.
// Stores the given shader pointer in the shader member.
77void
79{
80 shader = new_shader;
81}
82
// serialize(CheckpointOut &cp) const, per the symbol index
// (dispatcher.cc:84); the signature line is absent from this listing.
// Writes a single scalar, event_tick, to the checkpoint.
83void
85{
// 0 is written when no tick is pending.
86 Tick event_tick = 0;
87
// Otherwise record the tick at which tickEvent is due to fire.
88 if (tickEvent.scheduled())
89 event_tick = tickEvent.when();
90
91 SERIALIZE_SCALAR(event_tick);
92}
93
// unserialize(CheckpointIn &cp), per the symbol index (dispatcher.cc:95);
// the signature line is absent from this listing.
// Reads event_tick back from the checkpoint and re-schedules tickEvent for
// that tick when the value is non-zero.
94void
96{
97 Tick event_tick;
98
// NOTE(review): listing line 100, the statement controlled by this `if`, is
// absent from this listing. The symbol index references
// deschedule(Event &event), so presumably a pending tickEvent is descheduled
// here before being restored -- confirm against the original file.
99 if (tickEvent.scheduled())
101
102 UNSERIALIZE_SCALAR(event_tick);
103
// A stored value of 0 is treated as "no tick was pending" and nothing is
// scheduled.
104 if (event_tick) {
105 schedule(&tickEvent, event_tick);
106 }
107}
108
// Presumably dispatch(HSAQueueEntry *task) from the symbol index; the
// signature line (listing line 116) is absent here -- confirm.
// Queues the task's dispatch id on execIds, marks the dispatcher active and
// records the task in hsaQueueEntries keyed by its dispatch id.
115void
117{
// NOTE(review): listing line 118 is absent from this listing.
119
120 DPRINTF(GPUDisp, "launching kernel: %s, dispatch ID: %d\n",
121 task->kernelName(), task->dispatchId());
122 DPRINTF(GPUAgentDisp, "launching kernel: %s, dispatch ID: %d\n",
123 task->kernelName(), task->dispatchId());
124
125 execIds.push(task->dispatchId());
126 dispatchActive = true;
// emplace leaves an existing entry untouched if this dispatch id is already
// present in the map.
127 hsaQueueEntries.emplace(task->dispatchId(), task);
128
// NOTE(review): the body of this `if` (listing line 130) is absent from this
// listing; presumably it schedules tickEvent -- confirm.
129 if (!tickEvent.scheduled()) {
131 }
132}
133
// Presumably exec(), the function tickEvent is bound to in the constructor;
// the signature line (listing line 135) is absent here -- confirm.
// Walks the ids queued on execIds, tries to dispatch workgroups for each
// task through the shader, and re-queues the ids that could not make
// progress. Afterwards drains doneIds.
134void
136{
// fail_count: ids pushed back onto execIds during this call.
// disp_count: kernels with at least one successful dispatchWorkgroups() call
// during this call.
137 int fail_count(0);
138 int disp_count(0);
139
145 DPRINTF(GPUDisp, "Launching %d Kernels\n", execIds.size());
146 DPRINTF(GPUAgentDisp, "Launching %d Kernels\n", execIds.size());
147
// NOTE(review): the body of this `if` (listing line 149) is absent from this
// listing.
148 if (execIds.size() > 0) {
150 }
151
// Failed ids are pushed to the back of the queue, so the loop ends once the
// only entries left are the ones that failed during this call.
// NOTE(review): execIds.size() is unsigned while fail_count is an int.
157 while (execIds.size() > fail_count) {
158 int exec_id = execIds.front();
159 auto task = hsaQueueEntries[exec_id];
160 bool launched(false);
161
// NOTE(review): listing lines 163 and 165 (the `if` condition and its body)
// are absent from this listing; the symbol index references
// impl_kern_launch_acq and prepareInvalidate(), presumably used here.
162 // acq is needed before starting dispatch
164 // try to invalidate cache
166 } else {
167 // kern launch acquire is not set, skip invalidate
168 task->markInvDone();
169 }
170
// Invalidate still pending: rotate this id to the back of the queue and
// move on to the next one.
175 if (!task->isInvDone()){
176 execIds.push(exec_id);
177 ++fail_count;
178
179 DPRINTF(GPUDisp, "kernel %d failed to launch, due to [%d] pending"
180 " invalidate requests\n", exec_id, task->outstandingInvs());
181
182 // try the next kernel_id
183 execIds.pop();
184 continue;
185 }
186
187 // kernel invalidate is done, start workgroup dispatch
188 while (!task->dispComplete()) {
189 // update the thread context
190 shader->updateContext(task->contextId());
191
192 // attempt to dispatch workgroup
193 DPRINTF(GPUWgLatency, "Attempt Kernel Launch cycle:%d kernel:%d\n",
194 curTick(), exec_id);
195
// A failed dispatch re-queues the id and stops working on this task for the
// current call.
196 if (!shader->dispatchWorkgroups(task)) {
202 DPRINTF(GPUDisp, "kernel %d failed to launch\n", exec_id);
203 execIds.push(exec_id);
204 ++fail_count;
205 break;
206 } else if (!launched) {
// Count each kernel at most once per call, on its first successful dispatch.
207 launched = true;
208 disp_count++;
209 DPRINTF(GPUKernelInfo, "Launched kernel %d\n", exec_id);
210 }
211 }
212
// Remove the front entry; an id that failed above has already been
// re-pushed to the back.
213 // try the next kernel_id
214 execIds.pop();
215 }
216
217 DPRINTF(GPUDisp, "Returning %d Kernels\n", doneIds.size());
218 DPRINTF(GPUWgLatency, "Kernel Wgs dispatched: %d | %d failures\n",
219 disp_count, fail_count);
220
// Log and discard every id queued on doneIds.
221 while (doneIds.size()) {
222 DPRINTF(GPUDisp, "Kernel %d completed\n", doneIds.front());
223 doneIds.pop();
224 }
225}
226
// Presumably isReachingKernelEnd(Wavefront *wf) from the symbol index; the
// signature line is absent from this listing -- confirm.
// Looks up the task for the wavefront's kernel id and reports whether
// exactly one workgroup is still outstanding.
227bool
229{
230 int kern_id = wf->kernId;
231 assert(hsaQueueEntries.find(kern_id) != hsaQueueEntries.end());
232 auto task = hsaQueueEntries[kern_id];
233 assert(task->dispatchId() == kern_id);
234
// True when completing one more workgroup would bring the completed count
// up to the task's total.
239 return (task->numWgCompleted() + 1 == task->numWgTotal());
240}
241
// Presumably updateInvCounter(int kern_id, int val=-1) from the symbol
// index; the signature line (with its opening brace) is absent from this
// listing -- confirm.
// Adjusts the task's outstanding-invalidate counter by val, which must be
// +1 or -1.
247void
249 assert(val == -1 || val == 1);
250
// NOTE(review): unlike hsaTask(), there is no assert here that kern_id is
// already present in hsaQueueEntries.
251 auto task = hsaQueueEntries[kern_id];
252 task->updateOutstandingInvs(val);
253
// NOTE(review): the body of this `if` (listing line 256) is absent from this
// listing; per the comment below it presumably schedules tickEvent -- confirm.
254 // kernel invalidate is done, schedule dispatch work
255 if (task->isInvDone() && !tickEvent.scheduled()) {
257 }
258}
259
// Presumably updateWbCounter(int kern_id, int val=-1) from the symbol index;
// the signature line (with its opening brace) is absent from this listing --
// confirm.
// Adjusts the task's outstanding-writeback counter by val, which must be
// +1 or -1, and returns whether that counter is now zero.
267bool
269 assert(val == -1 || val == 1);
270
271 auto task = hsaQueueEntries[kern_id];
272 task->updateOutstandingWbs(val);
273
274 // true: WB is done, false: WB is still ongoing
275 return (task->outstandingWbs() == 0);
276}
277
// Presumably getOutstandingWbs() from the symbol index; the signature line
// (with its opening brace) is absent from this listing -- confirm. The index
// names the parameter kern_id, while this body uses kernId.
// Returns the outstanding-writeback count of the task stored under kernId.
281int
283 auto task = hsaQueueEntries[kernId];
284
285 return task->outstandingWbs();
286}
287
// Presumably notifyWgCompl(Wavefront *wf) from the symbol index; the
// signature line (listing line 297) is absent here -- confirm.
// Records one completed workgroup on the wavefront's task. When the
// completed count reaches the task's total, runs the kernel-completion
// steps in the `if` block below.
296void
298{
299 int kern_id = wf->kernId;
300 DPRINTF(GPUDisp, "notify WgCompl %d\n", wf->wgId);
301 auto task = hsaQueueEntries[kern_id];
302 assert(task->dispatchId() == kern_id);
303 task->notifyWgCompleted();
304
305 DPRINTF(GPUWgLatency, "WG Complete cycle:%d wg:%d kernel:%d cu:%d\n",
306 curTick(), wf->wgId, kern_id, wf->computeUnit->cu_id);
307
// All workgroups of this kernel have completed.
308 if (task->numWgCompleted() == task->numWgTotal()) {
// NOTE(review): listing line 310, the object finishPkt() is called on, is
// absent from this listing; the symbol index references
// GPUCommandProcessor::hsaPacketProc(), presumably used here -- confirm.
309 // Notify the HSA PP that this kernel is complete
311 .finishPkt(task->dispPktPtr(), task->queueId());
// A zero completion signal is treated as "no completion signal".
312 if (task->completionSignal()) {
313 DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
314 "signal! Addr: %d\n", task->completionSignal());
315
316 gpuCmdProc->sendCompletionSignal(task->completionSignal());
317 } else {
318 DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
319 "signal\n");
320 }
321
322 DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
323 curTick(), kern_id);
324 DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
325
// NOTE(review): the symbol index lists this callee as
// requestKernelExitEvent(bool is_blit_kernel), yet the argument passed is the
// task's completion signal -- confirm this is intended.
326 if (kernelExitEvents) {
327 shader->requestKernelExitEvent(task->completionSignal());
328 }
329 }
330
// NOTE(review): the body of this `if` (listing line 332) is absent from this
// listing; presumably it schedules tickEvent -- confirm.
331 if (!tickEvent.scheduled()) {
333 }
334}
335
336void
343
// Tail of the GPUDispatcherStats(statistics::Group *parent) constructor
// listed in the symbol index; the first line of the signature is absent from
// this listing.
// Registers this group under parent and declares two stats:
// numKernelLaunched and cyclesWaitingForDispatch.
345 statistics::Group *parent)
346 : statistics::Group(parent),
347 ADD_STAT(numKernelLaunched, "number of kernel launched"),
348 ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
349 "wavefronts that are waiting to be dispatched")
350{
351}
352
353} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
Tick clockPeriod() const
void sendCompletionSignal(Addr signal_handle)
HSAPacketProcessor & hsaPacketProc()
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition dispatcher.cc:84
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
EventFunctionWrapper tickEvent
Definition dispatcher.hh:87
bool isReachingKernelEnd(Wavefront *wf)
GPUDispatcherParams Params
Definition dispatcher.hh:65
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
std::unordered_map< int, HSAQueueEntry * > hsaQueueEntries
Definition dispatcher.hh:88
gem5::GPUDispatcher::GPUDispatcherStats stats
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
HSAQueueEntry * hsaTask(int disp_id)
Definition dispatcher.cc:65
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition dispatcher.cc:95
GPUCommandProcessor * gpuCmdProc
Definition dispatcher.hh:86
std::queue< int > execIds
Definition dispatcher.hh:90
GPUDispatcher(const Params &p)
Definition dispatcher.cc:50
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition dispatcher.cc:72
void setShader(Shader *new_shader)
Definition dispatcher.cc:78
std::queue< int > doneIds
Definition dispatcher.hh:92
void finishPkt(void *pkt, uint32_t rl_idx)
const std::string & kernelName() const
void prepareInvalidate(HSAQueueEntry *task)
Definition shader.cc:203
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition shader.cc:254
int impl_kern_launch_acq
Definition shader.hh:237
void updateContext(int cid)
Definition shader.cc:165
void requestKernelExitEvent(bool is_blit_kernel)
Definition shader.hh:332
Abstract superclass for simulation objects.
ComputeUnit * computeUnit
Definition wavefront.hh:108
Statistics container.
Definition group.hh:93
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
void deschedule(Event &event)
Definition eventq.hh:1021
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition eventq.hh:207
Tick when() const
Get the time that the event is scheduled.
Definition eventq.hh:501
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 0 > p
Bitfield< 63 > val
Definition misc.hh:804
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
std::ostream CheckpointOut
Definition serialize.hh:66
uint64_t Tick
Tick count type.
Definition types.hh:58
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
GPUDispatcherStats(statistics::Group *parent)
This file defines buffer classes used to handle pointer arguments in emulated syscalls.

Generated on Tue Jun 18 2024 16:24:04 for gem5 by doxygen 1.11.0