gem5 v24.0.0.0
Loading...
Searching...
No Matches
hsa_queue_entry.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2017-2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
41#ifndef __GPU_COMPUTE_HSA_QUEUE_ENTRY__
42#define __GPU_COMPUTE_HSA_QUEUE_ENTRY__
43
44#include <bitset>
45#include <cstdint>
46#include <cstring>
47#include <iostream>
48#include <vector>
49
50#include "base/intmath.hh"
51#include "base/types.hh"
52#include "dev/hsa/hsa_packet.hh"
53#include "dev/hsa/hsa_queue.hh"
54#include "enums/GfxVersion.hh"
56
57namespace gem5
58{
59
61{
62 public:
63 HSAQueueEntry(std::string kernel_name, uint32_t queue_id,
64 int dispatch_id, void *disp_pkt, AMDKernelCode *akc,
65 Addr host_pkt_addr, Addr code_addr, GfxVersion gfx_version)
66 : _gfxVersion(gfx_version), kernName(kernel_name),
67 _wgSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_x,
68 (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_y,
69 (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_z}},
70 _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
71 (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
72 (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
73 _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
74 _hostDispPktAddr(host_pkt_addr),
75 _completionSignal(((_hsa_dispatch_packet_t*)disp_pkt)
76 ->completion_signal),
77 codeAddress(code_addr),
78 kernargAddress(((_hsa_dispatch_packet_t*)disp_pkt)->kernarg_address),
79 _outstandingInvs(-1), _outstandingWbs(0),
80 _ldsSize((int)((_hsa_dispatch_packet_t*)disp_pkt)->
81 group_segment_size),
82 _privMemPerItem((int)((_hsa_dispatch_packet_t*)disp_pkt)->
83 private_segment_size),
84 _contextId(0), _wgId{{ 0, 0, 0 }},
85 _numWgTotal(1), numWgArrivedAtBarrier(0), _numWgCompleted(0),
86 _globalWgId(0), dispatchComplete(false)
87
88 {
89 // Use the resource descriptors to determine number of GPRs. This will
90 // round up in some cases, however the exact number field in the AMD
91 // kernel code struct is not backwards compatible and that field is
92 // not populated in newer compiles. The resource descriptor dword must
93 // be backwards compatible, so use that always.
94 // LLVM docs: https://www.llvm.org/docs/AMDGPUUsage.html
95 // #code-object-v3-kernel-descriptor
96 //
97 // Currently, the only supported gfx versions in gem5 that compute
98 // VGPR count differently are gfx90a and gfx942.
99 if (gfx_version == GfxVersion::gfx90a ||
100 gfx_version == GfxVersion::gfx942) {
101 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
102 } else {
103 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
104 }
105
106 // SGPR allocation granulary is 16 in GFX9
107 // Source: https://llvm.org/docs/AMDGPUUsage.html
108 if (gfx_version == GfxVersion::gfx900 ||
109 gfx_version == GfxVersion::gfx902 ||
110 gfx_version == GfxVersion::gfx908 ||
111 gfx_version == GfxVersion::gfx90a ||
112 gfx_version == GfxVersion::gfx942) {
113 numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
114 } else {
115 panic("Saw unknown gfx version setting up GPR counts\n");
116 }
117
118 initialVgprState.reset();
119 initialSgprState.reset();
120
121 for (int i = 0; i < MAX_DIM; ++i) {
122 _numWg[i] = divCeil(_gridSize[i], _wgSize[i]);
123 _numWgTotal *= _numWg[i];
124 }
125
126 parseKernelCode(akc);
127
128 // Offset of a first AccVGPR in the unified register file.
129 // Granularity 4. Value 0-63. 0 - accum-offset = 4,
130 // 1 - accum-offset = 8, ..., 63 - accum-offset = 256.
131 _accumOffset = (akc->accum_offset + 1) * 4;
132 }
133
134 const GfxVersion&
136 {
137 return _gfxVersion;
138 }
139
140 const std::string&
142 {
143 return kernName;
144 }
145
146 int
147 wgSize(int dim) const
148 {
149 assert(dim < MAX_DIM);
150 return _wgSize[dim];
151 }
152
153 int
154 gridSize(int dim) const
155 {
156 assert(dim < MAX_DIM);
157 return _gridSize[dim];
158 }
159
160 int
162 {
163 return numVgprs;
164 }
165
166 int
168 {
169 return numSgprs;
170 }
171
172 uint32_t
173 queueId() const
174 {
175 return _queueId;
176 }
177
178 int
180 {
181 return _dispatchId;
182 }
183
184 void*
186 {
187 return dispPkt;
188 }
189
190 Addr
192 {
193 return _hostDispPktAddr;
194 }
195
196 Addr
198 {
199 return _completionSignal;
200 }
201
202 Addr
203 codeAddr() const
204 {
205 return codeAddress;
206 }
207
208 Addr
210 {
211 return kernargAddress;
212 }
213
214 int
215 ldsSize() const
216 {
217 return _ldsSize;
218 }
219
220 int privMemPerItem() const { return _privMemPerItem; }
221
222 int
223 contextId() const
224 {
225 return _contextId;
226 }
227
228 bool
230 {
231 return dispatchComplete;
232 }
233
234 int
235 wgId(int dim) const
236 {
237 assert(dim < MAX_DIM);
238 return _wgId[dim];
239 }
240
241 void
242 wgId(int dim, int val)
243 {
244 assert(dim < MAX_DIM);
245 _wgId[dim] = val;
246 }
247
248 int
250 {
251 return _globalWgId;
252 }
253
254 void
256 {
257 _globalWgId = val;
258 }
259
260 int
261 numWg(int dim) const
262 {
263 assert(dim < MAX_DIM);
264 return _numWg[dim];
265 }
266
267 void
269 {
270 ++_numWgCompleted;
271 }
272
273 int
275 {
276 return _numWgCompleted;
277 }
278
279 int
281 {
282 return _numWgTotal;
283 }
284
285 void
287 {
288 ++_wgId[0];
289 ++_globalWgId;
290
291 if (wgId(0) * wgSize(0) >= gridSize(0)) {
292 _wgId[0] = 0;
293 ++_wgId[1];
294
295 if (wgId(1) * wgSize(1) >= gridSize(1)) {
296 _wgId[1] = 0;
297 ++_wgId[2];
298
299 if (wgId(2) * wgSize(2) >= gridSize(2)) {
300 dispatchComplete = true;
301 }
302 }
303 }
304 }
305
306 int
308 {
309 return numWgArrivedAtBarrier;
310 }
311
312 bool vgprBitEnabled(int bit) const
313 {
314 return initialVgprState.test(bit);
315 }
316
317 bool sgprBitEnabled(int bit) const
318 {
319 return initialSgprState.test(bit);
320 }
321
327
334
335 // the maximum number of dimensions for a grid or workgroup
336 const static int MAX_DIM = 3;
337
338 /* getter */
339 int
341 return _outstandingInvs;
342 }
343
349 bool
351 {
352 return (_outstandingInvs != -1);
353 }
354
360 void
362 {
363 _outstandingInvs += val;
364 assert(_outstandingInvs >= 0);
365 }
366
370 void
372 {
373 _outstandingInvs = 0;
374 }
375
379 bool
380 isInvDone() const
381 {
382 assert(_outstandingInvs >= 0);
383 return (_outstandingInvs == 0);
384 }
385
386 int
388 {
389 return _outstandingWbs;
390 }
391
397 void
399 {
400 _outstandingWbs += val;
401 assert(_outstandingWbs >= 0);
402 }
403
404 unsigned
406 {
407 return _accumOffset;
408 }
409
410 private:
411 void
413 {
415 initialSgprState.set(PrivateSegBuf,
417 initialSgprState.set(DispatchPtr,
419 initialSgprState.set(QueuePtr,
421 initialSgprState.set(KernargSegPtr,
423 initialSgprState.set(DispatchId,
425 initialSgprState.set(FlatScratchInit,
427 initialSgprState.set(PrivateSegSize,
429 initialSgprState.set(WorkgroupIdX,
431 initialSgprState.set(WorkgroupIdY,
433 initialSgprState.set(WorkgroupIdZ,
435 initialSgprState.set(WorkgroupInfo,
437 initialSgprState.set(PrivSegWaveByteOffset,
439
444 initialVgprState.set(WorkitemIdX, true);
445 initialVgprState.set(WorkitemIdY, akc->enable_vgpr_workitem_id > 0);
446 initialVgprState.set(WorkitemIdZ, akc->enable_vgpr_workitem_id > 1);
447 }
448
449 // store gfx version for version specific task handling
450 GfxVersion _gfxVersion;
451 // name of the kernel associated with the AQL entry
452 std::string kernName;
453 // workgroup Size (3 dimensions)
454 std::array<int, MAX_DIM> _wgSize;
455 // grid Size (3 dimensions)
456 std::array<int, MAX_DIM> _gridSize;
457 // total number of VGPRs per work-item
459 // total number of SGPRs per wavefront
461 // id of AQL queue in which this entry is placed
462 uint32_t _queueId;
464 // raw AQL packet pointer
465 void *dispPkt;
466 // host-side addr of the dispatch packet
468 // pointer to bool
470 // base address of the raw machine code
472 // base address of the kernel args
494 std::array<int, MAX_DIM> _wgId;
495 std::array<int, MAX_DIM> _numWg;
498 // The number of completed work groups
502
503 std::bitset<NumVectorInitFields> initialVgprState;
504 std::bitset<NumScalarInitFields> initialSgprState;
505
506 unsigned _accumOffset;
507};
508
509} // namespace gem5
510
511#endif // __GPU_COMPUTE_HSA_QUEUE_ENTRY__
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
bool isInvStarted()
Whether invalidate has started or finished -1 is the initial value indicating inv has not started for...
uint32_t queueId() const
bool sgprBitEnabled(int bit) const
std::bitset< NumScalarInitFields > initialSgprState
int numWg(int dim) const
int wgId(int dim) const
const GfxVersion & gfxVersion() const
Addr hostDispPktAddr() const
int wgSize(int dim) const
void wgId(int dim, int val)
void parseKernelCode(AMDKernelCode *akc)
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
bool vgprBitEnabled(int bit) const
void markInvDone()
Forcefully change the state to be inv done.
const std::string & kernelName() const
std::array< int, MAX_DIM > _wgId
std::array< int, MAX_DIM > _gridSize
int _outstandingWbs
Number of outstanding wbs for the kernel values: 0: 1)initial value, flush has not started for the ke...
std::array< int, MAX_DIM > _wgSize
std::array< int, MAX_DIM > _numWg
Addr completionSignal() const
bool isInvDone() const
Is invalidate done?
int gridSize(int dim) const
int _outstandingInvs
Number of outstanding invs for the kernel.
std::bitset< NumVectorInitFields > initialVgprState
HSAQueueEntry(std::string kernel_name, uint32_t queue_id, int dispatch_id, void *disp_pkt, AMDKernelCode *akc, Addr host_pkt_addr, Addr code_addr, GfxVersion gfx_version)
unsigned accumOffset() const
void updateOutstandingWbs(int val)
Update the number of pending writeback requests.
void updateOutstandingInvs(int val)
update the number of pending invalidate requests
static constexpr T divCeil(const T &a, const U &b)
Definition intmath.hh:110
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 63 > val
Definition misc.hh:804
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
@ WorkgroupIdX
@ DispatchId
@ DispatchPtr
@ QueuePtr
@ PrivSegWaveByteOffset
@ PrivateSegBuf
@ WorkgroupIdY
@ PrivateSegSize
@ WorkgroupInfo
@ WorkgroupIdZ
@ FlatScratchInit
@ KernargSegPtr
@ WorkitemIdX
@ WorkitemIdZ
@ WorkitemIdY
PM4 packets.
uint32_t enable_sgpr_flat_scratch_init
uint32_t enable_sgpr_queue_ptr
uint32_t enable_private_segment
uint32_t enable_sgpr_workgroup_id_y
uint32_t enable_sgpr_dispatch_ptr
uint32_t enable_sgpr_workgroup_id_z
uint32_t enable_sgpr_dispatch_id
uint32_t enable_vgpr_workitem_id
uint32_t enable_sgpr_private_segment_size
uint32_t enable_sgpr_kernarg_segment_ptr
uint32_t enable_sgpr_private_segment_buffer
uint32_t enable_sgpr_workgroup_id_x
uint32_t enable_sgpr_workgroup_info

Generated on Tue Jun 18 2024 16:24:04 for gem5 by doxygen 1.11.0