gem5  v21.0.1.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
hsa_queue_entry.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
43 #ifndef __GPU_COMPUTE_HSA_QUEUE_ENTRY__
44 #define __GPU_COMPUTE_HSA_QUEUE_ENTRY__
45 
46 #include <bitset>
47 #include <cstdint>
48 #include <cstring>
49 #include <iostream>
50 #include <vector>
51 
52 #include "base/intmath.hh"
53 #include "base/types.hh"
54 #include "dev/hsa/hsa_packet.hh"
55 #include "dev/hsa/hsa_queue.hh"
57 
59 {
60  public:
61  HSAQueueEntry(std::string kernel_name, uint32_t queue_id,
62  int dispatch_id, void *disp_pkt, AMDKernelCode *akc,
63  Addr host_pkt_addr, Addr code_addr)
64  : kernName(kernel_name),
65  _wgSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_x,
66  (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_y,
67  (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_z}},
68  _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
69  (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
70  (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
71  numVgprs(akc->workitem_vgpr_count),
72  numSgprs(akc->wavefront_sgpr_count),
73  _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
74  _hostDispPktAddr(host_pkt_addr),
76  ->completion_signal),
77  codeAddress(code_addr),
78  kernargAddress(((_hsa_dispatch_packet_t*)disp_pkt)->kernarg_address),
80  _ldsSize((int)((_hsa_dispatch_packet_t*)disp_pkt)->
81  group_segment_size),
82  _privMemPerItem((int)((_hsa_dispatch_packet_t*)disp_pkt)->
83  private_segment_size),
84  _contextId(0), _wgId{{ 0, 0, 0 }},
86  _globalWgId(0), dispatchComplete(false)
87 
88  {
89  // Precompiled BLIT kernels actually violate the spec a bit
90  // and don't set many of the required akc fields. For these kernels,
91  // we need to rip register usage from the resource registers.
92  //
93  // We can't get an exact number of registers from the resource
94  // registers because they round, but we can get an upper bound on it
95  if (!numVgprs)
96  numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
97 
98  // TODO: Granularity changes for GFX9!
99  if (!numSgprs)
100  numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
101 
102  initialVgprState.reset();
103  initialSgprState.reset();
104 
105  for (int i = 0; i < MAX_DIM; ++i) {
107  _numWgTotal *= _numWg[i];
108  }
109 
110  parseKernelCode(akc);
111  }
112 
113  const std::string&
114  kernelName() const
115  {
116  return kernName;
117  }
118 
119  int
120  wgSize(int dim) const
121  {
122  assert(dim < MAX_DIM);
123  return _wgSize[dim];
124  }
125 
126  int
127  gridSize(int dim) const
128  {
129  assert(dim < MAX_DIM);
130  return _gridSize[dim];
131  }
132 
133  int
135  {
136  return numVgprs;
137  }
138 
139  int
141  {
142  return numSgprs;
143  }
144 
145  uint32_t
146  queueId() const
147  {
148  return _queueId;
149  }
150 
151  int
152  dispatchId() const
153  {
154  return _dispatchId;
155  }
156 
157  void*
159  {
160  return dispPkt;
161  }
162 
163  Addr
165  {
166  return _hostDispPktAddr;
167  }
168 
169  Addr
171  {
172  return _completionSignal;
173  }
174 
175  Addr
176  codeAddr() const
177  {
178  return codeAddress;
179  }
180 
181  Addr
182  kernargAddr() const
183  {
184  return kernargAddress;
185  }
186 
187  int
188  ldsSize() const
189  {
190  return _ldsSize;
191  }
192 
193  int privMemPerItem() const { return _privMemPerItem; }
194 
195  int
196  contextId() const
197  {
198  return _contextId;
199  }
200 
201  bool
202  dispComplete() const
203  {
204  return dispatchComplete;
205  }
206 
207  int
208  wgId(int dim) const
209  {
210  assert(dim < MAX_DIM);
211  return _wgId[dim];
212  }
213 
214  void
215  wgId(int dim, int val)
216  {
217  assert(dim < MAX_DIM);
218  _wgId[dim] = val;
219  }
220 
221  int
222  globalWgId() const
223  {
224  return _globalWgId;
225  }
226 
227  void
229  {
230  _globalWgId = val;
231  }
232 
233  int
234  numWg(int dim) const
235  {
236  assert(dim < MAX_DIM);
237  return _numWg[dim];
238  }
239 
240  void
242  {
243  ++_numWgCompleted;
244  }
245 
246  int
248  {
249  return _numWgCompleted;
250  }
251 
252  int
253  numWgTotal() const
254  {
255  return _numWgTotal;
256  }
257 
258  void
260  {
261  ++_wgId[0];
262  ++_globalWgId;
263 
264  if (wgId(0) * wgSize(0) >= gridSize(0)) {
265  _wgId[0] = 0;
266  ++_wgId[1];
267 
268  if (wgId(1) * wgSize(1) >= gridSize(1)) {
269  _wgId[1] = 0;
270  ++_wgId[2];
271 
272  if (wgId(2) * wgSize(2) >= gridSize(2)) {
273  dispatchComplete = true;
274  }
275  }
276  }
277  }
278 
279  int
281  {
282  return numWgArrivedAtBarrier;
283  }
284 
285  bool vgprBitEnabled(int bit) const
286  {
287  return initialVgprState.test(bit);
288  }
289 
290  bool sgprBitEnabled(int bit) const
291  {
292  return initialSgprState.test(bit);
293  }
294 
300 
307 
308  // the maximum number of dimensions for a grid or workgroup
309  const static int MAX_DIM = 3;
310 
311  /* getter */
312  int
314  return _outstandingInvs;
315  }
316 
322  bool
324  {
325  return (_outstandingInvs != -1);
326  }
327 
333  void
335  {
337  assert(_outstandingInvs >= 0);
338  }
339 
343  void
345  {
346  _outstandingInvs = 0;
347  }
348 
352  bool
353  isInvDone() const
354  {
355  assert(_outstandingInvs >= 0);
356  return (_outstandingInvs == 0);
357  }
358 
359  int
361  {
362  return _outstandingWbs;
363  }
364 
370  void
372  {
373  _outstandingWbs += val;
374  assert(_outstandingWbs >= 0);
375  }
376 
377  private:
378  void
380  {
387  akc->enable_sgpr_queue_ptr);
412 
417  initialVgprState.set(WorkitemIdX, true);
420  }
421 
422  // name of the kernel associated with the AQL entry
423  std::string kernName;
424  // workgroup Size (3 dimensions)
425  std::array<int, MAX_DIM> _wgSize;
426  // grid Size (3 dimensions)
427  std::array<int, MAX_DIM> _gridSize;
428  // total number of VGPRs per work-item
429  int numVgprs;
430  // total number of SGPRs per wavefront
431  int numSgprs;
432  // id of AQL queue in which this entry is placed
433  uint32_t _queueId;
435  // raw AQL packet pointer
436  void *dispPkt;
437  // host-side addr of the dispatch packet
439  // pointer to bool
441  // base address of the raw machine code
443  // base address of the kernel args
462  int _ldsSize;
465  std::array<int, MAX_DIM> _wgId;
466  std::array<int, MAX_DIM> _numWg;
469  // The number of completed work groups
473 
474  std::bitset<NumVectorInitFields> initialVgprState;
475  std::bitset<NumScalarInitFields> initialSgprState;
476 };
477 
478 #endif // __GPU_COMPUTE_HSA_QUEUE_ENTRY__
WorkgroupIdX
@ WorkgroupIdX
Definition: kernel_code.hh:65
PrivSegWaveByteOffset
@ PrivSegWaveByteOffset
Definition: kernel_code.hh:69
HSAQueueEntry::initialSgprState
std::bitset< NumScalarInitFields > initialSgprState
Definition: hsa_queue_entry.hh:475
AMDKernelCode::enable_sgpr_kernarg_segment_ptr
uint32_t enable_sgpr_kernarg_segment_ptr
Definition: kernel_code.hh:150
HSAQueueEntry::_contextId
int _contextId
Definition: hsa_queue_entry.hh:464
HSAQueueEntry::initialVgprState
std::bitset< NumVectorInitFields > initialVgprState
Definition: hsa_queue_entry.hh:474
HSAQueueEntry::numWgCompleted
int numWgCompleted() const
Definition: hsa_queue_entry.hh:247
HSAQueueEntry::hostDispPktAddr
Addr hostDispPktAddr() const
Definition: hsa_queue_entry.hh:164
kernel_code.hh
PrivateSegBuf
@ PrivateSegBuf
Definition: kernel_code.hh:55
HSAQueueEntry::MAX_DIM
const static int MAX_DIM
Definition: hsa_queue_entry.hh:309
HSAQueueEntry::vgprBitEnabled
bool vgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:285
HSAQueueEntry::codeAddress
Addr codeAddress
Definition: hsa_queue_entry.hh:442
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
AMDKernelCode::enable_sgpr_flat_scratch_init
uint32_t enable_sgpr_flat_scratch_init
Definition: kernel_code.hh:152
HSAQueueEntry::_privMemPerItem
int _privMemPerItem
Definition: hsa_queue_entry.hh:463
WorkitemIdZ
@ WorkitemIdZ
Definition: kernel_code.hh:77
HSAQueueEntry::wgSize
int wgSize(int dim) const
Definition: hsa_queue_entry.hh:120
HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition: hsa_queue_entry.hh:114
HSAQueueEntry::dispPkt
void * dispPkt
Definition: hsa_queue_entry.hh:436
HSAQueueEntry::kernargAddress
Addr kernargAddress
Definition: hsa_queue_entry.hh:444
AMDKernelCode::enable_sgpr_dispatch_id
uint32_t enable_sgpr_dispatch_id
Definition: kernel_code.hh:151
QueuePtr
@ QueuePtr
Definition: kernel_code.hh:57
HSAQueueEntry::updateOutstandingWbs
void updateOutstandingWbs(int val)
Update the number of pending writeback requests.
Definition: hsa_queue_entry.hh:371
AMDKernelCode::enable_sgpr_workgroup_id_x
uint32_t enable_sgpr_workgroup_id_x
Definition: kernel_code.hh:127
HSAQueueEntry::numWgTotal
int numWgTotal() const
Definition: hsa_queue_entry.hh:253
HSAQueueEntry::ldsSize
int ldsSize() const
Definition: hsa_queue_entry.hh:188
HSAQueueEntry::wgId
void wgId(int dim, int val)
Definition: hsa_queue_entry.hh:215
KernargSegPtr
@ KernargSegPtr
Definition: kernel_code.hh:58
HSAQueueEntry::notifyWgCompleted
void notifyWgCompleted()
Definition: hsa_queue_entry.hh:241
HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition: hsa_queue_entry.hh:134
HSAQueueEntry
Definition: hsa_queue_entry.hh:58
hsa_queue.hh
WorkgroupIdZ
@ WorkgroupIdZ
Definition: kernel_code.hh:67
HSAQueueEntry::sgprBitEnabled
bool sgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:290
AMDKernelCode
Definition: kernel_code.hh:81
AMDKernelCode::enable_sgpr_queue_ptr
uint32_t enable_sgpr_queue_ptr
Definition: kernel_code.hh:149
HSAQueueEntry::completionSignal
Addr completionSignal() const
Definition: hsa_queue_entry.hh:170
HSAQueueEntry::amdQueue
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Definition: hsa_queue_entry.hh:306
HSAQueueEntry::numWg
int numWg(int dim) const
Definition: hsa_queue_entry.hh:234
HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:146
HSAQueueEntry::_queueId
uint32_t _queueId
Definition: hsa_queue_entry.hh:433
FlatScratchInit
@ FlatScratchInit
Definition: kernel_code.hh:60
divCeil
T divCeil(const T &a, const U &b)
Definition: intmath.hh:114
HSAQueueEntry::updateOutstandingInvs
void updateOutstandingInvs(int val)
update the number of pending invalidate requests
Definition: hsa_queue_entry.hh:334
HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:152
HSAQueueEntry::hostAMDQueueAddr
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
Definition: hsa_queue_entry.hh:299
AMDKernelCode::enable_sgpr_private_segment_wave_byte_offset
uint32_t enable_sgpr_private_segment_wave_byte_offset
Definition: kernel_code.hh:124
HSAQueueEntry::outstandingWbs
int outstandingWbs() const
Definition: hsa_queue_entry.hh:360
HSAQueueEntry::wgId
int wgId(int dim) const
Definition: hsa_queue_entry.hh:208
HSAQueueEntry::contextId
int contextId() const
Definition: hsa_queue_entry.hh:196
AMDKernelCode::enable_sgpr_grid_workgroup_count_z
uint32_t enable_sgpr_grid_workgroup_count_z
Definition: kernel_code.hh:156
hsa_packet.hh
HSAQueueEntry::globalWgId
void globalWgId(int val)
Definition: hsa_queue_entry.hh:228
HSAQueueEntry::dispPktPtr
void * dispPktPtr()
Definition: hsa_queue_entry.hh:158
HSAQueueEntry::gridSize
int gridSize(int dim) const
Definition: hsa_queue_entry.hh:127
HSAQueueEntry::_completionSignal
Addr _completionSignal
Definition: hsa_queue_entry.hh:440
GridWorkgroupCountX
@ GridWorkgroupCountX
Definition: kernel_code.hh:62
HSAQueueEntry::_globalWgId
int _globalWgId
Definition: hsa_queue_entry.hh:471
HSAQueueEntry::_hostDispPktAddr
Addr _hostDispPktAddr
Definition: hsa_queue_entry.hh:438
HSAQueueEntry::dispatchComplete
bool dispatchComplete
Definition: hsa_queue_entry.hh:472
WorkitemIdX
@ WorkitemIdX
Definition: kernel_code.hh:75
AMDKernelCode::enable_vgpr_workitem_id
uint32_t enable_vgpr_workitem_id
Definition: kernel_code.hh:131
X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:769
HSAQueueEntry::isInvDone
bool isInvDone() const
Is invalidate done?
Definition: hsa_queue_entry.hh:353
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:148
HSAQueueEntry::_numWgTotal
int _numWgTotal
Definition: hsa_queue_entry.hh:467
HSAQueueEntry::_wgId
std::array< int, MAX_DIM > _wgId
Definition: hsa_queue_entry.hh:465
AMDKernelCode::enable_sgpr_grid_workgroup_count_x
uint32_t enable_sgpr_grid_workgroup_count_x
Definition: kernel_code.hh:154
HSAQueueEntry::_dispatchId
int _dispatchId
Definition: hsa_queue_entry.hh:434
HSAQueueEntry::kernargAddr
Addr kernargAddr() const
Definition: hsa_queue_entry.hh:182
AMDKernelCode::enable_sgpr_workgroup_id_y
uint32_t enable_sgpr_workgroup_id_y
Definition: kernel_code.hh:128
HSAQueueEntry::_wgSize
std::array< int, MAX_DIM > _wgSize
Definition: hsa_queue_entry.hh:425
HSAQueueEntry::numWgAtBarrier
int numWgAtBarrier() const
Definition: hsa_queue_entry.hh:280
AMDKernelCode::enable_sgpr_workgroup_id_z
uint32_t enable_sgpr_workgroup_id_z
Definition: kernel_code.hh:129
HSAQueueEntry::_outstandingInvs
int _outstandingInvs
Number of outstanding invs for the kernel.
Definition: hsa_queue_entry.hh:453
HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition: hsa_queue_entry.hh:176
HSAQueueEntry::markInvDone
void markInvDone()
Forcefully change the state to be inv done.
Definition: hsa_queue_entry.hh:344
AMDKernelCode::enable_sgpr_private_segment_buffer
uint32_t enable_sgpr_private_segment_buffer
Definition: kernel_code.hh:147
HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition: hsa_queue_entry.hh:140
_hsa_dispatch_packet_s
Definition: hsa_packet.hh:51
_amd_queue_s
Definition: hsa_queue.hh:63
HSAQueueEntry::_outstandingWbs
int _outstandingWbs
Number of outstanding wbs for the kernel values: 0: 1)initial value, flush has not started for the ke...
Definition: hsa_queue_entry.hh:461
HSAQueueEntry::numSgprs
int numSgprs
Definition: hsa_queue_entry.hh:431
types.hh
WorkitemIdY
@ WorkitemIdY
Definition: kernel_code.hh:76
HSAQueueEntry::privMemPerItem
int privMemPerItem() const
Definition: hsa_queue_entry.hh:193
HSAQueueEntry::parseKernelCode
void parseKernelCode(AMDKernelCode *akc)
Definition: hsa_queue_entry.hh:379
DispatchId
@ DispatchId
Definition: kernel_code.hh:59
HSAQueueEntry::_ldsSize
int _ldsSize
Definition: hsa_queue_entry.hh:462
HSAQueueEntry::_numWg
std::array< int, MAX_DIM > _numWg
Definition: hsa_queue_entry.hh:466
WorkgroupIdY
@ WorkgroupIdY
Definition: kernel_code.hh:66
HSAQueueEntry::_numWgCompleted
int _numWgCompleted
Definition: hsa_queue_entry.hh:470
HSAQueueEntry::dispComplete
bool dispComplete() const
Definition: hsa_queue_entry.hh:202
HSAQueueEntry::numWgArrivedAtBarrier
int numWgArrivedAtBarrier
Definition: hsa_queue_entry.hh:468
HSAQueueEntry::_gridSize
std::array< int, MAX_DIM > _gridSize
Definition: hsa_queue_entry.hh:427
HSAQueueEntry::numVgprs
int numVgprs
Definition: hsa_queue_entry.hh:429
intmath.hh
PrivateSegSize
@ PrivateSegSize
Definition: kernel_code.hh:61
GridWorkgroupCountY
@ GridWorkgroupCountY
Definition: kernel_code.hh:63
HSAQueueEntry::kernName
std::string kernName
Definition: hsa_queue_entry.hh:423
AMDKernelCode::enable_sgpr_grid_workgroup_count_y
uint32_t enable_sgpr_grid_workgroup_count_y
Definition: kernel_code.hh:155
HSAQueueEntry::isInvStarted
bool isInvStarted()
Whether invalidate has started or finished -1 is the initial value indicating inv has not started for...
Definition: hsa_queue_entry.hh:323
AMDKernelCode::enable_sgpr_dispatch_ptr
uint32_t enable_sgpr_dispatch_ptr
Definition: kernel_code.hh:148
AMDKernelCode::enable_sgpr_private_segment_size
uint32_t enable_sgpr_private_segment_size
Definition: kernel_code.hh:153
GridWorkgroupCountZ
@ GridWorkgroupCountZ
Definition: kernel_code.hh:64
HSAQueueEntry::markWgDispatch
void markWgDispatch()
Definition: hsa_queue_entry.hh:259
HSAQueueEntry::globalWgId
int globalWgId() const
Definition: hsa_queue_entry.hh:222
HSAQueueEntry::outstandingInvs
int outstandingInvs()
Definition: hsa_queue_entry.hh:313
WorkgroupInfo
@ WorkgroupInfo
Definition: kernel_code.hh:68
DispatchPtr
@ DispatchPtr
Definition: kernel_code.hh:56
HSAQueueEntry::HSAQueueEntry
HSAQueueEntry(std::string kernel_name, uint32_t queue_id, int dispatch_id, void *disp_pkt, AMDKernelCode *akc, Addr host_pkt_addr, Addr code_addr)
Definition: hsa_queue_entry.hh:61
AMDKernelCode::enable_sgpr_workgroup_info
uint32_t enable_sgpr_workgroup_info
Definition: kernel_code.hh:130

Generated on Tue Jun 22 2021 15:28:28 for gem5 by doxygen 1.8.17