gem5  v20.1.0.0
hsa_queue_entry.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Anthony Gutierrez
34  */
35 
45 #ifndef __GPU_COMPUTE_HSA_QUEUE_ENTRY__
46 #define __GPU_COMPUTE_HSA_QUEUE_ENTRY__
47 
48 #include <bitset>
49 #include <cstdint>
50 #include <cstring>
51 #include <iostream>
52 #include <vector>
53 
54 #include "base/intmath.hh"
55 #include "base/types.hh"
56 #include "dev/hsa/hsa_packet.hh"
57 #include "dev/hsa/hsa_queue.hh"
59 
61 {
62  public:
63  HSAQueueEntry(std::string kernel_name, uint32_t queue_id,
64  int dispatch_id, void *disp_pkt, AMDKernelCode *akc,
65  Addr host_pkt_addr, Addr code_addr)
66  : kernName(kernel_name),
67  _wgSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_x,
68  (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_y,
69  (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_z}},
70  _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
71  (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
72  (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
73  numVgprs(akc->workitem_vgpr_count),
74  numSgprs(akc->wavefront_sgpr_count),
75  _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
76  _hostDispPktAddr(host_pkt_addr),
78  ->completion_signal),
79  codeAddress(code_addr),
80  kernargAddress(((_hsa_dispatch_packet_t*)disp_pkt)->kernarg_address),
82  _ldsSize((int)((_hsa_dispatch_packet_t*)disp_pkt)->
83  group_segment_size),
84  _privMemPerItem((int)((_hsa_dispatch_packet_t*)disp_pkt)->
85  private_segment_size),
86  _contextId(0), _wgId{{ 0, 0, 0 }},
88  _globalWgId(0), dispatchComplete(false)
89 
90  {
91  // Precompiled BLIT kernels actually violate the spec a bit
92  // and don't set many of the required akc fields. For these kernels,
93  // we need to rip register usage from the resource registers.
94  //
95  // We can't get an exact number of registers from the resource
96  // registers because they round, but we can get an upper bound on it
97  if (!numVgprs)
98  numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
99 
100  // TODO: Granularity changes for GFX9!
101  if (!numSgprs)
102  numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
103 
104  initialVgprState.reset();
105  initialSgprState.reset();
106 
107  for (int i = 0; i < MAX_DIM; ++i) {
109  _numWgTotal *= _numWg[i];
110  }
111 
112  parseKernelCode(akc);
113  }
114 
115  const std::string&
116  kernelName() const
117  {
118  return kernName;
119  }
120 
121  int
122  wgSize(int dim) const
123  {
124  assert(dim < MAX_DIM);
125  return _wgSize[dim];
126  }
127 
128  int
129  gridSize(int dim) const
130  {
131  assert(dim < MAX_DIM);
132  return _gridSize[dim];
133  }
134 
135  int
137  {
138  return numVgprs;
139  }
140 
141  int
143  {
144  return numSgprs;
145  }
146 
147  uint32_t
148  queueId() const
149  {
150  return _queueId;
151  }
152 
153  int
154  dispatchId() const
155  {
156  return _dispatchId;
157  }
158 
159  void*
161  {
162  return dispPkt;
163  }
164 
165  Addr
167  {
168  return _hostDispPktAddr;
169  }
170 
171  Addr
173  {
174  return _completionSignal;
175  }
176 
177  Addr
178  codeAddr() const
179  {
180  return codeAddress;
181  }
182 
183  Addr
184  kernargAddr() const
185  {
186  return kernargAddress;
187  }
188 
189  int
190  ldsSize() const
191  {
192  return _ldsSize;
193  }
194 
195  int privMemPerItem() const { return _privMemPerItem; }
196 
197  int
198  contextId() const
199  {
200  return _contextId;
201  }
202 
203  bool
204  dispComplete() const
205  {
206  return dispatchComplete;
207  }
208 
209  int
210  wgId(int dim) const
211  {
212  assert(dim < MAX_DIM);
213  return _wgId[dim];
214  }
215 
216  void
217  wgId(int dim, int val)
218  {
219  assert(dim < MAX_DIM);
220  _wgId[dim] = val;
221  }
222 
223  int
224  globalWgId() const
225  {
226  return _globalWgId;
227  }
228 
229  void
231  {
232  _globalWgId = val;
233  }
234 
235  int
236  numWg(int dim) const
237  {
238  assert(dim < MAX_DIM);
239  return _numWg[dim];
240  }
241 
242  void
244  {
245  ++_numWgCompleted;
246  }
247 
248  int
250  {
251  return _numWgCompleted;
252  }
253 
254  int
255  numWgTotal() const
256  {
257  return _numWgTotal;
258  }
259 
260  void
262  {
263  ++_wgId[0];
264  ++_globalWgId;
265 
266  if (wgId(0) * wgSize(0) >= gridSize(0)) {
267  _wgId[0] = 0;
268  ++_wgId[1];
269 
270  if (wgId(1) * wgSize(1) >= gridSize(1)) {
271  _wgId[1] = 0;
272  ++_wgId[2];
273 
274  if (wgId(2) * wgSize(2) >= gridSize(2)) {
275  dispatchComplete = true;
276  }
277  }
278  }
279  }
280 
281  int
283  {
284  return numWgArrivedAtBarrier;
285  }
286 
287  bool vgprBitEnabled(int bit) const
288  {
289  return initialVgprState.test(bit);
290  }
291 
292  bool sgprBitEnabled(int bit) const
293  {
294  return initialSgprState.test(bit);
295  }
296 
302 
309 
310  // the maximum number of dimensions for a grid or workgroup
311  const static int MAX_DIM = 3;
312 
313  /* getter */
314  int
316  return _outstandingInvs;
317  }
318 
324  bool
326  {
327  return (_outstandingInvs != -1);
328  }
329 
335  void
337  {
339  assert(_outstandingInvs >= 0);
340  }
341 
345  void
347  {
348  _outstandingInvs = 0;
349  }
350 
354  bool
355  isInvDone() const
356  {
357  assert(_outstandingInvs >= 0);
358  return (_outstandingInvs == 0);
359  }
360 
361  int
363  {
364  return _outstandingWbs;
365  }
366 
372  void
374  {
375  _outstandingWbs += val;
376  assert(_outstandingWbs >= 0);
377  }
378 
379  private:
380  void
382  {
389  akc->enable_sgpr_queue_ptr);
414 
419  initialVgprState.set(WorkitemIdX, true);
422  }
423 
424  // name of the kernel associated with the AQL entry
425  std::string kernName;
426  // workgroup Size (3 dimensions)
427  std::array<int, MAX_DIM> _wgSize;
428  // grid Size (3 dimensions)
429  std::array<int, MAX_DIM> _gridSize;
430  // total number of VGPRs per work-item
431  int numVgprs;
432  // total number of SGPRs per wavefront
433  int numSgprs;
434  // id of AQL queue in which this entry is placed
435  uint32_t _queueId;
437  // raw AQL packet pointer
438  void *dispPkt;
439  // host-side addr of the dispatch packet
441  // pointer to bool
443  // base address of the raw machine code
445  // base address of the kernel args
464  int _ldsSize;
467  std::array<int, MAX_DIM> _wgId;
468  std::array<int, MAX_DIM> _numWg;
471  // The number of completed work groups
475 
476  std::bitset<NumVectorInitFields> initialVgprState;
477  std::bitset<NumScalarInitFields> initialSgprState;
478 };
479 
480 #endif // __GPU_COMPUTE_HSA_QUEUE_ENTRY__
WorkgroupIdX
@ WorkgroupIdX
Definition: kernel_code.hh:67
PrivSegWaveByteOffset
@ PrivSegWaveByteOffset
Definition: kernel_code.hh:71
HSAQueueEntry::initialSgprState
std::bitset< NumScalarInitFields > initialSgprState
Definition: hsa_queue_entry.hh:477
AMDKernelCode::enable_sgpr_kernarg_segment_ptr
uint32_t enable_sgpr_kernarg_segment_ptr
Definition: kernel_code.hh:152
HSAQueueEntry::_contextId
int _contextId
Definition: hsa_queue_entry.hh:466
HSAQueueEntry::initialVgprState
std::bitset< NumVectorInitFields > initialVgprState
Definition: hsa_queue_entry.hh:476
HSAQueueEntry::numWgCompleted
int numWgCompleted() const
Definition: hsa_queue_entry.hh:249
HSAQueueEntry::hostDispPktAddr
Addr hostDispPktAddr() const
Definition: hsa_queue_entry.hh:166
kernel_code.hh
PrivateSegBuf
@ PrivateSegBuf
Definition: kernel_code.hh:57
HSAQueueEntry::MAX_DIM
const static int MAX_DIM
Definition: hsa_queue_entry.hh:311
HSAQueueEntry::vgprBitEnabled
bool vgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:287
HSAQueueEntry::codeAddress
Addr codeAddress
Definition: hsa_queue_entry.hh:444
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
AMDKernelCode::enable_sgpr_flat_scratch_init
uint32_t enable_sgpr_flat_scratch_init
Definition: kernel_code.hh:154
HSAQueueEntry::_privMemPerItem
int _privMemPerItem
Definition: hsa_queue_entry.hh:465
WorkitemIdZ
@ WorkitemIdZ
Definition: kernel_code.hh:79
HSAQueueEntry::wgSize
int wgSize(int dim) const
Definition: hsa_queue_entry.hh:122
HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition: hsa_queue_entry.hh:116
HSAQueueEntry::dispPkt
void * dispPkt
Definition: hsa_queue_entry.hh:438
HSAQueueEntry::kernargAddress
Addr kernargAddress
Definition: hsa_queue_entry.hh:446
AMDKernelCode::enable_sgpr_dispatch_id
uint32_t enable_sgpr_dispatch_id
Definition: kernel_code.hh:153
QueuePtr
@ QueuePtr
Definition: kernel_code.hh:59
HSAQueueEntry::updateOutstandingWbs
void updateOutstandingWbs(int val)
Update the number of pending writeback requests.
Definition: hsa_queue_entry.hh:373
AMDKernelCode::enable_sgpr_workgroup_id_x
uint32_t enable_sgpr_workgroup_id_x
Definition: kernel_code.hh:129
HSAQueueEntry::numWgTotal
int numWgTotal() const
Definition: hsa_queue_entry.hh:255
HSAQueueEntry::ldsSize
int ldsSize() const
Definition: hsa_queue_entry.hh:190
HSAQueueEntry::wgId
void wgId(int dim, int val)
Definition: hsa_queue_entry.hh:217
KernargSegPtr
@ KernargSegPtr
Definition: kernel_code.hh:60
HSAQueueEntry::notifyWgCompleted
void notifyWgCompleted()
Definition: hsa_queue_entry.hh:243
HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition: hsa_queue_entry.hh:136
HSAQueueEntry
Definition: hsa_queue_entry.hh:60
hsa_queue.hh
WorkgroupIdZ
@ WorkgroupIdZ
Definition: kernel_code.hh:69
HSAQueueEntry::sgprBitEnabled
bool sgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:292
AMDKernelCode
Definition: kernel_code.hh:83
AMDKernelCode::enable_sgpr_queue_ptr
uint32_t enable_sgpr_queue_ptr
Definition: kernel_code.hh:151
HSAQueueEntry::completionSignal
Addr completionSignal() const
Definition: hsa_queue_entry.hh:172
HSAQueueEntry::amdQueue
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Definition: hsa_queue_entry.hh:308
HSAQueueEntry::numWg
int numWg(int dim) const
Definition: hsa_queue_entry.hh:236
HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:148
HSAQueueEntry::_queueId
uint32_t _queueId
Definition: hsa_queue_entry.hh:435
FlatScratchInit
@ FlatScratchInit
Definition: kernel_code.hh:62
divCeil
T divCeil(const T &a, const U &b)
Definition: intmath.hh:114
HSAQueueEntry::updateOutstandingInvs
void updateOutstandingInvs(int val)
update the number of pending invalidate requests
Definition: hsa_queue_entry.hh:336
HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:154
HSAQueueEntry::hostAMDQueueAddr
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
Definition: hsa_queue_entry.hh:301
AMDKernelCode::enable_sgpr_private_segment_wave_byte_offset
uint32_t enable_sgpr_private_segment_wave_byte_offset
Definition: kernel_code.hh:126
HSAQueueEntry::outstandingWbs
int outstandingWbs() const
Definition: hsa_queue_entry.hh:362
HSAQueueEntry::wgId
int wgId(int dim) const
Definition: hsa_queue_entry.hh:210
HSAQueueEntry::contextId
int contextId() const
Definition: hsa_queue_entry.hh:198
AMDKernelCode::enable_sgpr_grid_workgroup_count_z
uint32_t enable_sgpr_grid_workgroup_count_z
Definition: kernel_code.hh:158
hsa_packet.hh
HSAQueueEntry::globalWgId
void globalWgId(int val)
Definition: hsa_queue_entry.hh:230
HSAQueueEntry::dispPktPtr
void * dispPktPtr()
Definition: hsa_queue_entry.hh:160
HSAQueueEntry::gridSize
int gridSize(int dim) const
Definition: hsa_queue_entry.hh:129
HSAQueueEntry::_completionSignal
Addr _completionSignal
Definition: hsa_queue_entry.hh:442
GridWorkgroupCountX
@ GridWorkgroupCountX
Definition: kernel_code.hh:64
HSAQueueEntry::_globalWgId
int _globalWgId
Definition: hsa_queue_entry.hh:473
HSAQueueEntry::_hostDispPktAddr
Addr _hostDispPktAddr
Definition: hsa_queue_entry.hh:440
HSAQueueEntry::dispatchComplete
bool dispatchComplete
Definition: hsa_queue_entry.hh:474
WorkitemIdX
@ WorkitemIdX
Definition: kernel_code.hh:77
AMDKernelCode::enable_vgpr_workitem_id
uint32_t enable_vgpr_workitem_id
Definition: kernel_code.hh:133
X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:769
HSAQueueEntry::isInvDone
bool isInvDone() const
Is invalidate done?
Definition: hsa_queue_entry.hh:355
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
HSAQueueEntry::_numWgTotal
int _numWgTotal
Definition: hsa_queue_entry.hh:469
HSAQueueEntry::_wgId
std::array< int, MAX_DIM > _wgId
Definition: hsa_queue_entry.hh:467
AMDKernelCode::enable_sgpr_grid_workgroup_count_x
uint32_t enable_sgpr_grid_workgroup_count_x
Definition: kernel_code.hh:156
HSAQueueEntry::_dispatchId
int _dispatchId
Definition: hsa_queue_entry.hh:436
HSAQueueEntry::kernargAddr
Addr kernargAddr() const
Definition: hsa_queue_entry.hh:184
AMDKernelCode::enable_sgpr_workgroup_id_y
uint32_t enable_sgpr_workgroup_id_y
Definition: kernel_code.hh:130
HSAQueueEntry::_wgSize
std::array< int, MAX_DIM > _wgSize
Definition: hsa_queue_entry.hh:427
HSAQueueEntry::numWgAtBarrier
int numWgAtBarrier() const
Definition: hsa_queue_entry.hh:282
AMDKernelCode::enable_sgpr_workgroup_id_z
uint32_t enable_sgpr_workgroup_id_z
Definition: kernel_code.hh:131
HSAQueueEntry::_outstandingInvs
int _outstandingInvs
Number of outstanding invs for the kernel.
Definition: hsa_queue_entry.hh:455
HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition: hsa_queue_entry.hh:178
HSAQueueEntry::markInvDone
void markInvDone()
Forcefully change the state to be inv done.
Definition: hsa_queue_entry.hh:346
AMDKernelCode::enable_sgpr_private_segment_buffer
uint32_t enable_sgpr_private_segment_buffer
Definition: kernel_code.hh:149
HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition: hsa_queue_entry.hh:142
_hsa_dispatch_packet_s
Definition: hsa_packet.hh:53
_amd_queue_s
Definition: hsa_queue.hh:65
HSAQueueEntry::_outstandingWbs
int _outstandingWbs
Number of outstanding wbs for the kernel values: 0: 1)initial value, flush has not started for the ke...
Definition: hsa_queue_entry.hh:463
HSAQueueEntry::numSgprs
int numSgprs
Definition: hsa_queue_entry.hh:433
types.hh
WorkitemIdY
@ WorkitemIdY
Definition: kernel_code.hh:78
HSAQueueEntry::privMemPerItem
int privMemPerItem() const
Definition: hsa_queue_entry.hh:195
HSAQueueEntry::parseKernelCode
void parseKernelCode(AMDKernelCode *akc)
Definition: hsa_queue_entry.hh:381
DispatchId
@ DispatchId
Definition: kernel_code.hh:61
HSAQueueEntry::_ldsSize
int _ldsSize
Definition: hsa_queue_entry.hh:464
HSAQueueEntry::_numWg
std::array< int, MAX_DIM > _numWg
Definition: hsa_queue_entry.hh:468
WorkgroupIdY
@ WorkgroupIdY
Definition: kernel_code.hh:68
HSAQueueEntry::_numWgCompleted
int _numWgCompleted
Definition: hsa_queue_entry.hh:472
HSAQueueEntry::dispComplete
bool dispComplete() const
Definition: hsa_queue_entry.hh:204
HSAQueueEntry::numWgArrivedAtBarrier
int numWgArrivedAtBarrier
Definition: hsa_queue_entry.hh:470
HSAQueueEntry::_gridSize
std::array< int, MAX_DIM > _gridSize
Definition: hsa_queue_entry.hh:429
HSAQueueEntry::numVgprs
int numVgprs
Definition: hsa_queue_entry.hh:431
intmath.hh
PrivateSegSize
@ PrivateSegSize
Definition: kernel_code.hh:63
GridWorkgroupCountY
@ GridWorkgroupCountY
Definition: kernel_code.hh:65
HSAQueueEntry::kernName
std::string kernName
Definition: hsa_queue_entry.hh:425
AMDKernelCode::enable_sgpr_grid_workgroup_count_y
uint32_t enable_sgpr_grid_workgroup_count_y
Definition: kernel_code.hh:157
HSAQueueEntry::isInvStarted
bool isInvStarted()
Whether invalidate has started or finished -1 is the initial value indicating inv has not started for...
Definition: hsa_queue_entry.hh:325
AMDKernelCode::enable_sgpr_dispatch_ptr
uint32_t enable_sgpr_dispatch_ptr
Definition: kernel_code.hh:150
AMDKernelCode::enable_sgpr_private_segment_size
uint32_t enable_sgpr_private_segment_size
Definition: kernel_code.hh:155
GridWorkgroupCountZ
@ GridWorkgroupCountZ
Definition: kernel_code.hh:66
HSAQueueEntry::markWgDispatch
void markWgDispatch()
Definition: hsa_queue_entry.hh:261
HSAQueueEntry::globalWgId
int globalWgId() const
Definition: hsa_queue_entry.hh:224
HSAQueueEntry::outstandingInvs
int outstandingInvs()
Definition: hsa_queue_entry.hh:315
WorkgroupInfo
@ WorkgroupInfo
Definition: kernel_code.hh:70
DispatchPtr
@ DispatchPtr
Definition: kernel_code.hh:58
HSAQueueEntry::HSAQueueEntry
HSAQueueEntry(std::string kernel_name, uint32_t queue_id, int dispatch_id, void *disp_pkt, AMDKernelCode *akc, Addr host_pkt_addr, Addr code_addr)
Definition: hsa_queue_entry.hh:63
AMDKernelCode::enable_sgpr_workgroup_info
uint32_t enable_sgpr_workgroup_info
Definition: kernel_code.hh:132

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17