gem5  v21.1.0.2
hsa_queue_entry.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
43 #ifndef __GPU_COMPUTE_HSA_QUEUE_ENTRY__
44 #define __GPU_COMPUTE_HSA_QUEUE_ENTRY__
45 
46 #include <bitset>
47 #include <cstdint>
48 #include <cstring>
49 #include <iostream>
50 #include <vector>
51 
52 #include "base/intmath.hh"
53 #include "base/types.hh"
54 #include "dev/hsa/hsa_packet.hh"
55 #include "dev/hsa/hsa_queue.hh"
57 
58 namespace gem5
59 {
60 
62 {
63  public:
64  HSAQueueEntry(std::string kernel_name, uint32_t queue_id,
65  int dispatch_id, void *disp_pkt, AMDKernelCode *akc,
66  Addr host_pkt_addr, Addr code_addr)
67  : kernName(kernel_name),
68  _wgSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_x,
69  (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_y,
70  (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_z}},
71  _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
72  (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
73  (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
74  numVgprs(akc->workitem_vgpr_count),
75  numSgprs(akc->wavefront_sgpr_count),
76  _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
77  _hostDispPktAddr(host_pkt_addr),
78  _completionSignal(((_hsa_dispatch_packet_t*)disp_pkt)
79  ->completion_signal),
80  codeAddress(code_addr),
81  kernargAddress(((_hsa_dispatch_packet_t*)disp_pkt)->kernarg_address),
83  _ldsSize((int)((_hsa_dispatch_packet_t*)disp_pkt)->
84  group_segment_size),
85  _privMemPerItem((int)((_hsa_dispatch_packet_t*)disp_pkt)->
86  private_segment_size),
87  _contextId(0), _wgId{{ 0, 0, 0 }},
89  _globalWgId(0), dispatchComplete(false)
90 
91  {
92  // Precompiled BLIT kernels actually violate the spec a bit
93  // and don't set many of the required akc fields. For these kernels,
94  // we need to rip register usage from the resource registers.
95  //
96  // We can't get an exact number of registers from the resource
97  // registers because they round, but we can get an upper bound on it
98  if (!numVgprs)
99  numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
100 
101  // TODO: Granularity changes for GFX9!
102  if (!numSgprs)
103  numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
104 
105  initialVgprState.reset();
106  initialSgprState.reset();
107 
108  for (int i = 0; i < MAX_DIM; ++i) {
110  _numWgTotal *= _numWg[i];
111  }
112 
113  parseKernelCode(akc);
114  }
115 
116  const std::string&
117  kernelName() const
118  {
119  return kernName;
120  }
121 
122  int
123  wgSize(int dim) const
124  {
125  assert(dim < MAX_DIM);
126  return _wgSize[dim];
127  }
128 
129  int
130  gridSize(int dim) const
131  {
132  assert(dim < MAX_DIM);
133  return _gridSize[dim];
134  }
135 
136  int
138  {
139  return numVgprs;
140  }
141 
142  int
144  {
145  return numSgprs;
146  }
147 
148  uint32_t
149  queueId() const
150  {
151  return _queueId;
152  }
153 
154  int
155  dispatchId() const
156  {
157  return _dispatchId;
158  }
159 
160  void*
162  {
163  return dispPkt;
164  }
165 
166  Addr
168  {
169  return _hostDispPktAddr;
170  }
171 
172  Addr
174  {
175  return _completionSignal;
176  }
177 
178  Addr
179  codeAddr() const
180  {
181  return codeAddress;
182  }
183 
184  Addr
185  kernargAddr() const
186  {
187  return kernargAddress;
188  }
189 
190  int
191  ldsSize() const
192  {
193  return _ldsSize;
194  }
195 
196  int privMemPerItem() const { return _privMemPerItem; }
197 
198  int
199  contextId() const
200  {
201  return _contextId;
202  }
203 
204  bool
205  dispComplete() const
206  {
207  return dispatchComplete;
208  }
209 
210  int
211  wgId(int dim) const
212  {
213  assert(dim < MAX_DIM);
214  return _wgId[dim];
215  }
216 
217  void
218  wgId(int dim, int val)
219  {
220  assert(dim < MAX_DIM);
221  _wgId[dim] = val;
222  }
223 
224  int
225  globalWgId() const
226  {
227  return _globalWgId;
228  }
229 
230  void
232  {
233  _globalWgId = val;
234  }
235 
236  int
237  numWg(int dim) const
238  {
239  assert(dim < MAX_DIM);
240  return _numWg[dim];
241  }
242 
243  void
245  {
246  ++_numWgCompleted;
247  }
248 
249  int
251  {
252  return _numWgCompleted;
253  }
254 
255  int
256  numWgTotal() const
257  {
258  return _numWgTotal;
259  }
260 
261  void
263  {
264  ++_wgId[0];
265  ++_globalWgId;
266 
267  if (wgId(0) * wgSize(0) >= gridSize(0)) {
268  _wgId[0] = 0;
269  ++_wgId[1];
270 
271  if (wgId(1) * wgSize(1) >= gridSize(1)) {
272  _wgId[1] = 0;
273  ++_wgId[2];
274 
275  if (wgId(2) * wgSize(2) >= gridSize(2)) {
276  dispatchComplete = true;
277  }
278  }
279  }
280  }
281 
282  int
284  {
285  return numWgArrivedAtBarrier;
286  }
287 
288  bool vgprBitEnabled(int bit) const
289  {
290  return initialVgprState.test(bit);
291  }
292 
293  bool sgprBitEnabled(int bit) const
294  {
295  return initialSgprState.test(bit);
296  }
297 
303 
310 
311  // the maximum number of dimensions for a grid or workgroup
312  const static int MAX_DIM = 3;
313 
314  /* getter */
315  int
317  return _outstandingInvs;
318  }
319 
325  bool
327  {
328  return (_outstandingInvs != -1);
329  }
330 
336  void
338  {
340  assert(_outstandingInvs >= 0);
341  }
342 
346  void
348  {
349  _outstandingInvs = 0;
350  }
351 
355  bool
356  isInvDone() const
357  {
358  assert(_outstandingInvs >= 0);
359  return (_outstandingInvs == 0);
360  }
361 
362  int
364  {
365  return _outstandingWbs;
366  }
367 
373  void
375  {
376  _outstandingWbs += val;
377  assert(_outstandingWbs >= 0);
378  }
379 
380  private:
381  void
383  {
390  akc->enable_sgpr_queue_ptr);
415 
420  initialVgprState.set(WorkitemIdX, true);
423  }
424 
425  // name of the kernel associated with the AQL entry
426  std::string kernName;
427  // workgroup Size (3 dimensions)
428  std::array<int, MAX_DIM> _wgSize;
429  // grid Size (3 dimensions)
430  std::array<int, MAX_DIM> _gridSize;
431  // total number of VGPRs per work-item
432  int numVgprs;
433  // total number of SGPRs per wavefront
434  int numSgprs;
435  // id of AQL queue in which this entry is placed
436  uint32_t _queueId;
438  // raw AQL packet pointer
439  void *dispPkt;
440  // host-side addr of the dispatch packet
442  // pointer to bool
444  // base address of the raw machine code
446  // base address of the kernel args
465  int _ldsSize;
468  std::array<int, MAX_DIM> _wgId;
469  std::array<int, MAX_DIM> _numWg;
472  // The number of completed work groups
476 
477  std::bitset<NumVectorInitFields> initialVgprState;
478  std::bitset<NumScalarInitFields> initialSgprState;
479 };
480 
481 } // namespace gem5
482 
483 #endif // __GPU_COMPUTE_HSA_QUEUE_ENTRY__
gem5::_amd_queue_t
Definition: hsa_queue.hh:66
gem5::HSAQueueEntry::numWg
int numWg(int dim) const
Definition: hsa_queue_entry.hh:237
gem5::HSAQueueEntry::_hostDispPktAddr
Addr _hostDispPktAddr
Definition: hsa_queue_entry.hh:441
gem5::FlatScratchInit
@ FlatScratchInit
Definition: kernel_code.hh:63
gem5::HSAQueueEntry::parseKernelCode
void parseKernelCode(AMDKernelCode *akc)
Definition: hsa_queue_entry.hh:382
gem5::HSAQueueEntry::HSAQueueEntry
HSAQueueEntry(std::string kernel_name, uint32_t queue_id, int dispatch_id, void *disp_pkt, AMDKernelCode *akc, Addr host_pkt_addr, Addr code_addr)
Definition: hsa_queue_entry.hh:64
gem5::AMDKernelCode::enable_vgpr_workitem_id
uint32_t enable_vgpr_workitem_id
Definition: kernel_code.hh:134
gem5::WorkitemIdY
@ WorkitemIdY
Definition: kernel_code.hh:79
kernel_code.hh
gem5::HSAQueueEntry::_numWgTotal
int _numWgTotal
Definition: hsa_queue_entry.hh:470
gem5::HSAQueueEntry::privMemPerItem
int privMemPerItem() const
Definition: hsa_queue_entry.hh:196
gem5::HSAQueueEntry::_contextId
int _contextId
Definition: hsa_queue_entry.hh:467
gem5::HSAQueueEntry::_completionSignal
Addr _completionSignal
Definition: hsa_queue_entry.hh:443
gem5::AMDKernelCode::enable_sgpr_workgroup_id_x
uint32_t enable_sgpr_workgroup_id_x
Definition: kernel_code.hh:130
gem5::HSAQueueEntry::completionSignal
Addr completionSignal() const
Definition: hsa_queue_entry.hh:173
gem5::PrivateSegBuf
@ PrivateSegBuf
Definition: kernel_code.hh:58
gem5::AMDKernelCode::enable_sgpr_dispatch_id
uint32_t enable_sgpr_dispatch_id
Definition: kernel_code.hh:154
gem5::HSAQueueEntry::_numWgCompleted
int _numWgCompleted
Definition: hsa_queue_entry.hh:473
gem5::AMDKernelCode::enable_sgpr_grid_workgroup_count_z
uint32_t enable_sgpr_grid_workgroup_count_z
Definition: kernel_code.hh:159
gem5::HSAQueueEntry::_dispatchId
int _dispatchId
Definition: hsa_queue_entry.hh:437
gem5::HSAQueueEntry::numWgTotal
int numWgTotal() const
Definition: hsa_queue_entry.hh:256
gem5::HSAQueueEntry::contextId
int contextId() const
Definition: hsa_queue_entry.hh:199
gem5::HSAQueueEntry::kernargAddress
Addr kernargAddress
Definition: hsa_queue_entry.hh:447
gem5::AMDKernelCode::enable_sgpr_dispatch_ptr
uint32_t enable_sgpr_dispatch_ptr
Definition: kernel_code.hh:151
gem5::AMDKernelCode::enable_sgpr_private_segment_wave_byte_offset
uint32_t enable_sgpr_private_segment_wave_byte_offset
Definition: kernel_code.hh:127
gem5::HSAQueueEntry::_outstandingInvs
int _outstandingInvs
Number of outstanding invs for the kernel.
Definition: hsa_queue_entry.hh:456
gem5::HSAQueueEntry::_privMemPerItem
int _privMemPerItem
Definition: hsa_queue_entry.hh:466
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:61
gem5::AMDKernelCode::enable_sgpr_workgroup_id_z
uint32_t enable_sgpr_workgroup_id_z
Definition: kernel_code.hh:132
gem5::HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:149
gem5::HSAQueueEntry::wgId
void wgId(int dim, int val)
Definition: hsa_queue_entry.hh:218
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:775
gem5::WorkgroupInfo
@ WorkgroupInfo
Definition: kernel_code.hh:71
gem5::AMDKernelCode::enable_sgpr_private_segment_buffer
uint32_t enable_sgpr_private_segment_buffer
Definition: kernel_code.hh:150
gem5::HSAQueueEntry::wgId
int wgId(int dim) const
Definition: hsa_queue_entry.hh:211
gem5::HSAQueueEntry::markWgDispatch
void markWgDispatch()
Definition: hsa_queue_entry.hh:262
gem5::PrivSegWaveByteOffset
@ PrivSegWaveByteOffset
Definition: kernel_code.hh:72
gem5::HSAQueueEntry::kernName
std::string kernName
Definition: hsa_queue_entry.hh:426
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::HSAQueueEntry::kernargAddr
Addr kernargAddr() const
Definition: hsa_queue_entry.hh:185
gem5::HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition: hsa_queue_entry.hh:143
hsa_queue.hh
gem5::AMDKernelCode::enable_sgpr_private_segment_size
uint32_t enable_sgpr_private_segment_size
Definition: kernel_code.hh:156
gem5::HSAQueueEntry::dispatchComplete
bool dispatchComplete
Definition: hsa_queue_entry.hh:475
gem5::HSAQueueEntry::numWgAtBarrier
int numWgAtBarrier() const
Definition: hsa_queue_entry.hh:283
gem5::HSAQueueEntry::amdQueue
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Definition: hsa_queue_entry.hh:309
gem5::WorkgroupIdY
@ WorkgroupIdY
Definition: kernel_code.hh:69
gem5::WorkgroupIdZ
@ WorkgroupIdZ
Definition: kernel_code.hh:70
gem5::HSAQueueEntry::numWgArrivedAtBarrier
int numWgArrivedAtBarrier
Definition: hsa_queue_entry.hh:471
gem5::HSAQueueEntry::isInvStarted
bool isInvStarted()
Whether invalidate has started or finished -1 is the initial value indicating inv has not started for...
Definition: hsa_queue_entry.hh:326
gem5::PrivateSegSize
@ PrivateSegSize
Definition: kernel_code.hh:64
gem5::HSAQueueEntry::markInvDone
void markInvDone()
Forcefully change the state to be inv done.
Definition: hsa_queue_entry.hh:347
gem5::HSAQueueEntry::_queueId
uint32_t _queueId
Definition: hsa_queue_entry.hh:436
gem5::AMDKernelCode::enable_sgpr_workgroup_info
uint32_t enable_sgpr_workgroup_info
Definition: kernel_code.hh:133
gem5::HSAQueueEntry::dispPktPtr
void * dispPktPtr()
Definition: hsa_queue_entry.hh:161
gem5::HSAQueueEntry::MAX_DIM
const static int MAX_DIM
Definition: hsa_queue_entry.hh:312
gem5::WorkitemIdX
@ WorkitemIdX
Definition: kernel_code.hh:78
gem5::HSAQueueEntry::notifyWgCompleted
void notifyWgCompleted()
Definition: hsa_queue_entry.hh:244
gem5::HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition: hsa_queue_entry.hh:137
gem5::HSAQueueEntry::codeAddress
Addr codeAddress
Definition: hsa_queue_entry.hh:445
gem5::AMDKernelCode::enable_sgpr_flat_scratch_init
uint32_t enable_sgpr_flat_scratch_init
Definition: kernel_code.hh:155
gem5::HSAQueueEntry::vgprBitEnabled
bool vgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:288
gem5::KernargSegPtr
@ KernargSegPtr
Definition: kernel_code.hh:61
gem5::DispatchId
@ DispatchId
Definition: kernel_code.hh:62
gem5::HSAQueueEntry::_numWg
std::array< int, MAX_DIM > _numWg
Definition: hsa_queue_entry.hh:469
gem5::AMDKernelCode::enable_sgpr_kernarg_segment_ptr
uint32_t enable_sgpr_kernarg_segment_ptr
Definition: kernel_code.hh:153
gem5::HSAQueueEntry::outstandingInvs
int outstandingInvs()
Definition: hsa_queue_entry.hh:316
gem5::HSAQueueEntry::isInvDone
bool isInvDone() const
Is invalidate done?
Definition: hsa_queue_entry.hh:356
hsa_packet.hh
gem5::HSAQueueEntry::hostDispPktAddr
Addr hostDispPktAddr() const
Definition: hsa_queue_entry.hh:167
gem5::HSAQueueEntry::dispPkt
void * dispPkt
Definition: hsa_queue_entry.hh:439
gem5::HSAQueueEntry::updateOutstandingInvs
void updateOutstandingInvs(int val)
update the number of pending invalidate requests
Definition: hsa_queue_entry.hh:337
gem5::HSAQueueEntry::wgSize
int wgSize(int dim) const
Definition: hsa_queue_entry.hh:123
gem5::HSAQueueEntry::_globalWgId
int _globalWgId
Definition: hsa_queue_entry.hh:474
gem5::AMDKernelCode::enable_sgpr_queue_ptr
uint32_t enable_sgpr_queue_ptr
Definition: kernel_code.hh:152
gem5::HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition: hsa_queue_entry.hh:179
gem5::_hsa_dispatch_packet_t
Definition: hsa_packet.hh:55
gem5::HSAQueueEntry::dispComplete
bool dispComplete() const
Definition: hsa_queue_entry.hh:205
gem5::HSAQueueEntry::_gridSize
std::array< int, MAX_DIM > _gridSize
Definition: hsa_queue_entry.hh:430
gem5::HSAQueueEntry::globalWgId
int globalWgId() const
Definition: hsa_queue_entry.hh:225
gem5::HSAQueueEntry::gridSize
int gridSize(int dim) const
Definition: hsa_queue_entry.hh:130
gem5::GridWorkgroupCountX
@ GridWorkgroupCountX
Definition: kernel_code.hh:65
gem5::HSAQueueEntry::hostAMDQueueAddr
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
Definition: hsa_queue_entry.hh:302
gem5::HSAQueueEntry::initialSgprState
std::bitset< NumScalarInitFields > initialSgprState
Definition: hsa_queue_entry.hh:478
gem5::WorkitemIdZ
@ WorkitemIdZ
Definition: kernel_code.hh:80
gem5::HSAQueueEntry::numVgprs
int numVgprs
Definition: hsa_queue_entry.hh:432
gem5::GridWorkgroupCountZ
@ GridWorkgroupCountZ
Definition: kernel_code.hh:67
gem5::HSAQueueEntry::numWgCompleted
int numWgCompleted() const
Definition: hsa_queue_entry.hh:250
gem5::HSAQueueEntry::initialVgprState
std::bitset< NumVectorInitFields > initialVgprState
Definition: hsa_queue_entry.hh:477
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::HSAQueueEntry::globalWgId
void globalWgId(int val)
Definition: hsa_queue_entry.hh:231
gem5::divCeil
static constexpr T divCeil(const T &a, const U &b)
Definition: intmath.hh:110
gem5::HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition: hsa_queue_entry.hh:117
gem5::HSAQueueEntry::outstandingWbs
int outstandingWbs() const
Definition: hsa_queue_entry.hh:363
gem5::AMDKernelCode::enable_sgpr_workgroup_id_y
uint32_t enable_sgpr_workgroup_id_y
Definition: kernel_code.hh:131
gem5::HSAQueueEntry::_ldsSize
int _ldsSize
Definition: hsa_queue_entry.hh:465
gem5::QueuePtr
@ QueuePtr
Definition: kernel_code.hh:60
gem5::WorkgroupIdX
@ WorkgroupIdX
Definition: kernel_code.hh:68
types.hh
gem5::HSAQueueEntry::updateOutstandingWbs
void updateOutstandingWbs(int val)
Update the number of pending writeback requests.
Definition: hsa_queue_entry.hh:374
gem5::GridWorkgroupCountY
@ GridWorkgroupCountY
Definition: kernel_code.hh:66
gem5::DispatchPtr
@ DispatchPtr
Definition: kernel_code.hh:59
gem5::HSAQueueEntry::sgprBitEnabled
bool sgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:293
gem5::HSAQueueEntry::_outstandingWbs
int _outstandingWbs
Number of outstanding wbs for the kernel values: 0: 1)initial value, flush has not started for the ke...
Definition: hsa_queue_entry.hh:464
gem5::HSAQueueEntry::_wgSize
std::array< int, MAX_DIM > _wgSize
Definition: hsa_queue_entry.hh:428
gem5::AMDKernelCode::enable_sgpr_grid_workgroup_count_y
uint32_t enable_sgpr_grid_workgroup_count_y
Definition: kernel_code.hh:158
gem5::HSAQueueEntry::numSgprs
int numSgprs
Definition: hsa_queue_entry.hh:434
gem5::HSAQueueEntry::ldsSize
int ldsSize() const
Definition: hsa_queue_entry.hh:191
intmath.hh
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:155
gem5::HSAQueueEntry::_wgId
std::array< int, MAX_DIM > _wgId
Definition: hsa_queue_entry.hh:468
gem5::AMDKernelCode::enable_sgpr_grid_workgroup_count_x
uint32_t enable_sgpr_grid_workgroup_count_x
Definition: kernel_code.hh:157
gem5::AMDKernelCode
Definition: kernel_code.hh:84

Generated on Tue Sep 21 2021 12:25:24 for gem5 by doxygen 1.8.17