41#ifndef __GPU_COMPUTE_HSA_QUEUE_ENTRY__
42#define __GPU_COMPUTE_HSA_QUEUE_ENTRY__
54#include "enums/GfxVersion.hh"
65 Addr host_pkt_addr,
Addr code_addr, GfxVersion gfx_version)
70 _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
71 (
int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
72 (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
73 numVgprs(akc->workitem_vgpr_count),
74 numSgprs(akc->wavefront_sgpr_count),
75 _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
76 _hostDispPktAddr(host_pkt_addr),
77 _completionSignal(((_hsa_dispatch_packet_t*)disp_pkt)
79 codeAddress(code_addr),
80 kernargAddress(((_hsa_dispatch_packet_t*)disp_pkt)->kernarg_address),
81 _outstandingInvs(-1), _outstandingWbs(0),
82 _ldsSize((
int)((_hsa_dispatch_packet_t*)disp_pkt)->
84 _privMemPerItem((
int)((_hsa_dispatch_packet_t*)disp_pkt)->
85 private_segment_size),
86 _contextId(0), _wgId{{ 0, 0, 0 }},
87 _numWgTotal(1), numWgArrivedAtBarrier(0), _numWgCompleted(0),
88 _globalWgId(0), dispatchComplete(
false)
103 if (gfx_version == GfxVersion::gfx90a) {
104 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
106 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
110 if (!numSgprs || numSgprs ==
111 std::numeric_limits<
decltype(akc->wavefront_sgpr_count)>::max()) {
113 uint16_t version = akc->amd_machine_version_major;
114 assert((version == 0) || (version == 8) || (version == 9));
119 if ((version == 0) || (version == 8)) {
121 numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
122 }
else if (version == 9) {
123 numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
127 initialVgprState.reset();
128 initialSgprState.reset();
130 for (
int i = 0;
i < MAX_DIM; ++
i) {
131 _numWg[
i] =
divCeil(_gridSize[
i], _wgSize[
i]);
132 _numWgTotal *= _numWg[
i];
135 parseKernelCode(akc);
147 assert(dim < MAX_DIM);
154 assert(dim < MAX_DIM);
155 return _gridSize[dim];
191 return _hostDispPktAddr;
197 return _completionSignal;
209 return kernargAddress;
229 return dispatchComplete;
235 assert(dim < MAX_DIM);
242 assert(dim < MAX_DIM);
261 assert(dim < MAX_DIM);
274 return _numWgCompleted;
289 if (wgId(0) * wgSize(0) >= gridSize(0)) {
293 if (wgId(1) * wgSize(1) >= gridSize(1)) {
297 if (wgId(2) * wgSize(2) >= gridSize(2)) {
298 dispatchComplete =
true;
307 return numWgArrivedAtBarrier;
312 return initialVgprState.test(bit);
317 return initialSgprState.test(bit);
334 const static int MAX_DIM = 3;
339 return _outstandingInvs;
350 return (_outstandingInvs != -1);
361 _outstandingInvs +=
val;
362 assert(_outstandingInvs >= 0);
371 _outstandingInvs = 0;
380 assert(_outstandingInvs >= 0);
381 return (_outstandingInvs == 0);
387 return _outstandingWbs;
398 _outstandingWbs +=
val;
399 assert(_outstandingWbs >= 0);
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
bool isInvStarted()
Whether invalidate has started or finished. -1 is the initial value, indicating that inv has not started for...
bool sgprBitEnabled(int bit) const
std::bitset< NumScalarInitFields > initialSgprState
Addr hostDispPktAddr() const
int wgSize(int dim) const
void wgId(int dim, int val)
int numVectorRegs() const
int numWgAtBarrier() const
void parseKernelCode(AMDKernelCode *akc)
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
bool vgprBitEnabled(int bit) const
void markInvDone()
Forcefully change the state to be inv done.
const std::string & kernelName() const
int outstandingWbs() const
std::array< int, MAX_DIM > _wgId
int privMemPerItem() const
int numWgCompleted() const
std::array< int, MAX_DIM > _gridSize
int _outstandingWbs
Number of outstanding wbs for the kernel. Values: 0: 1) initial value, flush has not started for the ke...
std::array< int, MAX_DIM > _wgSize
bool dispComplete() const
std::array< int, MAX_DIM > _numWg
Addr completionSignal() const
bool isInvDone() const
Is invalidate done?
int numWgArrivedAtBarrier
int gridSize(int dim) const
int numScalarRegs() const
int _outstandingInvs
Number of outstanding invs for the kernel.
std::bitset< NumVectorInitFields > initialVgprState
HSAQueueEntry(std::string kernel_name, uint32_t queue_id, int dispatch_id, void *disp_pkt, AMDKernelCode *akc, Addr host_pkt_addr, Addr code_addr, GfxVersion gfx_version)
void updateOutstandingWbs(int val)
Update the number of pending writeback requests.
void updateOutstandingInvs(int val)
Update the number of pending invalidate requests.
static constexpr T divCeil(const T &a, const U &b)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint32_t enable_sgpr_workgroup_info
uint32_t enable_sgpr_queue_ptr
uint32_t enable_sgpr_grid_workgroup_count_x
uint32_t enable_sgpr_dispatch_ptr
uint32_t enable_sgpr_dispatch_id
uint32_t enable_vgpr_workitem_id
uint32_t enable_sgpr_private_segment_wave_byte_offset
uint32_t enable_sgpr_workgroup_id_y
uint32_t enable_sgpr_grid_workgroup_count_y
uint32_t enable_sgpr_workgroup_id_x
uint32_t enable_sgpr_workgroup_id_z
uint32_t enable_sgpr_private_segment_size
uint32_t enable_sgpr_private_segment_buffer
uint32_t enable_sgpr_flat_scratch_init
uint32_t enable_sgpr_grid_workgroup_count_z
uint32_t enable_sgpr_kernarg_segment_ptr