41 #ifndef __GPU_COMPUTE_HSA_QUEUE_ENTRY__
42 #define __GPU_COMPUTE_HSA_QUEUE_ENTRY__
69 _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
70 (
int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
71 (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
72 numVgprs(akc->workitem_vgpr_count),
73 numSgprs(akc->wavefront_sgpr_count),
74 _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
75 _hostDispPktAddr(host_pkt_addr),
76 _completionSignal(((_hsa_dispatch_packet_t*)disp_pkt)
78 codeAddress(code_addr),
79 kernargAddress(((_hsa_dispatch_packet_t*)disp_pkt)->kernarg_address),
80 _outstandingInvs(-1), _outstandingWbs(0),
81 _ldsSize((
int)((_hsa_dispatch_packet_t*)disp_pkt)->
83 _privMemPerItem((
int)((_hsa_dispatch_packet_t*)disp_pkt)->
84 private_segment_size),
85 _contextId(0), _wgId{{ 0, 0, 0 }},
86 _numWgTotal(1), numWgArrivedAtBarrier(0), _numWgCompleted(0),
87 _globalWgId(0), dispatchComplete(
false)
97 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
99 if (!numSgprs || numSgprs ==
100 std::numeric_limits<decltype(akc->wavefront_sgpr_count)>::max()) {
102 uint16_t version = akc->amd_machine_version_major;
103 assert((version == 0) || (version == 8) || (version == 9));
108 if ((version == 0) || (version == 8)) {
110 numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
111 }
else if (version == 9) {
112 numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
116 initialVgprState.reset();
117 initialSgprState.reset();
119 for (
int i = 0;
i < MAX_DIM; ++
i) {
120 _numWg[
i] =
divCeil(_gridSize[
i], _wgSize[
i]);
121 _numWgTotal *= _numWg[
i];
124 parseKernelCode(akc);
136 assert(dim < MAX_DIM);
143 assert(dim < MAX_DIM);
144 return _gridSize[dim];
180 return _hostDispPktAddr;
186 return _completionSignal;
198 return kernargAddress;
218 return dispatchComplete;
224 assert(dim < MAX_DIM);
231 assert(dim < MAX_DIM);
250 assert(dim < MAX_DIM);
263 return _numWgCompleted;
278 if (wgId(0) * wgSize(0) >= gridSize(0)) {
282 if (wgId(1) * wgSize(1) >= gridSize(1)) {
286 if (wgId(2) * wgSize(2) >= gridSize(2)) {
287 dispatchComplete =
true;
296 return numWgArrivedAtBarrier;
301 return initialVgprState.test(bit);
306 return initialSgprState.test(bit);
323 const static int MAX_DIM = 3;
328 return _outstandingInvs;
339 return (_outstandingInvs != -1);
350 _outstandingInvs +=
val;
351 assert(_outstandingInvs >= 0);
360 _outstandingInvs = 0;
369 assert(_outstandingInvs >= 0);
370 return (_outstandingInvs == 0);
376 return _outstandingWbs;
387 _outstandingWbs +=
val;
388 assert(_outstandingWbs >= 0);
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
bool isInvStarted()
Whether invalidate has started or finished. -1 is the initial value, indicating inv has not started for...
bool sgprBitEnabled(int bit) const
std::bitset< NumScalarInitFields > initialSgprState
const std::string & kernelName() const
Addr hostDispPktAddr() const
int wgSize(int dim) const
void wgId(int dim, int val)
int numVectorRegs() const
int numWgAtBarrier() const
void parseKernelCode(AMDKernelCode *akc)
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
bool vgprBitEnabled(int bit) const
void markInvDone()
Forcefully change the state to be inv done.
int outstandingWbs() const
HSAQueueEntry(std::string kernel_name, uint32_t queue_id, int dispatch_id, void *disp_pkt, AMDKernelCode *akc, Addr host_pkt_addr, Addr code_addr)
std::array< int, MAX_DIM > _wgId
int privMemPerItem() const
int numWgCompleted() const
std::array< int, MAX_DIM > _gridSize
int _outstandingWbs
Number of outstanding wbs for the kernel. Values: 0: 1) initial value, flush has not started for the ke...
std::array< int, MAX_DIM > _wgSize
bool dispComplete() const
std::array< int, MAX_DIM > _numWg
Addr completionSignal() const
bool isInvDone() const
Is invalidate done?
int numWgArrivedAtBarrier
int gridSize(int dim) const
int numScalarRegs() const
int _outstandingInvs
Number of outstanding invs for the kernel.
std::bitset< NumVectorInitFields > initialVgprState
void updateOutstandingWbs(int val)
Update the number of pending writeback requests.
void updateOutstandingInvs(int val)
Update the number of pending invalidate requests.
static constexpr T divCeil(const T &a, const U &b)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint32_t enable_sgpr_workgroup_info
uint32_t enable_sgpr_queue_ptr
uint32_t enable_sgpr_grid_workgroup_count_x
uint32_t enable_sgpr_dispatch_ptr
uint32_t enable_sgpr_dispatch_id
uint32_t enable_vgpr_workitem_id
uint32_t enable_sgpr_private_segment_wave_byte_offset
uint32_t enable_sgpr_workgroup_id_y
uint32_t enable_sgpr_grid_workgroup_count_y
uint32_t enable_sgpr_workgroup_id_x
uint32_t enable_sgpr_workgroup_id_z
uint32_t enable_sgpr_private_segment_size
uint32_t enable_sgpr_private_segment_buffer
uint32_t enable_sgpr_flat_scratch_init
uint32_t enable_sgpr_grid_workgroup_count_z
uint32_t enable_sgpr_kernarg_segment_ptr