65 Addr host_pkt_addr,
Addr code_addr, GfxVersion gfx_version)
70 _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
71 (
int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
72 (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
73 _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
74 _hostDispPktAddr(host_pkt_addr),
75 _completionSignal(((_hsa_dispatch_packet_t*)disp_pkt)
77 codeAddress(code_addr),
78 kernargAddress(((_hsa_dispatch_packet_t*)disp_pkt)->kernarg_address),
79 _outstandingInvs(-1), _outstandingWbs(0),
80 _ldsSize((
int)((_hsa_dispatch_packet_t*)disp_pkt)->
82 _privMemPerItem((
int)((_hsa_dispatch_packet_t*)disp_pkt)->
83 private_segment_size),
84 _contextId(0), _wgId{{ 0, 0, 0 }},
85 _numWgTotal(1), numWgArrivedAtBarrier(0), _numWgCompleted(0),
86 _globalWgId(0), dispatchComplete(
false)
99 if (gfx_version == GfxVersion::gfx90a ||
100 gfx_version == GfxVersion::gfx942) {
101 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
103 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
108 if (gfx_version == GfxVersion::gfx900 ||
109 gfx_version == GfxVersion::gfx902 ||
110 gfx_version == GfxVersion::gfx908 ||
111 gfx_version == GfxVersion::gfx90a ||
112 gfx_version == GfxVersion::gfx942) {
113 numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
115 panic(
"Saw unknown gfx version setting up GPR counts\n");
118 initialVgprState.reset();
119 initialSgprState.reset();
121 for (
int i = 0;
i < MAX_DIM; ++
i) {
122 _numWg[
i] =
divCeil(_gridSize[
i], _wgSize[
i]);
123 _numWgTotal *= _numWg[
i];
126 parseKernelCode(akc);
131 _accumOffset = (akc->accum_offset + 1) * 4;