41#ifndef __GPU_COMPUTE_HSA_QUEUE_ENTRY__
42#define __GPU_COMPUTE_HSA_QUEUE_ENTRY__
54#include "enums/GfxVersion.hh"
65 Addr host_pkt_addr,
Addr code_addr, GfxVersion gfx_version)
70 _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
71 (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
72 (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
73 _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
74 _hostDispPktAddr(host_pkt_addr),
77 codeAddress(code_addr),
79 _outstandingInvs(-1), _outstandingWbs(0),
83 private_segment_size),
84 _contextId(0), _wgId{{ 0, 0, 0 }},
85 _numWgTotal(1), numWgArrivedAtBarrier(0), _numWgCompleted(0),
86 _globalWgId(0), dispatchComplete(
false)
99 if (gfx_version == GfxVersion::gfx90a ||
100 gfx_version == GfxVersion::gfx942) {
101 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
103 numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
108 if (gfx_version == GfxVersion::gfx900 ||
109 gfx_version == GfxVersion::gfx902 ||
110 gfx_version == GfxVersion::gfx908 ||
111 gfx_version == GfxVersion::gfx90a ||
112 gfx_version == GfxVersion::gfx942) {
113 numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
115 panic(
"Saw unknown gfx version setting up GPR counts\n");
118 initialVgprState.reset();
119 initialSgprState.reset();
121 for (
int i = 0;
i < MAX_DIM; ++
i) {
122 _numWg[
i] =
divCeil(_gridSize[
i], _wgSize[
i]);
123 _numWgTotal *= _numWg[
i];
126 parseKernelCode(akc);
131 _accumOffset = (akc->accum_offset + 1) * 4;
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
bool isInvStarted()
Whether invalidate has started or finished. -1 is the initial value indicating inv has not started for...
bool sgprBitEnabled(int bit) const
std::bitset< NumScalarInitFields > initialSgprState
const GfxVersion & gfxVersion() const
void preloadLength(unsigned val)
Addr hostDispPktAddr() const
int wgSize(int dim) const
void wgId(int dim, int val)
int numVectorRegs() const
int numWgAtBarrier() const
void parseKernelCode(AMDKernelCode *akc)
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
uint32_t _preloadArgs[KernargPreloadPktSize/sizeof(uint32_t)]
bool vgprBitEnabled(int bit) const
void markInvDone()
Forcefully change the state to be inv done.
const std::string & kernelName() const
int outstandingWbs() const
std::array< int, MAX_DIM > _wgId
int privMemPerItem() const
int numWgCompleted() const
std::array< int, MAX_DIM > _gridSize
int _outstandingWbs
Number of outstanding wbs for the kernel. Values: 0: 1) initial value, flush has not started for the ke...
std::array< int, MAX_DIM > _wgSize
bool dispComplete() const
std::array< int, MAX_DIM > _numWg
Addr completionSignal() const
bool isInvDone() const
Is invalidate done?
int numWgArrivedAtBarrier
int gridSize(int dim) const
int numScalarRegs() const
unsigned preloadLength() const
int _outstandingInvs
Number of outstanding invs for the kernel.
std::bitset< NumVectorInitFields > initialVgprState
HSAQueueEntry(std::string kernel_name, uint32_t queue_id, int dispatch_id, void *disp_pkt, AMDKernelCode *akc, Addr host_pkt_addr, Addr code_addr, GfxVersion gfx_version)
unsigned accumOffset() const
void updateOutstandingWbs(int val)
Update the number of pending writeback requests.
void updateOutstandingInvs(int val)
Update the number of pending invalidate requests.
static constexpr T divCeil(const T &a, const U &b)
#define panic(...)
This implements a cprintf-based panic() function.
Copyright (c) 2024 Arm Limited All rights reserved.
struct gem5::GEM5_PACKED AMDKernelCode
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
constexpr int KernargPreloadPktSize
The number of bytes after the dispatch packet which contain kernel arguments that should be preloaded...
uint32_t enable_sgpr_flat_scratch_init
uint32_t enable_sgpr_queue_ptr
uint32_t enable_private_segment
uint32_t enable_sgpr_workgroup_id_y
uint32_t enable_sgpr_dispatch_ptr
uint32_t enable_sgpr_workgroup_id_z
uint32_t enable_sgpr_dispatch_id
uint32_t enable_vgpr_workitem_id
uint32_t enable_sgpr_private_segment_size
uint32_t enable_sgpr_kernarg_segment_ptr
uint32_t enable_sgpr_private_segment_buffer
uint32_t enable_sgpr_workgroup_id_x
uint32_t enable_sgpr_workgroup_info