43#ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
44#define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
53#include "debug/GPUCommandProc.hh"
60#include "params/GPUCommandProcessor.hh"
66struct GPUCommandProcessorParams;
74 typedef GPUCommandProcessorParams
Params;
120 uint32_t queue_id,
Addr host_pkt_addr);
132 [] (
const uint64_t &) { });
135 uint64_t *prev_value);
150 return signal_handle + offsetof(
amd_signal_t, event_mailbox_ptr);
155 return signal_handle + offsetof(
amd_signal_t, event_id);
198 const uint32_t &readDispIdOffset)
208 task->
queueId())->hostReadIndexPtr - readDispIdOffset;
252 "Needed more scratch, but kernels are in flight for "
253 "this queue and it is unsafe to reallocate scratch. "
254 "We need to implement additional intelligence in the "
255 "hardware scheduling logic to support CP-driven "
256 "queue draining and scheduling.");
257 DPRINTF(GPUCommandProc,
"Not enough scratch space to launch "
258 "kernel (%x available, %x requested bytes per "
259 "workitem). Asking host runtime to allocate more "
265 [ = ] (
const uint64_t &dma_buffer)
266 { WaitScratchDmaEvent(task, dma_buffer); });
269 DPRINTF(GPUCommandProc,
"Sufficient scratch space, launching "
270 "kernel (%x available, %x requested bytes per "
285 if (dmaBuffer == 0) {
286 DPRINTF(GPUCommandProc,
"Host scratch allocation complete. "
287 "Attempting to re-read MQD\n");
308 DPRINTF(GPUCommandProc,
"Polling queue inactive signal at "
309 "%p.\n", value_addr);
311 [ = ] (
const uint64_t &dma_buffer)
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
Device model for an AMD GPU.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
DmaVirtDevice(const Params &p)
void sendCompletionSignal(Addr signal_handle)
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
GfxVersion getGfxVersion() const
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
GPUComputeDriver * driver()
Addr getHsaSignalMailboxAddr(Addr signal_handle)
GPUCommandProcessor()=delete
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
void setGPUDevice(AMDGPUDevice *gpu_device)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void signalWakeupEvent(uint32_t event_id)
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
void updateHsaSignalDone(uint64_t *signal_value)
GPUComputeDriver * _driver
void completeTimingRead(int dispType)
void setShader(Shader *shader)
HSAPacketProcessor & hsaPacketProc()
int target_non_blit_kernel_id
void performTimingRead(PacketPtr pkt, int dispType)
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
std::list< struct KernelDispatchData > kernelDispatchList
Addr getHsaSignalValueAddr(Addr signal_handle)
void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value)
HSAPacketProcessor * hsaPP
void dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
void MQDDmaEvent(HSAQueueEntry *task)
Perform a DMA read of the MQD that corresponds to a hardware queue descriptor (HQD).
void attachDriver(GPUComputeDriver *driver)
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
void updateHsaSignalAsync(Addr signal_handle, int64_t diff)
std::unordered_map< Addr, Tick > dispatchStartTime
Addr getHsaSignalEventAddr(Addr signal_handle)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
void sanityCheckAKC(AMDKernelCode *akc)
GPUDispatcher & dispatcher
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
void initPreload(AMDKernelCode *akc, HSAQueueEntry *task)
GPUCommandProcessorParams Params
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
void readPreload(AMDKernelCode *akc, HSAQueueEntry *task)
void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value)
void updateHsaEventData(Addr signal_handle, uint64_t *event_value)
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
uint64_t functionalReadHsaSignal(Addr signal_handle)
void WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
Poll on queue_inactive signal until the runtime can get around to taking care of our lack of scratch ...
void updateHsaSignalData(Addr value_addr, int64_t diff, uint64_t *prev_value)
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
int privMemPerItem() const
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUComputeDriver implements an HSADriver for an HSA AMD GPU agent.
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
const int NumVecElemPerVecReg(64)
Copyright (c) 2024 Arm Limited All rights reserved.
struct gem5::GEM5_PACKED AMDKernelCode
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::unique_ptr< TranslationGen > TranslationGenPtr
struct gem5::amd_signal_s amd_signal_t
_hsa_signal_t queue_inactive_signal
uint32_t compute_tmpring_size_wavesize