43#ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
44#define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
53#include "debug/GPUCommandProc.hh"
60#include "params/GPUCommandProcessor.hh"
66struct GPUCommandProcessorParams;
67class GPUComputeDriver;
74 typedef GPUCommandProcessorParams
Params;
103 uint32_t queue_id,
Addr host_pkt_addr);
115 [] (
const uint64_t &) { });
118 uint64_t *prev_value);
133 return signal_handle + offsetof(
amd_signal_t, event_mailbox_ptr);
138 return signal_handle + offsetof(
amd_signal_t, event_id);
181 const uint32_t &readDispIdOffset)
235 "Needed more scratch, but kernels are in flight for "
236 "this queue and it is unsafe to reallocate scratch. "
237 "We need to implement additional intelligence in the "
238 "hardware scheduling logic to support CP-driven "
239 "queue draining and scheduling.");
240 DPRINTF(GPUCommandProc,
"Not enough scratch space to launch "
241 "kernel (%x available, %x requested bytes per "
242 "workitem). Asking host runtime to allocate more "
248 [ = ] (
const uint64_t &dma_buffer)
249 { WaitScratchDmaEvent(task, dma_buffer); });
252 DPRINTF(GPUCommandProc,
"Sufficient scratch space, launching "
253 "kernel (%x available, %x requested bytes per "
268 if (dmaBuffer == 0) {
269 DPRINTF(GPUCommandProc,
"Host scratch allocation complete. "
270 "Attempting to re-read MQD\n");
291 DPRINTF(GPUCommandProc,
"Polling queue inactive signal at "
292 "%p.\n", value_addr);
294 [ = ] (
const uint64_t &dma_buffer)
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
Device model for an AMD GPU.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void sendCompletionSignal(Addr signal_handle)
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
GPUComputeDriver * driver()
Addr getHsaSignalMailboxAddr(Addr signal_handle)
GPUCommandProcessor()=delete
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
void setGPUDevice(AMDGPUDevice *gpu_device)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void signalWakeupEvent(uint32_t event_id)
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
void updateHsaSignalDone(uint64_t *signal_value)
GPUComputeDriver * _driver
void setShader(Shader *shader)
HSAPacketProcessor & hsaPacketProc()
int target_non_blit_kernel_id
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Addr getHsaSignalValueAddr(Addr signal_handle)
void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value)
HSAPacketProcessor * hsaPP
void dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
void MQDDmaEvent(HSAQueueEntry *task)
Perform a DMA read of the MQD that corresponds to a hardware queue descriptor (HQD).
void attachDriver(GPUComputeDriver *driver)
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
void updateHsaSignalAsync(Addr signal_handle, int64_t diff)
std::unordered_map< Addr, Tick > dispatchStartTime
Addr getHsaSignalEventAddr(Addr signal_handle)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
void sanityCheckAKC(AMDKernelCode *akc)
GPUDispatcher & dispatcher
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
GPUCommandProcessorParams Params
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value)
void updateHsaEventData(Addr signal_handle, uint64_t *event_value)
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
uint64_t functionalReadHsaSignal(Addr signal_handle)
void WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
Poll on queue_inactive signal until the runtime can get around to taking care of our lack of scratch ...
void updateHsaSignalData(Addr value_addr, int64_t diff, uint64_t *prev_value)
uint64_t inFlightPkts(uint32_t queId)
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
uint64_t hostReadIndexPtr
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
int privMemPerItem() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUComputeDriver implements an HSADriver for an HSA AMD GPU agent.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
const int NumVecElemPerVecReg(64)
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::unique_ptr< TranslationGen > TranslationGenPtr
_hsa_signal_t queue_inactive_signal
uint32_t compute_tmpring_size_wavesize