43 #ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
44 #define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
52 #include "debug/GPUCommandProc.hh"
59 #include "params/GPUCommandProcessor.hh"
65 struct GPUCommandProcessorParams;
66 class GPUComputeDriver;
73 typedef GPUCommandProcessorParams
Params;
111 [] (
const uint64_t &) { });
122 return signal_handle + offsetof(
amd_signal_t, event_mailbox_ptr);
127 return signal_handle + offsetof(
amd_signal_t, event_id);
157 const uint32_t &readDispIdOffset)
211 "Needed more scratch, but kernels are in flight for "
212 "this queue and it is unsafe to reallocate scratch. "
213 "We need to implement additional intelligence in the "
214 "hardware scheduling logic to support CP-driven "
215 "queue draining and scheduling.");
216 DPRINTF(GPUCommandProc,
"Not enough scratch space to launch "
217 "kernel (%x available, %x requested bytes per "
218 "workitem). Asking host runtime to allocate more "
224 [ = ] (
const uint64_t &dma_buffer)
225 { WaitScratchDmaEvent(task, dma_buffer); });
228 DPRINTF(GPUCommandProc,
"Sufficient scratch space, launching "
229 "kernel (%x available, %x requested bytes per "
244 if (dmaBuffer == 0) {
245 DPRINTF(GPUCommandProc,
"Host scratch allocation complete. "
246 "Attempting to re-read MQD\n");
267 DPRINTF(GPUCommandProc,
"Polling queue inactive signal at "
268 "%p.\n", value_addr);
270 [ = ] (
const uint64_t &dma_buffer)
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
Device model for an AMD GPU.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
GPUComputeDriver * driver()
Addr getHsaSignalMailboxAddr(Addr signal_handle)
GPUCommandProcessor()=delete
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
void setGPUDevice(AMDGPUDevice *gpu_device)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void signalWakeupEvent(uint32_t event_id)
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
GPUComputeDriver * _driver
void setShader(Shader *shader)
HSAPacketProcessor & hsaPacketProc()
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Addr getHsaSignalValueAddr(Addr signal_handle)
HSAPacketProcessor * hsaPP
void MQDDmaEvent(HSAQueueEntry *task)
Perform a DMA read of the MQD that corresponds to a hardware queue descriptor (HQD).
void attachDriver(GPUComputeDriver *driver)
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Addr getHsaSignalEventAddr(Addr signal_handle)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
GPUDispatcher & dispatcher
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
GPUCommandProcessorParams Params
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
uint64_t functionalReadHsaSignal(Addr signal_handle)
void WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
Poll on queue_inactive signal until the runtime can get around to taking care of our lack of scratch ...
uint64_t inFlightPkts(uint32_t queId)
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
uint64_t hostReadIndexPtr
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
int privMemPerItem() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUComputeDriver implements an HSADriver for an HSA AMD GPU agent.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::unique_ptr< TranslationGen > TranslationGenPtr
_hsa_signal_t queue_inactive_signal
uint32_t compute_tmpring_size_wavesize