Go to the documentation of this file.
43 #ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
44 #define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
52 #include "debug/GPUCommandProc.hh"
59 #include "params/GPUCommandProcessor.hh"
65 struct GPUCommandProcessorParams;
66 class GPUComputeDriver;
73 typedef GPUCommandProcessorParams
Params;
111 [] (
const uint64_t &) { });
122 return signal_handle + offsetof(
amd_signal_t, event_mailbox_ptr);
127 return signal_handle + offsetof(
amd_signal_t, event_id);
157 const uint32_t &readDispIdOffset)
211 "Needed more scratch, but kernels are in flight for "
212 "this queue and it is unsafe to reallocate scratch. "
213 "We need to implement additional intelligence in the "
214 "hardware scheduling logic to support CP-driven "
215 "queue draining and scheduling.");
216 DPRINTF(GPUCommandProc,
"Not enough scratch space to launch "
217 "kernel (%x available, %x requested bytes per "
218 "workitem). Asking host runtime to allocate more "
225 "Runtime dynamic scratch allocation not supported");
228 [ = ] (
const uint64_t &dma_buffer)
229 { WaitScratchDmaEvent(task, dma_buffer); });
232 DPRINTF(GPUCommandProc,
"Sufficient scratch space, launching "
233 "kernel (%x available, %x requested bytes per "
248 if (dmaBuffer == 0) {
249 DPRINTF(GPUCommandProc,
"Host scratch allocation complete. "
250 "Attempting to re-read MQD\n");
271 DPRINTF(GPUCommandProc,
"Polling queue inactive signal at "
272 "%p.\n", value_addr);
274 [ = ] (
const uint64_t &dma_buffer)
283 #endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
GPUCommandProcessorParams Params
Addr getHsaSignalEventAddr(Addr signal_handle)
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
HSAPacketProcessor * hsaPP
int privMemPerItem() const
void signalWakeupEvent(uint32_t event_id)
void attachDriver(GPUComputeDriver *driver)
uint64_t hostReadIndexPtr
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device responds to.
GPUCommandProcessor()=delete
uint64_t inFlightPkts(uint32_t queId)
void WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
Poll on queue_inactive signal until the runtime can get around to taking care of our lack of scratch space.
HSAPacketProcessor & hsaPacketProc()
Addr getHsaSignalMailboxAddr(Addr signal_handle)
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with AQL kernel dispatch packets.
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
void MQDDmaEvent(HSAQueueEntry *task)
Perform a DMA read of the MQD that corresponds to a hardware queue descriptor (HQD).
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register state.
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI state, we send a description of the task to the dispatcher.
void setShader(Shader *shader)
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
uint64_t Tick
Tick count type.
GPUComputeDriver * _driver
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Device model for an AMD GPU.
Addr getHsaSignalValueAddr(Addr signal_handle)
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
void setGPUDevice(AMDGPUDevice *gpu_device)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
uint64_t functionalReadHsaSignal(Addr signal_handle)
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
_hsa_signal_t queue_inactive_signal
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true and allows the programmer to specify diagnostic printout.
uint32_t compute_tmpring_size_wavesize
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
GPUDispatcher & dispatcher
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
std::unique_ptr< TranslationGen > TranslationGenPtr
GPUComputeDriver * driver()
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing the ABI state.
Generated on Thu Jul 28 2022 13:32:33 for gem5 by doxygen 1.8.17