Go to the documentation of this file.
38 #include "debug/GPUCommandProc.hh"
39 #include "debug/GPUKernelInfo.hh"
46 #include "params/GPUCommandProcessor.hh"
57 walker(
p.walker), hsaPP(
p.hsapp)
86 auto process =
sys->
threads[0]->getProcessPtr();
88 return process->pTable->translateRange(
vaddr, size);
119 static int dynamic_task_id = 0;
138 bool is_system_page =
true;
150 phys_addr, tmp_bytes, BaseMMU::Mode::Read,
154 DPRINTF(GPUCommandProc,
"kernobj vaddr %#lx paddr %#lx size %d s:%d\n",
168 if (is_system_page) {
169 DPRINTF(GPUCommandProc,
"kernel_object in system, using proxy\n");
174 DPRINTF(GPUCommandProc,
"kernel_object in device, using device mem\n");
179 system()->cacheLineSize());
185 chunk_addr, dummy, BaseMMU::Mode::Read,
189 RequestPtr request = std::make_shared<Request>(chunk_addr,
198 DPRINTF(GPUCommandProc,
"GPU machine code is %lli bytes from start of the "
201 DPRINTF(GPUCommandProc,
"GPUCommandProc: Sending dispatch pkt to %lu\n",
202 (uint64_t)tc->cpuId());
208 DPRINTF(GPUCommandProc,
"Machine code starts at addr: %#x\n",
211 std::string kernel_name;
223 kernel_name =
"Some kernel";
225 kernel_name =
"Blit kernel";
228 DPRINTF(GPUKernelInfo,
"Kernel name: %s\n", kernel_name.c_str());
233 dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr,
236 DPRINTF(GPUCommandProc,
"Task ID: %i Got AQL: wg size (%dx%dx%d), "
237 "grid size (%dx%dx%d) kernarg addr: %#x, completion "
244 DPRINTF(GPUCommandProc,
"Extracted code object: %s (num vector regs: %d, "
245 "num scalar regs: %d, code addr: %#x, kernarg size: %d, "
271 DPRINTF(GPUCommandProc,
"Triggering completion signal: %x!\n", value_addr);
285 if (*mailbox_ptr != 0) {
291 DPRINTF(GPUCommandProc,
"Calling signal wakeup event on "
292 "signal event value %d\n", *event_val);
363 DPRINTF(GPUCommandProc,
"Agent Dispatch Packet NOP\n");
364 }
else if (agent_pkt->
type == AgentCmd::Steal) {
366 int kid = agent_pkt->
arg[0];
368 "Agent Dispatch Packet Stealing signal handle for kernel %d\n",
375 DPRINTF(GPUCommandProc,
"Return Addr: %p\n",return_address);
378 *new_signal_addr = (
Addr)signal_addr;
382 "Agent Dispatch Packet Stealing signal handle from kid %d :" \
383 "(%x:%x) writing into %x\n",
384 kid,signal_addr,new_signal_addr,return_address);
388 panic(
"The agent dispatch packet provided an unknown argument in" \
389 "arg[0],currently only 0(nop) or 1(return kernel signal) is accepted");
423 [ = ] (
const uint32_t &readDispIdOffset)
429 dmaReadVirt(hostReadIdxPtr +
sizeof(hostReadIdxPtr),
Addr getHsaSignalEventAddr(Addr signal_handle)
virtual void signalWakeupEvent(uint32_t event_id)
uint64_t runtime_loader_kernel_symbol
HSAPacketProcessor * hsaPP
GfxVersion getGfxVersion() const
uint16_t workgroup_size_x
void signalWakeupEvent(uint32_t event_id)
void attachDriver(GPUComputeDriver *driver)
Wraps a std::function object in a DmaCallback.
Addr completionSignal() const
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
bool next()
Advance generator to next chunk.
Addr complete() const
Number of bytes we have already chunked up.
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
HSAQueueEntry * hsaTask(int disp_id)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
GPUCommandProcessor()=delete
HSAPacketProcessor & hsaPacketProc()
memory::AbstractMemory * getDeviceMemory(const PacketPtr &pkt) const
Return a pointer to the device memory.
Addr getHsaSignalMailboxAddr(Addr signal_handle)
int numScalarRegs() const
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
This proxy attempts to translate virtual addresses using the TLBs.
def format Nop(code, *opt_flags)
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
Fault startFunctional(Addr base, Addr vaddr, PageTableEntry &pte, unsigned &logBytes, BaseMMU::Mode mode)
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
bool done() const
Are we done? That is, did the last call to next() advance past the end of the region?
int numVectorRegs() const
void finishPkt(void *pkt, uint32_t rl_idx)
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
void setShader(Shader *shader)
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
ProbePointArg< PacketInfo > Packet
Packet probe point.
std::shared_ptr< Request > RequestPtr
GPUComputeDriver * _driver
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
This object is a proxy for a port or other object which implements the functional response protocol,...
uint16_t workgroup_size_z
Device model for an AMD GPU.
RequestorID getDevRequestor() const
void readBlob(Addr addr, void *p, int size) const
Higher level interfaces based on the above.
Addr getHsaSignalValueAddr(Addr signal_handle)
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
void setGPUDevice(AMDGPUDevice *gpu_device)
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Addr getPageTableBase(uint16_t vmid)
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
uint16_t workgroup_size_y
void setDevice(GPUCommandProcessor *dev)
Addr addr() const
Return starting address of current chunk.
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
uint64_t functionalReadHsaSignal(Addr signal_handle)
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
int64_t kernel_code_entry_byte_offset
@ PHYSICAL
The virtual address is also the physical address.
void setDevRequestor(RequestorID mid)
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
void access(PacketPtr pkt)
Perform an untimed memory access and update all the state (e.g.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
GPUDispatcher & dispatcher
GfxVersion getGfxVersion() const
uint64_t completion_signal
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
std::unique_ptr< TranslationGen > TranslationGenPtr
#define panic(...)
This implements a cprintf based panic() function.
GPUComputeDriver * driver()
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Generated on Sun Jul 30 2023 01:56:57 for gem5 by doxygen 1.8.17