Go to the documentation of this file.
38 #include "debug/GPUCommandProc.hh"
39 #include "debug/GPUKernelInfo.hh"
46 #include "params/GPUCommandProcessor.hh"
57 walker(
p.walker), hsaPP(
p.hsapp)
86 auto process =
sys->
threads[0]->getProcessPtr();
88 return process->pTable->translateRange(
vaddr, size);
119 static int dynamic_task_id = 0;
137 bool is_system_page =
true;
149 phys_addr, tmp_bytes, BaseMMU::Mode::Read,
163 if (is_system_page) {
164 DPRINTF(GPUCommandProc,
"kernel_object in system, using proxy\n");
169 DPRINTF(GPUCommandProc,
"kernel_object in device, using device mem\n");
173 Addr mmhubAddr = phys_addr +
i*
sizeof(uint8_t);
175 RequestPtr request = std::make_shared<Request>(
180 raw_akc[
i] = readPkt->
getLE<uint8_t>();
186 DPRINTF(GPUCommandProc,
"GPU machine code is %lli bytes from start of the "
189 DPRINTF(GPUCommandProc,
"GPUCommandProc: Sending dispatch pkt to %lu\n",
190 (uint64_t)tc->cpuId());
196 DPRINTF(GPUCommandProc,
"Machine code starts at addr: %#x\n",
199 std::string kernel_name;
211 kernel_name =
"Some kernel";
213 kernel_name =
"Blit kernel";
216 DPRINTF(GPUKernelInfo,
"Kernel name: %s\n", kernel_name.c_str());
219 dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr);
221 DPRINTF(GPUCommandProc,
"Task ID: %i Got AQL: wg size (%dx%dx%d), "
222 "grid size (%dx%dx%d) kernarg addr: %#x, completion "
229 DPRINTF(GPUCommandProc,
"Extracted code object: %s (num vector regs: %d, "
230 "num scalar regs: %d, code addr: %#x, kernarg size: %d, "
256 DPRINTF(GPUCommandProc,
"Triggering completion signal: %x!\n", value_addr);
270 if (*mailbox_ptr != 0) {
276 DPRINTF(GPUCommandProc,
"Calling signal wakeup event on "
277 "signal event value %d\n", *event_val);
348 DPRINTF(GPUCommandProc,
"Agent Dispatch Packet NOP\n");
349 }
else if (agent_pkt->
type == AgentCmd::Steal) {
351 int kid = agent_pkt->
arg[0];
353 "Agent Dispatch Packet Stealing signal handle for kernel %d\n",
360 DPRINTF(GPUCommandProc,
"Return Addr: %p\n",return_address);
363 *new_signal_addr = (
Addr)signal_addr;
367 "Agent Dispatch Packet Stealing signal handle from kid %d :" \
368 "(%x:%x) writing into %x\n",
369 kid,signal_addr,new_signal_addr,return_address);
373 panic(
"The agent dispatch packet provided an unknown argument in" \
374 "arg[0],currently only 0(nop) or 1(return kernel signal) is accepted");
408 [ = ] (
const uint32_t &readDispIdOffset)
414 dmaReadVirt(hostReadIdxPtr +
sizeof(hostReadIdxPtr),
Addr getHsaSignalEventAddr(Addr signal_handle)
virtual void signalWakeupEvent(uint32_t event_id)
uint64_t runtime_loader_kernel_symbol
HSAPacketProcessor * hsaPP
uint16_t workgroup_size_x
void signalWakeupEvent(uint32_t event_id)
void attachDriver(GPUComputeDriver *driver)
Wraps a std::function object in a DmaCallback.
Addr completionSignal() const
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
@ PHYSICAL
The virtual address is also the physical address.
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
HSAQueueEntry * hsaTask(int disp_id)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
GPUCommandProcessor()=delete
HSAPacketProcessor & hsaPacketProc()
memory::AbstractMemory * getDeviceMemory(const PacketPtr &pkt) const
Return a pointer to the device memory.
Addr getHsaSignalMailboxAddr(Addr signal_handle)
int numScalarRegs() const
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
This proxy attempts to translate virtual addresses using the TLBs.
def format Nop(code, *opt_flags)
Fault startFunctional(Addr base, Addr vaddr, PageTableEntry &pte, unsigned &logBytes, BaseMMU::Mode mode)
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
int numVectorRegs() const
void finishPkt(void *pkt, uint32_t rl_idx)
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
void setShader(Shader *shader)
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
ProbePointArg< PacketInfo > Packet
Packet probe point.
std::shared_ptr< Request > RequestPtr
GPUComputeDriver * _driver
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
This object is a proxy for a port or other object which implements the functional response protocol,...
uint16_t workgroup_size_z
Device model for an AMD GPU.
RequestorID getDevRequestor() const
void readBlob(Addr addr, void *p, int size) const
Higher level interfaces based on the above.
Addr getHsaSignalValueAddr(Addr signal_handle)
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
void setGPUDevice(AMDGPUDevice *gpu_device)
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Addr getPageTableBase(uint16_t vmid)
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
uint16_t workgroup_size_y
void allocate()
Allocate memory for the packet.
void setDevice(GPUCommandProcessor *dev)
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
uint64_t functionalReadHsaSignal(Addr signal_handle)
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
int64_t kernel_code_entry_byte_offset
void setDevRequestor(RequestorID mid)
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
void access(PacketPtr pkt)
Perform an untimed memory access and update all the state (e.g.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
GPUDispatcher & dispatcher
uint64_t completion_signal
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
std::unique_ptr< TranslationGen > TranslationGenPtr
#define panic(...)
This implements a cprintf based panic() function.
GPUComputeDriver * driver()
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Generated on Thu Jul 28 2022 13:32:33 for gem5 by doxygen 1.8.17