38 #include "debug/GPUCommandProc.hh" 
   39 #include "debug/GPUKernelInfo.hh" 
   46 #include "params/GPUCommandProcessor.hh" 
   57       walker(
p.walker), hsaPP(
p.hsapp)
 
   86         auto process = 
sys->
threads[0]->getProcessPtr();
 
   88         return process->pTable->translateRange(
vaddr, size);
 
  119     static int dynamic_task_id = 0;
 
  138     bool is_system_page = 
true;
 
  150                                 phys_addr, tmp_bytes, BaseMMU::Mode::Read,
 
  154     DPRINTF(GPUCommandProc, 
"kernobj vaddr %#lx paddr %#lx size %d s:%d\n",
 
  168     if (is_system_page) {
 
  169         DPRINTF(GPUCommandProc, 
"kernel_object in system, using proxy\n");
 
  174         DPRINTF(GPUCommandProc, 
"kernel_object in device, using device mem\n");
 
  179                            system()->cacheLineSize());
 
  185                                     chunk_addr, dummy, BaseMMU::Mode::Read,
 
  189             RequestPtr request = std::make_shared<Request>(chunk_addr,
 
  198     DPRINTF(GPUCommandProc, 
"GPU machine code is %lli bytes from start of the " 
  201     DPRINTF(GPUCommandProc,
"GPUCommandProc: Sending dispatch pkt to %lu\n",
 
  202         (uint64_t)tc->cpuId());
 
  208     DPRINTF(GPUCommandProc, 
"Machine code starts at addr: %#x\n",
 
  211     std::string kernel_name;
 
  223         kernel_name = 
"Some kernel";
 
  225         kernel_name = 
"Blit kernel";
 
  228     DPRINTF(GPUKernelInfo, 
"Kernel name: %s\n", kernel_name.c_str());
 
  231         dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr);
 
  233     DPRINTF(GPUCommandProc, 
"Task ID: %i Got AQL: wg size (%dx%dx%d), " 
  234         "grid size (%dx%dx%d) kernarg addr: %#x, completion " 
  241     DPRINTF(GPUCommandProc, 
"Extracted code object: %s (num vector regs: %d, " 
  242         "num scalar regs: %d, code addr: %#x, kernarg size: %d, " 
  268     DPRINTF(GPUCommandProc, 
"Triggering completion signal: %x!\n", value_addr);
 
  282     if (*mailbox_ptr != 0) {
 
  288         DPRINTF(GPUCommandProc, 
"Calling signal wakeup event on " 
  289                 "signal event value %d\n", *event_val);
 
  360         DPRINTF(GPUCommandProc, 
"Agent Dispatch Packet NOP\n");
 
  361     } 
else if (agent_pkt->
type == AgentCmd::Steal) {
 
  363         int kid = agent_pkt->
arg[0];
 
  365             "Agent Dispatch Packet Stealing signal handle for kernel %d\n",
 
  372         DPRINTF(GPUCommandProc, 
"Return Addr: %p\n",return_address);
 
  375         *new_signal_addr  = (
Addr)signal_addr;
 
  379             "Agent Dispatch Packet Stealing signal handle from kid %d :" \
 
  380             "(%x:%x) writing into %x\n",
 
  381             kid,signal_addr,new_signal_addr,return_address);
 
  385         panic(
"The agent dispatch packet provided an unknown argument in" \
 
  386         "arg[0],currently only 0(nop) or 1(return kernel signal) is accepted");
 
  420         [ = ] (
const uint32_t &readDispIdOffset)
 
  426     dmaReadVirt(hostReadIdxPtr + 
sizeof(hostReadIdxPtr),
 
AbstractMemory declaration.
Declaration and inline definition of ChunkGenerator object.
Device model for an AMD GPU.
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
Addr getPageTableBase(uint16_t vmid)
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
GPUComputeDriver * driver()
Addr getHsaSignalMailboxAddr(Addr signal_handle)
GPUCommandProcessor()=delete
void setGPUDevice(AMDGPUDevice *gpu_device)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void signalWakeupEvent(uint32_t event_id)
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
GPUComputeDriver * _driver
void setShader(Shader *shader)
HSAPacketProcessor & hsaPacketProc()
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Addr getHsaSignalValueAddr(Addr signal_handle)
HSAPacketProcessor * hsaPP
void attachDriver(GPUComputeDriver *driver)
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Addr getHsaSignalEventAddr(Addr signal_handle)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
GPUDispatcher & dispatcher
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
uint64_t functionalReadHsaSignal(Addr signal_handle)
virtual void signalWakeupEvent(uint32_t event_id)
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
HSAQueueEntry * hsaTask(int disp_id)
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
void finishPkt(void *pkt, uint32_t rl_idx)
void setDevice(GPUCommandProcessor *dev)
int numVectorRegs() const
Addr completionSignal() const
int numScalarRegs() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
This object is a proxy for a port or other object which implements the functional response protocol,...
void readBlob(Addr addr, void *p, int size) const
Higher level interfaces based on the above.
@ PHYSICAL
The virtual address is also the physical address.
memory::AbstractMemory * getDeviceMemory(const PacketPtr &pkt) const
Return a pointer to the device memory.
This proxy attempts to translate virtual addresses using the TLBs.
void setDevRequestor(RequestorID mid)
Fault startFunctional(Addr base, Addr vaddr, PageTableEntry &pte, unsigned &logBytes, BaseMMU::Mode mode)
RequestorID getDevRequestor() const
void access(PacketPtr pkt)
Perform an untimed memory access and update all the state (e.g.
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
Addr addr() const
Return starting address of current chunk.
Addr complete() const
Number of bytes we have already chunked up.
bool done() const
Are we done? That is, did the last call to next() advance past the end of the region?
bool next()
Advance generator to next chunk.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
ProbePointArg< PacketInfo > Packet
Packet probe point.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
std::unique_ptr< TranslationGen > TranslationGenPtr
def format Nop(code, *opt_flags)
int64_t kernel_code_entry_byte_offset
uint64_t runtime_loader_kernel_symbol
uint64_t completion_signal
uint16_t workgroup_size_y
uint16_t workgroup_size_z
uint16_t workgroup_size_x
This file defines buffer classes used to handle pointer arguments in emulated syscalls.