gem5  v21.0.1.0
gpu_command_processor.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include "debug/GPUCommandProc.hh"
37 #include "debug/GPUKernelInfo.hh"
39 #include "params/GPUCommandProcessor.hh"
40 #include "sim/process.hh"
41 #include "sim/proxy_ptr.hh"
42 #include "sim/syscall_emul_buf.hh"
43 
45  : HSADevice(p), dispatcher(*p.dispatcher), driver(nullptr)
46 {
48 }
49 
67 void
68 GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
69  Addr host_pkt_addr)
70 {
71  static int dynamic_task_id = 0;
72  _hsa_dispatch_packet_t *disp_pkt = (_hsa_dispatch_packet_t*)raw_pkt;
73 
78  auto *tc = sys->threads[0];
79  auto &virt_proxy = tc->getVirtProxy();
80 
90  AMDKernelCode akc;
91  virt_proxy.readBlob(disp_pkt->kernel_object, (uint8_t*)&akc,
92  sizeof(AMDKernelCode));
93 
94  DPRINTF(GPUCommandProc, "GPU machine code is %lli bytes from start of the "
95  "kernel object\n", akc.kernel_code_entry_byte_offset);
96 
97  DPRINTF(GPUCommandProc,"GPUCommandProc: Sending dispatch pkt to %lu\n",
98  (uint64_t)tc->cpuId());
99 
100 
101  Addr machine_code_addr = (Addr)disp_pkt->kernel_object
103 
104  DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n",
105  machine_code_addr);
106 
107  Addr kern_name_addr(0);
108  std::string kernel_name;
109 
120  virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10,
121  (uint8_t*)&kern_name_addr, 0x8);
122 
123  virt_proxy.readString(kernel_name, kern_name_addr);
124  } else {
125  kernel_name = "Blit kernel";
126  }
127 
128  DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
129 
130  HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
131  dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr);
132 
133  DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
134  "grid size (%dx%dx%d) kernarg addr: %#x, completion "
135  "signal addr:%#x\n", dynamic_task_id, disp_pkt->workgroup_size_x,
136  disp_pkt->workgroup_size_y, disp_pkt->workgroup_size_z,
137  disp_pkt->grid_size_x, disp_pkt->grid_size_y,
138  disp_pkt->grid_size_z, disp_pkt->kernarg_address,
139  disp_pkt->completion_signal);
140 
141  DPRINTF(GPUCommandProc, "Extracted code object: %s (num vector regs: %d, "
142  "num scalar regs: %d, code addr: %#x, kernarg size: %d, "
143  "LDS size: %d)\n", kernel_name, task->numVectorRegs(),
144  task->numScalarRegs(), task->codeAddr(), 0, 0);
145 
146  initABI(task);
147  ++dynamic_task_id;
148 }
149 
150 uint64_t
152 {
153  Addr value_addr = getHsaSignalValueAddr(signal_handle);
154  auto tc = system()->threads[0];
155  ConstVPtr<Addr> prev_value(value_addr, tc);
156  return *prev_value;
157 }
158 
159 void
160 GPUCommandProcessor::updateHsaSignal(Addr signal_handle, uint64_t signal_value)
161 {
162  // The signal value is aligned 8 bytes from
163  // the actual handle in the runtime
164  Addr value_addr = getHsaSignalValueAddr(signal_handle);
165  Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
166  Addr event_addr = getHsaSignalEventAddr(signal_handle);
167  DPRINTF(GPUCommandProc, "Triggering completion signal: %x!\n", value_addr);
168 
169  Addr *new_signal = new Addr;
170  *new_signal = signal_value;
171 
172  dmaWriteVirt(value_addr, sizeof(Addr), nullptr, new_signal, 0);
173 
174  auto tc = system()->threads[0];
175  ConstVPtr<uint64_t> mailbox_ptr(mailbox_addr, tc);
176 
177  // Notifying an event with its mailbox pointer is
178  // not supported in the current implementation. Just use
179  // mailbox pointer to distinguish between interruptible
180  // and default signal. Interruptible signal will have
181  // a valid mailbox pointer.
182  if (*mailbox_ptr != 0) {
183  // This is an interruptible signal. Now, read the
184  // event ID and directly communicate with the driver
185  // about that event notification.
186  ConstVPtr<uint32_t> event_val(event_addr, tc);
187 
188  DPRINTF(GPUCommandProc, "Calling signal wakeup event on "
189  "signal event value %d\n", *event_val);
190  signalWakeupEvent(*event_val);
191  }
192 }
193 
194 void
196 {
197  fatal_if(driver, "Should not overwrite driver.");
198  driver = hsa_driver;
199 }
200 
218 void
219 GPUCommandProcessor::submitVendorPkt(void *raw_pkt, uint32_t queue_id,
220  Addr host_pkt_addr)
221 {
222  hsaPP->finishPkt(raw_pkt, queue_id);
223 }
224 
232 void
233 GPUCommandProcessor::submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
234  Addr host_pkt_addr)
235 {
236  //Parse the Packet, see what it wants us to do
237  _hsa_agent_dispatch_packet_t * agent_pkt =
238  (_hsa_agent_dispatch_packet_t *)raw_pkt;
239 
240  if (agent_pkt->type == AgentCmd::Nop) {
241  DPRINTF(GPUCommandProc, "Agent Dispatch Packet NOP\n");
242  } else if (agent_pkt->type == AgentCmd::Steal) {
243  //This is where we steal the HSA Task's completion signal
244  int kid = agent_pkt->arg[0];
245  DPRINTF(GPUCommandProc,
246  "Agent Dispatch Packet Stealing signal handle for kernel %d\n",
247  kid);
248 
249  HSAQueueEntry *task = dispatcher.hsaTask(kid);
250  uint64_t signal_addr = task->completionSignal();// + sizeof(uint64_t);
251 
252  uint64_t return_address = agent_pkt->return_address;
253  DPRINTF(GPUCommandProc, "Return Addr: %p\n",return_address);
254  //*return_address = signal_addr;
255  Addr *new_signal_addr = new Addr;
256  *new_signal_addr = (Addr)signal_addr;
257  dmaWriteVirt(return_address, sizeof(Addr), nullptr, new_signal_addr, 0);
258 
259  DPRINTF(GPUCommandProc,
260  "Agent Dispatch Packet Stealing signal handle from kid %d :" \
261  "(%x:%x) writing into %x\n",
262  kid,signal_addr,new_signal_addr,return_address);
263 
264  } else
265  {
266  panic("The agent dispatch packet provided an unknown argument in" \
267  "arg[0],currently only 0(nop) or 1(return kernel signal) is accepted");
268  }
269 
270  hsaPP->finishPkt(raw_pkt, queue_id);
271 }
272 
279 void
281 {
282  dispatcher.dispatch(task);
283 }
284 
285 void
287 {
288  driver->signalWakeupEvent(event_id);
289 }
290 
297 void
299 {
300  auto *readDispIdOffEvent = new ReadDispIdOffsetDmaEvent(*this, task);
301 
302  Addr hostReadIdxPtr
304 
305  dmaReadVirt(hostReadIdxPtr + sizeof(hostReadIdxPtr),
306  sizeof(readDispIdOffEvent->readDispIdOffset), readDispIdOffEvent,
307  &readDispIdOffEvent->readDispIdOffset);
308 }
309 
310 System*
312 {
313  return sys;
314 }
315 
318 {
319  AddrRangeList ranges;
320  return ranges;
321 }
322 
323 void
325 {
326  _shader = shader;
327 }
328 
329 Shader*
331 {
332  return _shader;
333 }
HSADevice::hsaPP
HSAPacketProcessor * hsaPP
Definition: hsa_device.hh:116
_hsa_agent_dispatch_packet_s
Definition: hsa_packet.hh:69
GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle) override
Definition: gpu_command_processor.cc:151
GPUCommandProcessor::dispatchPkt
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Definition: gpu_command_processor.cc:280
GPUCommandProcessor::dispatcher
GPUDispatcher & dispatcher
Definition: gpu_command_processor.hh:109
GPUCommandProcessor::_shader
Shader * _shader
Definition: gpu_command_processor.hh:108
Shader
Definition: shader.hh:87
HSAPacketProcessor::getQueueDesc
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
Definition: hsa_packet_processor.hh:295
ConstProxyPtr
Definition: proxy_ptr.hh:106
_hsa_dispatch_packet_s::completion_signal
uint64_t completion_signal
Definition: hsa_packet.hh:66
HSADevice::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Definition: hsa_device.cc:53
GPUCommandProcessor::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: gpu_command_processor.cc:317
_hsa_dispatch_packet_s::grid_size_z
uint32_t grid_size_z
Definition: hsa_packet.hh:60
GPUCommandProcessor::ReadDispIdOffsetDmaEvent
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
Definition: gpu_command_processor.hh:126
GPUCommandProcessor::GPUCommandProcessor
GPUCommandProcessor()=delete
_hsa_agent_dispatch_packet_s::type
uint16_t type
Definition: hsa_packet.hh:71
GPUCommandProcessor::submitDispatchPkt
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr) override
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
Definition: gpu_command_processor.cc:68
_hsa_dispatch_packet_s::grid_size_y
uint32_t grid_size_y
Definition: hsa_packet.hh:59
HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:661
GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:62
proxy_ptr.hh
syscall_emul_buf.hh
_hsa_agent_dispatch_packet_s::return_address
uint64_t return_address
Definition: hsa_packet.hh:73
HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition: hsa_queue_entry.hh:134
HSADevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Definition: hsa_device.cc:46
HSAQueueEntry
Definition: hsa_queue_entry.hh:58
GPUCommandProcessor::getHsaSignalValueAddr
Addr getHsaSignalValueAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:92
_hsa_dispatch_packet_s::grid_size_x
uint32_t grid_size_x
Definition: hsa_packet.hh:58
AMDKernelCode
Definition: kernel_code.hh:81
GPUCommandProcessor::shader
Shader * shader()
Definition: gpu_command_processor.cc:330
_hsa_dispatch_packet_s::kernarg_address
uint64_t kernarg_address
Definition: hsa_packet.hh:64
HSAQueueEntry::completionSignal
Addr completionSignal() const
Definition: hsa_queue_entry.hh:170
PioDevice::sys
System * sys
Definition: io_device.hh:102
Nop
def format Nop(code, *opt_flags)
Definition: nop.cc:82
HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:146
HSAQueueDescriptor::hostReadIndexPtr
uint64_t hostReadIndexPtr
Definition: hsa_packet_processor.hh:79
GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value) override
Definition: gpu_command_processor.cc:160
GPUCommandProcessor::submitAgentDispatchPkt
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr) override
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Definition: gpu_command_processor.cc:233
System
Definition: system.hh:73
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:237
GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:113
GPUCommandProcessor::setShader
void setShader(Shader *shader)
Definition: gpu_command_processor.cc:324
GPUCommandProcessor::driver
HSADriver * driver
Definition: gpu_command_processor.hh:110
_hsa_dispatch_packet_s::workgroup_size_z
uint16_t workgroup_size_z
Definition: hsa_packet.hh:56
process.hh
GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:69
GPUCommandProcessor::getHsaSignalEventAddr
Addr getHsaSignalEventAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:102
_hsa_dispatch_packet_s::kernel_object
uint64_t kernel_object
Definition: hsa_packet.hh:63
_hsa_dispatch_packet_s::workgroup_size_y
uint16_t workgroup_size_y
Definition: hsa_packet.hh:55
gpu_command_processor.hh
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:148
HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition: hsa_queue_entry.hh:176
HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition: hsa_queue_entry.hh:140
System::threads
Threads threads
Definition: system.hh:304
_hsa_dispatch_packet_s
Definition: hsa_packet.hh:51
GPUCommandProcessor::attachDriver
void attachDriver(HSADriver *driver) override
Definition: gpu_command_processor.cc:195
GPUCommandProcessor::submitVendorPkt
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr) override
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
Definition: gpu_command_processor.cc:219
_hsa_dispatch_packet_s::workgroup_size_x
uint16_t workgroup_size_x
Definition: hsa_packet.hh:54
GPUCommandProcessor::system
System * system()
Definition: gpu_command_processor.cc:311
GPUCommandProcessor::signalWakeupEvent
void signalWakeupEvent(uint32_t event_id)
Definition: gpu_command_processor.cc:286
AMDKernelCode::runtime_loader_kernel_symbol
uint64_t runtime_loader_kernel_symbol
Definition: kernel_code.hh:186
HSADriver::signalWakeupEvent
virtual void signalWakeupEvent(uint32_t event_id)
Definition: hsa_driver.cc:157
HSADriver
Definition: hsa_driver.hh:61
AMDKernelCode::kernel_code_entry_byte_offset
int64_t kernel_code_entry_byte_offset
Definition: kernel_code.hh:89
dispatcher.hh
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
std::list< AddrRange >
GPUCommandProcessor::initABI
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Definition: gpu_command_processor.cc:298
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:219
HSADevice
Definition: hsa_device.hh:43
GPUCommandProcessor::getHsaSignalMailboxAddr
Addr getHsaSignalMailboxAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:97
DmaDevice::Params
DmaDeviceParams Params
Definition: dma_device.hh:206
_hsa_agent_dispatch_packet_s::arg
uint64_t arg[4]
Definition: hsa_packet.hh:74
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171

Generated on Tue Jun 22 2021 15:28:28 for gem5 by doxygen 1.8.17