gem5  v21.1.0.2
gpu_command_processor.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include <cassert>
37 
38 #include "base/chunk_generator.hh"
39 #include "debug/GPUCommandProc.hh"
40 #include "debug/GPUKernelInfo.hh"
42 #include "params/GPUCommandProcessor.hh"
43 #include "sim/process.hh"
44 #include "sim/proxy_ptr.hh"
45 #include "sim/syscall_emul_buf.hh"
46 
47 namespace gem5
48 {
49 
51  : DmaVirtDevice(p), dispatcher(*p.dispatcher), _driver(nullptr),
52  hsaPP(p.hsapp)
53 {
54  assert(hsaPP);
55  hsaPP->setDevice(this);
57 }
58 
61 {
62  return *hsaPP;
63 }
64 
65 void
67 {
73  auto process = sys->threads[0]->getProcessPtr();
74 
75  if (!process->pTable->translate(vaddr, paddr)) {
76  fatal("failed translation: vaddr 0x%x\n", vaddr);
77  }
78 }
79 
97 void
98 GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
99  Addr host_pkt_addr)
100 {
101  static int dynamic_task_id = 0;
102  _hsa_dispatch_packet_t *disp_pkt = (_hsa_dispatch_packet_t*)raw_pkt;
103 
108  auto *tc = sys->threads[0];
109  auto &virt_proxy = tc->getVirtProxy();
110 
120  AMDKernelCode akc;
121  virt_proxy.readBlob(disp_pkt->kernel_object, (uint8_t*)&akc,
122  sizeof(AMDKernelCode));
123 
124  DPRINTF(GPUCommandProc, "GPU machine code is %lli bytes from start of the "
125  "kernel object\n", akc.kernel_code_entry_byte_offset);
126 
127  DPRINTF(GPUCommandProc,"GPUCommandProc: Sending dispatch pkt to %lu\n",
128  (uint64_t)tc->cpuId());
129 
130 
131  Addr machine_code_addr = (Addr)disp_pkt->kernel_object
133 
134  DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n",
135  machine_code_addr);
136 
137  std::string kernel_name;
138 
149  kernel_name = "Some kernel";
150  } else {
151  kernel_name = "Blit kernel";
152  }
153 
154  DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
155 
156  HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
157  dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr);
158 
159  DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
160  "grid size (%dx%dx%d) kernarg addr: %#x, completion "
161  "signal addr:%#x\n", dynamic_task_id, disp_pkt->workgroup_size_x,
162  disp_pkt->workgroup_size_y, disp_pkt->workgroup_size_z,
163  disp_pkt->grid_size_x, disp_pkt->grid_size_y,
164  disp_pkt->grid_size_z, disp_pkt->kernarg_address,
165  disp_pkt->completion_signal);
166 
167  DPRINTF(GPUCommandProc, "Extracted code object: %s (num vector regs: %d, "
168  "num scalar regs: %d, code addr: %#x, kernarg size: %d, "
169  "LDS size: %d)\n", kernel_name, task->numVectorRegs(),
170  task->numScalarRegs(), task->codeAddr(), 0, 0);
171 
172  initABI(task);
173  ++dynamic_task_id;
174 }
175 
176 uint64_t
178 {
179  Addr value_addr = getHsaSignalValueAddr(signal_handle);
180  auto tc = system()->threads[0];
181  ConstVPtr<Addr> prev_value(value_addr, tc);
182  return *prev_value;
183 }
184 
185 void
186 GPUCommandProcessor::updateHsaSignal(Addr signal_handle, uint64_t signal_value,
187  HsaSignalCallbackFunction function)
188 {
189  // The signal value is aligned 8 bytes from
190  // the actual handle in the runtime
191  Addr value_addr = getHsaSignalValueAddr(signal_handle);
192  Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
193  Addr event_addr = getHsaSignalEventAddr(signal_handle);
194  DPRINTF(GPUCommandProc, "Triggering completion signal: %x!\n", value_addr);
195 
196  auto cb = new DmaVirtCallback<uint64_t>(function, signal_value);
197 
198  dmaWriteVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer, 0);
199 
200  auto tc = system()->threads[0];
201  ConstVPtr<uint64_t> mailbox_ptr(mailbox_addr, tc);
202 
203  // Notifying an event with its mailbox pointer is
204  // not supported in the current implementation. Just use
205  // mailbox pointer to distinguish between interruptible
206  // and default signal. Interruptible signal will have
207  // a valid mailbox pointer.
208  if (*mailbox_ptr != 0) {
209  // This is an interruptible signal. Now, read the
210  // event ID and directly communicate with the driver
211  // about that event notification.
212  ConstVPtr<uint32_t> event_val(event_addr, tc);
213 
214  DPRINTF(GPUCommandProc, "Calling signal wakeup event on "
215  "signal event value %d\n", *event_val);
216  signalWakeupEvent(*event_val);
217  }
218 }
219 
220 void
222 {
223  fatal_if(_driver, "Should not overwrite driver.");
224  // TODO: GPU Driver inheritance hierarchy doesn't really make sense.
225  // Should get rid of the base class.
226  _driver = gpu_driver;
227  assert(_driver);
228 }
229 
232 {
233  return _driver;
234 }
235 
253 void
254 GPUCommandProcessor::submitVendorPkt(void *raw_pkt, uint32_t queue_id,
255  Addr host_pkt_addr)
256 {
257  hsaPP->finishPkt(raw_pkt, queue_id);
258 }
259 
267 void
268 GPUCommandProcessor::submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
269  Addr host_pkt_addr)
270 {
271  //Parse the Packet, see what it wants us to do
272  _hsa_agent_dispatch_packet_t * agent_pkt =
273  (_hsa_agent_dispatch_packet_t *)raw_pkt;
274 
275  if (agent_pkt->type == AgentCmd::Nop) {
276  DPRINTF(GPUCommandProc, "Agent Dispatch Packet NOP\n");
277  } else if (agent_pkt->type == AgentCmd::Steal) {
278  //This is where we steal the HSA Task's completion signal
279  int kid = agent_pkt->arg[0];
280  DPRINTF(GPUCommandProc,
281  "Agent Dispatch Packet Stealing signal handle for kernel %d\n",
282  kid);
283 
284  HSAQueueEntry *task = dispatcher.hsaTask(kid);
285  uint64_t signal_addr = task->completionSignal();// + sizeof(uint64_t);
286 
287  uint64_t return_address = agent_pkt->return_address;
288  DPRINTF(GPUCommandProc, "Return Addr: %p\n",return_address);
289  //*return_address = signal_addr;
290  Addr *new_signal_addr = new Addr;
291  *new_signal_addr = (Addr)signal_addr;
292  dmaWriteVirt(return_address, sizeof(Addr), nullptr, new_signal_addr, 0);
293 
294  DPRINTF(GPUCommandProc,
295  "Agent Dispatch Packet Stealing signal handle from kid %d :" \
296  "(%x:%x) writing into %x\n",
297  kid,signal_addr,new_signal_addr,return_address);
298 
299  } else
300  {
301  panic("The agent dispatch packet provided an unknown argument in" \
302  "arg[0],currently only 0(nop) or 1(return kernel signal) is accepted");
303  }
304 
305  hsaPP->finishPkt(raw_pkt, queue_id);
306 }
307 
314 void
316 {
317  dispatcher.dispatch(task);
318 }
319 
320 void
322 {
323  _driver->signalWakeupEvent(event_id);
324 }
325 
332 void
334 {
335  auto cb = new DmaVirtCallback<uint32_t>(
336  [ = ] (const uint32_t &readDispIdOffset)
337  { ReadDispIdOffsetDmaEvent(task, readDispIdOffset); }, 0);
338 
339  Addr hostReadIdxPtr
340  = hsaPP->getQueueDesc(task->queueId())->hostReadIndexPtr;
341 
342  dmaReadVirt(hostReadIdxPtr + sizeof(hostReadIdxPtr),
343  sizeof(uint32_t), cb, &cb->dmaBuffer);
344 }
345 
346 System*
348 {
349  return sys;
350 }
351 
354 {
355  AddrRangeList ranges;
356  return ranges;
357 }
358 
359 void
361 {
362  _shader = shader;
363 }
364 
365 Shader*
367 {
368  return _shader;
369 }
370 
371 } // namespace gem5
gem5::_hsa_agent_dispatch_packet_t
Definition: hsa_packet.hh:74
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:189
gem5::_hsa_agent_dispatch_packet_t::arg
uint64_t arg[4]
Definition: hsa_packet.hh:80
gem5::GPUCommandProcessor::getHsaSignalEventAddr
Addr getHsaSignalEventAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:124
gem5::GPUComputeDriver::signalWakeupEvent
virtual void signalWakeupEvent(uint32_t event_id)
Definition: gpu_compute_driver.cc:184
gem5::AMDKernelCode::runtime_loader_kernel_symbol
uint64_t runtime_loader_kernel_symbol
Definition: kernel_code.hh:189
gem5::ConstProxyPtr
Definition: proxy_ptr.hh:109
gem5::GPUCommandProcessor::shader
Shader * shader()
Definition: gpu_command_processor.cc:366
gem5::GPUCommandProcessor::hsaPP
HSAPacketProcessor * hsaPP
Definition: gpu_command_processor.hh:137
gem5::_hsa_dispatch_packet_t::workgroup_size_x
uint16_t workgroup_size_x
Definition: hsa_packet.hh:59
gem5::GPUCommandProcessor::signalWakeupEvent
void signalWakeupEvent(uint32_t event_id)
Definition: gpu_command_processor.cc:321
gem5::GPUCommandProcessor::attachDriver
void attachDriver(GPUComputeDriver *driver)
Definition: gpu_command_processor.cc:221
gem5::DmaVirtDevice::DmaVirtCallback
Wraps a std::function object in a DmaCallback.
Definition: dma_virt_device.hh:55
gem5::HSAQueueEntry::completionSignal
Addr completionSignal() const
Definition: hsa_queue_entry.hh:173
gem5::DmaVirtDevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
Definition: dma_virt_device.cc:45
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:61
gem5::HSAPacketProcessor::getQueueDesc
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
Definition: hsa_packet_processor.hh:298
gem5::HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:149
gem5::GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:65
gem5::GPUCommandProcessor::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: gpu_command_processor.cc:353
gem5::GPUCommandProcessor::GPUCommandProcessor
GPUCommandProcessor()=delete
proxy_ptr.hh
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:60
syscall_emul_buf.hh
gem5::GPUCommandProcessor::getHsaSignalMailboxAddr
Addr getHsaSignalMailboxAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:119
gem5::HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition: hsa_queue_entry.hh:143
gem5::GPUCommandProcessor::submitDispatchPkt
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
Definition: gpu_command_processor.cc:98
gem5::DmaVirtDevice::DmaVirtCallback::dmaBuffer
T dmaBuffer
Definition: dma_virt_device.hh:66
gem5::GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
Definition: gpu_command_processor.cc:186
Nop
def format Nop(code, *opt_flags)
Definition: nop.cc:82
gem5::System
Definition: system.hh:77
gem5::GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:116
gem5::HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition: hsa_queue_entry.hh:137
gem5::HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:630
gem5::GPUCommandProcessor::dispatchPkt
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Definition: gpu_command_processor.cc:315
gem5::GPUCommandProcessor::setShader
void setShader(Shader *shader)
Definition: gpu_command_processor.cc:360
gem5::HSAPacketProcessor
Definition: hsa_packet_processor.hh:224
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::_hsa_dispatch_packet_t::grid_size_x
uint32_t grid_size_x
Definition: hsa_packet.hh:63
gem5::PioDevice::sys
System * sys
Definition: io_device.hh:105
gem5::_hsa_dispatch_packet_t::kernel_object
uint64_t kernel_object
Definition: hsa_packet.hh:68
gem5::GPUCommandProcessor::_driver
GPUComputeDriver * _driver
Definition: gpu_command_processor.hh:132
process.hh
gem5::GPUCommandProcessor::submitAgentDispatchPkt
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Definition: gpu_command_processor.cc:268
gem5::_hsa_dispatch_packet_t::workgroup_size_z
uint16_t workgroup_size_z
Definition: hsa_packet.hh:61
gem5::DmaDevice::Params
DmaDeviceParams Params
Definition: dma_device.hh:209
gem5::HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition: hsa_queue_entry.hh:179
gpu_command_processor.hh
gem5::_hsa_dispatch_packet_t
Definition: hsa_packet.hh:55
gem5::GPUCommandProcessor::getHsaSignalValueAddr
Addr getHsaSignalValueAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:114
gem5::GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:72
gem5::DmaVirtDevice::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
Definition: dma_virt_device.cc:52
gem5::_hsa_agent_dispatch_packet_t::type
uint16_t type
Definition: hsa_packet.hh:77
gem5::_hsa_dispatch_packet_t::kernarg_address
uint64_t kernarg_address
Definition: hsa_packet.hh:69
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUCommandProcessor::system
System * system()
Definition: gpu_command_processor.cc:347
gem5::_hsa_dispatch_packet_t::grid_size_y
uint32_t grid_size_y
Definition: hsa_packet.hh:64
gem5::_hsa_dispatch_packet_t::grid_size_z
uint32_t grid_size_z
Definition: hsa_packet.hh:65
gem5::_hsa_dispatch_packet_t::workgroup_size_y
uint16_t workgroup_size_y
Definition: hsa_packet.hh:60
gem5::GPUCommandProcessor::_shader
Shader * _shader
Definition: gpu_command_processor.hh:130
gem5::HSAPacketProcessor::setDevice
void setDevice(GPUCommandProcessor *dev)
Definition: hsa_packet_processor.cc:605
gem5::System::threads
Threads threads
Definition: system.hh:316
gem5::GPUCommandProcessor::ReadDispIdOffsetDmaEvent
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
Definition: gpu_command_processor.hh:153
gem5::GPUCommandProcessor::submitVendorPkt
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
Definition: gpu_command_processor.cc:254
gem5::GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle)
Definition: gpu_command_processor.cc:177
gem5::GPUComputeDriver
Definition: gpu_compute_driver.hh:64
chunk_generator.hh
gem5::AMDKernelCode::kernel_code_entry_byte_offset
int64_t kernel_code_entry_byte_offset
Definition: kernel_code.hh:92
dispatcher.hh
gem5::DmaVirtDevice
Definition: dma_virt_device.hh:42
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
gem5::GPUCommandProcessor::translateOrDie
void translateOrDie(Addr vaddr, Addr &paddr) override
Function used to translate from virtual to physical addresses.
Definition: gpu_command_processor.cc:66
std::list< AddrRange >
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:225
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::_hsa_agent_dispatch_packet_t::return_address
uint64_t return_address
Definition: hsa_packet.hh:79
gem5::GPUCommandProcessor::dispatcher
GPUDispatcher & dispatcher
Definition: gpu_command_processor.hh:131
gem5::_hsa_dispatch_packet_t::completion_signal
uint64_t completion_signal
Definition: hsa_packet.hh:71
gem5::GPUCommandProcessor::HsaSignalCallbackFunction
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
Definition: gpu_command_processor.hh:75
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:177
gem5::GPUCommandProcessor::driver
GPUComputeDriver * driver()
Definition: gpu_command_processor.cc:231
gem5::AMDKernelCode
Definition: kernel_code.hh:84
gem5::Shader
Definition: shader.hh:84
gem5::GPUCommandProcessor::initABI
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Definition: gpu_command_processor.cc:333

Generated on Tue Sep 21 2021 12:25:23 for gem5 by doxygen 1.8.17