gem5  v20.1.0.0
gpu_command_processor.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Anthony Gutierrez
34  */
35 
37 
38 #include "debug/GPUCommandProc.hh"
39 #include "debug/GPUKernelInfo.hh"
41 #include "params/GPUCommandProcessor.hh"
42 
44  : HSADevice(p), dispatcher(*p->dispatcher)
45 {
47 }
48 
66 void
67 GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
68  Addr host_pkt_addr)
69 {
70  static int dynamic_task_id = 0;
71  _hsa_dispatch_packet_t *disp_pkt = (_hsa_dispatch_packet_t*)raw_pkt;
72 
77  auto *tc = sys->threads[0];
78  auto &virt_proxy = tc->getVirtProxy();
79 
89  AMDKernelCode akc;
90  virt_proxy.readBlob(disp_pkt->kernel_object, (uint8_t*)&akc,
91  sizeof(AMDKernelCode));
92 
93  DPRINTF(GPUCommandProc, "GPU machine code is %lli bytes from start of the "
94  "kernel object\n", akc.kernel_code_entry_byte_offset);
95 
96  Addr machine_code_addr = (Addr)disp_pkt->kernel_object
98 
99  DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n",
100  machine_code_addr);
101 
102  Addr kern_name_addr(0);
103  std::string kernel_name;
104 
115  virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10,
116  (uint8_t*)&kern_name_addr, 0x8);
117 
118  virt_proxy.readString(kernel_name, kern_name_addr);
119  } else {
120  kernel_name = "Blit kernel";
121  }
122 
123  DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
124 
125  HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
126  dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr);
127 
128  DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
129  "grid size (%dx%dx%d) kernarg addr: %#x, completion "
130  "signal addr:%#x\n", dynamic_task_id, disp_pkt->workgroup_size_x,
131  disp_pkt->workgroup_size_y, disp_pkt->workgroup_size_z,
132  disp_pkt->grid_size_x, disp_pkt->grid_size_y,
133  disp_pkt->grid_size_z, disp_pkt->kernarg_address,
134  disp_pkt->completion_signal);
135 
136  DPRINTF(GPUCommandProc, "Extracted code object: %s (num vector regs: %d, "
137  "num scalar regs: %d, code addr: %#x, kernarg size: %d, "
138  "LDS size: %d)\n", kernel_name, task->numVectorRegs(),
139  task->numScalarRegs(), task->codeAddr(), 0, 0);
140 
141  initABI(task);
142  ++dynamic_task_id;
143 }
144 
162 void
163 GPUCommandProcessor::submitVendorPkt(void *raw_pkt, uint32_t queue_id,
164  Addr host_pkt_addr)
165 {
166  hsaPP->finishPkt(raw_pkt, queue_id);
167 }
168 
175 void
177 {
178  dispatcher.dispatch(task);
179 }
180 
187 void
189 {
190  auto *readDispIdOffEvent = new ReadDispIdOffsetDmaEvent(*this, task);
191 
192  Addr hostReadIdxPtr
194 
195  dmaReadVirt(hostReadIdxPtr + sizeof(hostReadIdxPtr),
196  sizeof(readDispIdOffEvent->readDispIdOffset), readDispIdOffEvent,
197  &readDispIdOffEvent->readDispIdOffset);
198 }
199 
200 System*
202 {
203  return sys;
204 }
205 
208 {
209  AddrRangeList ranges;
210  return ranges;
211 }
212 
213 void
215 {
216  _shader = shader;
217 }
218 
219 Shader*
221 {
222  return _shader;
223 }
224 
226 GPUCommandProcessorParams::create()
227 {
228  return new GPUCommandProcessor(this);
229 }
HSADevice::hsaPP
HSAPacketProcessor * hsaPP
Definition: hsa_device.hh:92
GPUCommandProcessor::dispatchPkt
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Definition: gpu_command_processor.cc:176
GPUCommandProcessor::dispatcher
GPUDispatcher & dispatcher
Definition: gpu_command_processor.hh:81
GPUCommandProcessor
Definition: gpu_command_processor.hh:57
GPUCommandProcessor::_shader
Shader * _shader
Definition: gpu_command_processor.hh:80
Shader
Definition: shader.hh:87
HSAPacketProcessor::getQueueDesc
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
Definition: hsa_packet_processor.hh:295
_hsa_dispatch_packet_s::completion_signal
uint64_t completion_signal
Definition: hsa_packet.hh:68
GPUCommandProcessor::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: gpu_command_processor.cc:207
_hsa_dispatch_packet_s::grid_size_z
uint32_t grid_size_z
Definition: hsa_packet.hh:62
GPUCommandProcessor::ReadDispIdOffsetDmaEvent
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
Definition: gpu_command_processor.hh:97
GPUCommandProcessor::GPUCommandProcessor
GPUCommandProcessor()=delete
GPUCommandProcessor::submitDispatchPkt
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr) override
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
Definition: gpu_command_processor.cc:67
_hsa_dispatch_packet_s::grid_size_y
uint32_t grid_size_y
Definition: hsa_packet.hh:61
HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:667
HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition: hsa_queue_entry.hh:136
HSADevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Definition: hsa_device.cc:51
HSAQueueEntry
Definition: hsa_queue_entry.hh:60
_hsa_dispatch_packet_s::grid_size_x
uint32_t grid_size_x
Definition: hsa_packet.hh:60
AMDKernelCode
Definition: kernel_code.hh:83
GPUCommandProcessor::shader
Shader * shader()
Definition: gpu_command_processor.cc:220
_hsa_dispatch_packet_s::kernarg_address
uint64_t kernarg_address
Definition: hsa_packet.hh:66
PioDevice::sys
System * sys
Definition: io_device.hh:102
HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:148
HSAQueueDescriptor::hostReadIndexPtr
uint64_t hostReadIndexPtr
Definition: hsa_packet_processor.hh:79
System
Definition: system.hh:73
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:127
GPUCommandProcessor::setShader
void setShader(Shader *shader)
Definition: gpu_command_processor.cc:214
_hsa_dispatch_packet_s::workgroup_size_z
uint16_t workgroup_size_z
Definition: hsa_packet.hh:58
GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:83
_hsa_dispatch_packet_s::kernel_object
uint64_t kernel_object
Definition: hsa_packet.hh:65
_hsa_dispatch_packet_s::workgroup_size_y
uint16_t workgroup_size_y
Definition: hsa_packet.hh:57
gpu_command_processor.hh
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition: hsa_queue_entry.hh:178
HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition: hsa_queue_entry.hh:142
System::threads
Threads threads
Definition: system.hh:309
_hsa_dispatch_packet_s
Definition: hsa_packet.hh:53
GPUCommandProcessor::submitVendorPkt
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr) override
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
Definition: gpu_command_processor.cc:163
_hsa_dispatch_packet_s::workgroup_size_x
uint16_t workgroup_size_x
Definition: hsa_packet.hh:56
GPUCommandProcessor::system
System * system()
Definition: gpu_command_processor.cc:201
AMDKernelCode::runtime_loader_kernel_symbol
uint64_t runtime_loader_kernel_symbol
Definition: kernel_code.hh:188
AMDKernelCode::kernel_code_entry_byte_offset
int64_t kernel_code_entry_byte_offset
Definition: kernel_code.hh:91
dispatcher.hh
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
std::list< AddrRange >
GPUCommandProcessor::initABI
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Definition: gpu_command_processor.cc:188
HSADevice
Definition: hsa_device.hh:46
DmaDevice::Params
DmaDeviceParams Params
Definition: dma_device.hh:171

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17