gem5  v21.2.1.1
gpu_command_processor.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
43 #ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
44 #define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
45 
46 #include <cstdint>
47 #include <functional>
48 
49 #include "base/logging.hh"
50 #include "base/trace.hh"
51 #include "base/types.hh"
52 #include "debug/GPUCommandProc.hh"
53 #include "dev/dma_virt_device.hh"
55 #include "dev/hsa/hsa_signal.hh"
59 #include "params/GPUCommandProcessor.hh"
60 
61 namespace gem5
62 {
63 
64 struct GPUCommandProcessorParams;
65 class GPUComputeDriver;
66 class GPUDispatcher;
67 class Shader;
68 
70 {
71  public:
72  typedef GPUCommandProcessorParams Params;
73  typedef std::function<void(const uint64_t &)> HsaSignalCallbackFunction;
74 
75  GPUCommandProcessor() = delete;
77 
79 
80  void setShader(Shader *shader);
81  Shader* shader();
83 
84  enum AgentCmd // NOTE(review): presumably the command codes carried by agent dispatch packets (cf. submitAgentDispatchPkt) - confirm in gpu_command_processor.cc
85  {
86  Nop = 0,
87  Steal = 1
88  };
89 
90  void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
91  Addr host_pkt_addr);
92  void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
93  Addr host_pkt_addr);
94  void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
95  Addr host_pkt_addr);
97 
98  void dispatchPkt(HSAQueueEntry *task);
99  void signalWakeupEvent(uint32_t event_id);
100 
101  Tick write(PacketPtr pkt) override { return 0; } // Stub override: ignores pkt and always returns a Tick of 0.
102  Tick read(PacketPtr pkt) override { return 0; } // Stub override: ignores pkt and always returns a Tick of 0.
103  AddrRangeList getAddrRanges() const override;
104  System *system();
105 
106  void updateHsaSignal(Addr signal_handle, uint64_t signal_value,
107  HsaSignalCallbackFunction function =
108  [] (const uint64_t &) { });
109 
110  uint64_t functionalReadHsaSignal(Addr signal_handle);
111 
113  {
114  return signal_handle + offsetof(amd_signal_t, value);
115  }
116 
118  {
119  return signal_handle + offsetof(amd_signal_t, event_mailbox_ptr);
120  }
121 
123  {
124  return signal_handle + offsetof(amd_signal_t, event_id);
125  }
126 
127  private:
131 
132  // Pointer-to-member type matching the signature of DmaDevice's dmaRead and dmaWrite functions
133  typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
134  void initABI(HSAQueueEntry *task);
136  TranslationGenPtr translate(Addr vaddr, Addr size) override;
137 
150  void
152  const uint32_t &readDispIdOffset)
153  {
162  task->queueId())->hostReadIndexPtr - readDispIdOffset;
163 
168  auto *mqdDmaEvent = new DmaVirtCallback<int>(
169  [ = ] (const int &) { MQDDmaEvent(task); });
170 
172  sizeof(_amd_queue_t), mqdDmaEvent, &task->amdQueue);
173  }
174 
182  void
184  {
197  if (task->privMemPerItem() >
199  // TODO: Raising this signal will potentially nuke scratch
200  // space for in-flight kernels that were launched from this
201  // queue. We need to drain all kernels and deschedule the
202  // queue before raising this signal. For now, just assert if
203  // there are any in-flight kernels and tell the user that this
204  // feature still needs to be implemented.
205  fatal_if(hsaPP->inFlightPkts(task->queueId()) > 1,
206  "Needed more scratch, but kernels are in flight for "
207  "this queue and it is unsafe to reallocate scratch. "
208  "We need to implement additional intelligence in the "
209  "hardware scheduling logic to support CP-driven "
210  "queue draining and scheduling.");
211  DPRINTF(GPUCommandProc, "Not enough scratch space to launch "
212  "kernel (%x available, %x requested bytes per "
213  "workitem). Asking host runtime to allocate more "
214  "space.\n",
216  task->privMemPerItem());
217 
219  [ = ] (const uint64_t &dma_buffer)
220  { WaitScratchDmaEvent(task, dma_buffer); });
221 
222  } else {
223  DPRINTF(GPUCommandProc, "Sufficient scratch space, launching "
224  "kernel (%x available, %x requested bytes per "
225  "workitem).\n",
227  task->privMemPerItem());
228  dispatchPkt(task);
229  }
230  }
231 
236  void
237  WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
238  {
239  if (dmaBuffer == 0) {
240  DPRINTF(GPUCommandProc, "Host scratch allocation complete. "
241  "Attempting to re-read MQD\n");
250  auto cb = new DmaVirtCallback<int>(
251  [ = ] (const int &) { MQDDmaEvent(task); });
252 
253  dmaReadVirt(task->hostAMDQueueAddr, sizeof(_amd_queue_t), cb,
254  &task->amdQueue);
255  } else {
260  Addr value_addr = getHsaSignalValueAddr(
262  DPRINTF(GPUCommandProc, "Polling queue inactive signal at "
263  "%p.\n", value_addr);
264  auto cb = new DmaVirtCallback<uint64_t>(
265  [ = ] (const uint64_t &dma_buffer)
266  { WaitScratchDmaEvent(task, dma_buffer); } );
267  dmaReadVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer);
268  }
269  }
270 };
271 
272 } // namespace gem5
273 
274 #endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
gem5::_amd_queue_t
Definition: hsa_queue.hh:64
gem5::GPUCommandProcessor::Params
GPUCommandProcessorParams Params
Definition: gpu_command_processor.hh:72
gem5::GPUCommandProcessor::getHsaSignalEventAddr
Addr getHsaSignalEventAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:122
hsa_queue_entry.hh
gem5::GPUCommandProcessor::read
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
Definition: gpu_command_processor.hh:102
gem5::GPUCommandProcessor::shader
Shader * shader()
Definition: gpu_command_processor.cc:366
gem5::GPUCommandProcessor::hsaPP
HSAPacketProcessor * hsaPP
Definition: gpu_command_processor.hh:135
gem5::SparcISA::Nop
Nop class.
Definition: nop.hh:48
gem5::HSAQueueEntry::privMemPerItem
int privMemPerItem() const
Definition: hsa_queue_entry.hh:194
gem5::GPUCommandProcessor::signalWakeupEvent
void signalWakeupEvent(uint32_t event_id)
Definition: gpu_command_processor.cc:321
gem5::GPUCommandProcessor::attachDriver
void attachDriver(GPUComputeDriver *driver)
Definition: gpu_command_processor.cc:221
gem5::HSAQueueDescriptor::hostReadIndexPtr
uint64_t hostReadIndexPtr
Definition: hsa_packet_processor.hh:84
gem5::DmaVirtDevice::DmaVirtCallback
Wraps a std::function object in a DmaCallback.
Definition: dma_virt_device.hh:51
gem5::DmaVirtDevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
Definition: dma_virt_device.cc:38
hsa_packet_processor.hh
gem5::GPUCommandProcessor::DmaFnPtr
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
Definition: gpu_command_processor.hh:133
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:59
gem5::HSAPacketProcessor::getQueueDesc
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
Definition: hsa_packet_processor.hh:323
gem5::HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:147
gem5::GPUCommandProcessor::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device responds to.
Definition: gpu_command_processor.cc:353
gem5::GPUCommandProcessor::GPUCommandProcessor
GPUCommandProcessor()=delete
gem5::HSAPacketProcessor::inFlightPkts
uint64_t inFlightPkts(uint32_t queId)
Definition: hsa_packet_processor.hh:334
gem5::GPUCommandProcessor::WaitScratchDmaEvent
void WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
Poll on queue_inactive signal until the runtime can get around to taking care of our lack of scratch space.
Definition: gpu_command_processor.hh:237
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:61
gem5::GPUCommandProcessor::getHsaSignalMailboxAddr
Addr getHsaSignalMailboxAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:117
gem5::GPUCommandProcessor::submitDispatchPkt
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with AQL kernel dispatch packets.
Definition: gpu_command_processor.cc:95
gem5::GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
Definition: gpu_command_processor.cc:186
gem5::GPUCommandProcessor::MQDDmaEvent
void MQDDmaEvent(HSAQueueEntry *task)
Perform a DMA read of the MQD that corresponds to a hardware queue descriptor (HQD).
Definition: gpu_command_processor.hh:183
gem5::HSAQueueEntry::amdQueue
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register state.
Definition: hsa_queue_entry.hh:307
gem5::GPUCommandProcessor::AgentCmd
AgentCmd
Definition: gpu_command_processor.hh:84
gem5::System
Definition: system.hh:75
gem5::GPUCommandProcessor
Definition: gpu_command_processor.hh:69
gem5::GPUCommandProcessor::dispatchPkt
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI state, we send a description of the task to the dispatcher.
Definition: gpu_command_processor.cc:315
gem5::GPUCommandProcessor::setShader
void setShader(Shader *shader)
Definition: gpu_command_processor.cc:360
gem5::HSAPacketProcessor
Definition: hsa_packet_processor.hh:249
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::Event
Definition: eventq.hh:251
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
Definition: packet.hh:283
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::GPUCommandProcessor::_driver
GPUComputeDriver * _driver
Definition: gpu_command_processor.hh:130
gem5::DmaDevice
Definition: dma_device.hh:203
gem5::GPUCommandProcessor::write
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
Definition: gpu_command_processor.hh:101
gem5::GPUCommandProcessor::submitAgentDispatchPkt
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Definition: gpu_command_processor.cc:268
gem5::DmaDevice::Params
DmaDeviceParams Params
Definition: dma_device.hh:209
gem5::GPUCommandProcessor::getHsaSignalValueAddr
Addr getHsaSignalValueAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:112
gem5::HSAQueueEntry::hostAMDQueueAddr
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
Definition: hsa_queue_entry.hh:300
hsa_signal.hh
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUCommandProcessor::system
System * system()
Definition: gpu_command_processor.cc:347
gem5::_hsa_signal_t::handle
uint64_t handle
Definition: hsa_queue.hh:48
gem5::GPUCommandProcessor::_shader
Shader * _shader
Definition: gpu_command_processor.hh:128
types.hh
gem5::GPUCommandProcessor::ReadDispIdOffsetDmaEvent
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
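Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after the read_dispatch_id (the read pointer) in the amd_hsa_queue_t struct (aka memory queue descriptor (MQD)), to find the base address of the MQD.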
Definition: gpu_command_processor.hh:151
gem5::GPUCommandProcessor::submitVendorPkt
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
Definition: gpu_command_processor.cc:254
gem5::GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle)
Definition: gpu_command_processor.cc:177
gem5::GPUComputeDriver
Definition: gpu_compute_driver.hh:62
logging.hh
dma_virt_device.hh
trace.hh
gem5::GPUDispatcher
Definition: dispatcher.hh:62
dispatcher.hh
gem5::DmaVirtDevice
Definition: dma_virt_device.hh:41
gpu_compute_driver.hh
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
gem5::_amd_queue_t::queue_inactive_signal
_hsa_signal_t queue_inactive_signal
Definition: hsa_queue.hh:87
std::list< AddrRange >
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true and allows the programmer to specify diagnostic printout.
Definition: logging.hh:226
gem5::_amd_queue_t::compute_tmpring_size_wavesize
uint32_t compute_tmpring_size_wavesize
Definition: hsa_queue.hh:79
gem5::amd_signal_s
Definition: hsa_signal.hh:50
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: tlb.cc:60
gem5::GPUCommandProcessor::dispatcher
GPUDispatcher & dispatcher
Definition: gpu_command_processor.hh:129
gem5::GPUCommandProcessor::Steal
@ Steal
Definition: gpu_command_processor.hh:87
gem5::GPUCommandProcessor::HsaSignalCallbackFunction
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
Definition: gpu_command_processor.hh:73
gem5::GPUCommandProcessor::translate
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
Definition: gpu_command_processor.cc:67
gem5::TranslationGenPtr
std::unique_ptr< TranslationGen > TranslationGenPtr
Definition: translation_gen.hh:128
gem5::GPUCommandProcessor::driver
GPUComputeDriver * driver()
Definition: gpu_command_processor.cc:231
gem5::Shader
Definition: shader.hh:82
gem5::GPUCommandProcessor::initABI
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing the ABI state.
Definition: gpu_command_processor.cc:333

Generated on Wed May 4 2022 12:13:58 for gem5 by doxygen 1.8.17