gem5  v22.1.0.0
gpu_command_processor.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
43 #ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
44 #define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
45 
46 #include <cstdint>
47 #include <functional>
48 
49 #include "base/logging.hh"
50 #include "base/trace.hh"
51 #include "base/types.hh"
52 #include "debug/GPUCommandProc.hh"
53 #include "dev/dma_virt_device.hh"
55 #include "dev/hsa/hsa_signal.hh"
59 #include "params/GPUCommandProcessor.hh"
60 #include "sim/full_system.hh"
61 
62 namespace gem5
63 {
64 
// Forward declarations.
65 struct GPUCommandProcessorParams;
// The GPUComputeDriver implements an HSADriver for an HSA AMD GPU agent.
66 class GPUComputeDriver;
// The GPUDispatcher is the component of the shader that is responsible for
// creating and dispatching WGs.
67 class GPUDispatcher;
68 class Shader;
69 
71 {
72  public:
// Short alias for this object's parameter struct.
73  typedef GPUCommandProcessorParams Params;
// Callback type used with updateHsaSignal(): receives a const uint64_t
// reference and returns nothing.
74  typedef std::function<void(const uint64_t &)> HsaSignalCallbackFunction;
75 
// Default construction is explicitly disallowed.
76  GPUCommandProcessor() = delete;
78 
81 
// Declarations only; the definitions are not part of this listing.
// NOTE(review): setShader()/shader() look like a setter/getter pair for the
// associated Shader — confirm against the implementation file.
82  void setGPUDevice(AMDGPUDevice *gpu_device);
83  void setShader(Shader *shader);
84  Shader* shader();
86 
// Agent command codes with fixed numeric values (Nop = 0, Steal = 1).
// NOTE(review): presumably these are the commands carried by agent dispatch
// packets (see submitAgentDispatchPkt) — confirm against the implementation.
87  enum AgentCmd
88  {
89  Nop = 0,
90  Steal = 1
91  };
92 
// Packet submission interface. All three take the raw packet pointer, the id
// of the queue it came from, and the host address of the packet.
//
// submitAgentDispatchPkt() is for accepting agent dispatch packets.
93  void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
94  Addr host_pkt_addr);
// submitDispatchPkt() is the entry point into the CP from the HSAPP.
95  void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
96  Addr host_pkt_addr);
// submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
97  void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
98  Addr host_pkt_addr);
100 
// dispatchPkt() is called by MQDDmaEvent() on its "sufficient scratch space"
// path, i.e. once the CP has finished extracting the information it needs
// about a task. Defined outside this listing.
101  void dispatchPkt(HSAQueueEntry *task);
// Declared here, defined outside this listing.
// NOTE(review): presumably signals the host-side event identified by
// event_id — confirm.
102  void signalWakeupEvent(uint32_t event_id);
103 
// write() and read() override the device's required access hooks; both ignore
// the packet entirely and return a latency of 0 ticks.
104  Tick write(PacketPtr pkt) override { return 0; }
105  Tick read(PacketPtr pkt) override { return 0; }
// Override returning the address ranges this device responds to (defined
// outside this listing).
106  AddrRangeList getAddrRanges() const override;
107  System *system();
108 
// Declared here, defined outside this listing. The `function` argument is
// optional: when omitted it defaults to a lambda that does nothing.
// NOTE(review): presumably writes signal_value to the signal identified by
// signal_handle and hands a value back through `function` — confirm.
109  void updateHsaSignal(Addr signal_handle, uint64_t signal_value,
110  HsaSignalCallbackFunction function =
111  [] (const uint64_t &) { });
112 
// Returns a 64-bit value for the given signal handle (defined outside this
// listing).
113  uint64_t functionalReadHsaSignal(Addr signal_handle);
114 
// NOTE(review): the signature line (115) for this body is missing from this
// listing; the index at the end of the page lists
// `Addr getHsaSignalValueAddr(Addr signal_handle)`, which presumably belongs
// here — confirm.
// Returns signal_handle advanced by the byte offset of the `value` field
// inside amd_signal_t.
116  {
117  return signal_handle + offsetof(amd_signal_t, value);
118  }
119 
// NOTE(review): the signature line (120) for this body is missing from this
// listing; the index at the end of the page lists
// `Addr getHsaSignalMailboxAddr(Addr signal_handle)`, which presumably
// belongs here — confirm.
// Returns signal_handle advanced by the byte offset of the
// `event_mailbox_ptr` field inside amd_signal_t.
121  {
122  return signal_handle + offsetof(amd_signal_t, event_mailbox_ptr);
123  }
124 
// NOTE(review): the signature line (125) for this body is missing from this
// listing; the index at the end of the page lists
// `Addr getHsaSignalEventAddr(Addr signal_handle)`, which presumably belongs
// here — confirm.
// Returns signal_handle advanced by the byte offset of the `event_id` field
// inside amd_signal_t.
126  {
127  return signal_handle + offsetof(amd_signal_t, event_id);
128  }
129 
130  private:
136 
137  // Typedefing dmaRead and dmaWrite function pointer
// DmaFnPtr is a pointer-to-member-function of DmaDevice taking
// (Addr, int, Event*, uint8_t*, Tick) and returning void.
138  typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
// initABI(): the CP is responsible for traversing the HSA-ABI-related data
// structures from memory for the given task (defined outside this listing).
139  void initABI(HSAQueueEntry *task);
// translate(): override used to translate a range of addresses from virtual
// to physical addresses (defined outside this listing).
141  TranslationGenPtr translate(Addr vaddr, Addr size) override;
142 
// Fragment of a private callback. NOTE(review): source lines 156, 159-166,
// 169-172 and 176 are missing from this listing. The index at the end of the
// page lists
// `void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task,
//                                const uint32_t &readDispIdOffset)`,
// which presumably is the function whose remains are shown here — confirm.
155  void
157  const uint32_t &readDispIdOffset)
158  {
// Tail of an expression whose beginning (line 166) is not shown: it takes
// hostReadIndexPtr from an object looked up by task->queueId() and subtracts
// readDispIdOffset from it.
167  task->queueId())->hostReadIndexPtr - readDispIdOffset;
168 
// Heap-allocate a callback that invokes MQDDmaEvent(task) when it is run.
// The lambda captures by copy ([=]), so it holds the task pointer itself.
173  auto *mqdDmaEvent = new DmaVirtCallback<int>(
174  [ = ] (const int &) { MQDDmaEvent(task); });
175 
// Tail of a call whose first line (176) is not shown; it passes the size of
// _amd_queue_t, the callback above, and the address of task->amdQueue.
// NOTE(review): presumably a dmaReadVirt() that fills task->amdQueue — confirm.
177  sizeof(_amd_queue_t), mqdDmaEvent, &task->amdQueue);
178  }
179 
// Fragment of a private callback. NOTE(review): source lines 188, 190-201,
// 203, 220, 223 and 231 are missing from this listing. The index at the end
// of the page lists `void MQDDmaEvent(HSAQueueEntry *task)`, which presumably
// is the function shown here — confirm.
//
// Visible behaviour: the task's per-work-item private memory requirement is
// compared against a limit (the right-hand side is on missing line 203).
// When the requirement is larger, more scratch is requested and
// WaitScratchDmaEvent() is registered as the follow-up; otherwise the task is
// dispatched immediately via dispatchPkt().
187  void
189  {
202  if (task->privMemPerItem() >
204  // TODO: Raising this signal will potentially nuke scratch
205  // space for in-flight kernels that were launched from this
206  // queue. We need to drain all kernels and deschedule the
207  // queue before raising this signal. For now, just assert if
208  // there are any in-flight kernels and tell the user that this
209  // feature still needs to be implemented.
// NOTE(review): the check fires only when more than one packet is in flight
// (> 1, not > 0) — presumably the task being handled here is itself counted;
// confirm.
210  fatal_if(hsaPP->inFlightPkts(task->queueId()) > 1,
211  "Needed more scratch, but kernels are in flight for "
212  "this queue and it is unsafe to reallocate scratch. "
213  "We need to implement additional intelligence in the "
214  "hardware scheduling logic to support CP-driven "
215  "queue draining and scheduling.");
// The first %x argument (the available amount) is on missing line 220.
216  DPRINTF(GPUCommandProc, "Not enough scratch space to launch "
217  "kernel (%x available, %x requested bytes per "
218  "workitem). Asking host runtime to allocate more "
219  "space.\n",
221  task->privMemPerItem());
222 
// Callback argument of a call whose first line (223) is not shown; it
// forwards its uint64_t argument, together with task, to
// WaitScratchDmaEvent().
224  [ = ] (const uint64_t &dma_buffer)
225  { WaitScratchDmaEvent(task, dma_buffer); });
226 
227  } else {
// Enough scratch: log it (first %x argument is on missing line 231) and
// dispatch the task.
228  DPRINTF(GPUCommandProc, "Sufficient scratch space, launching "
229  "kernel (%x available, %x requested bytes per "
230  "workitem).\n",
232  task->privMemPerItem());
233  dispatchPkt(task);
234  }
235  }
236 
// Poll on the queue-inactive signal until the host runtime has dealt with
// the lack of scratch space.
//
// task      - the queue entry that is waiting for scratch space.
// dmaBuffer - value handed to this callback; 0 is treated as "allocation
//             complete", any other value as "keep polling".
//
// NOTE(review): source lines 247-254, 261-264, 266 and 273-279 are missing
// from this listing; line 266 held the argument of getHsaSignalValueAddr().
241  void
242  WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
243  {
244  if (dmaBuffer == 0) {
245  DPRINTF(GPUCommandProc, "Host scratch allocation complete. "
246  "Attempting to re-read MQD\n");
// Re-read the _amd_queue_t at task->hostAMDQueueAddr into task->amdQueue and
// hand control back to MQDDmaEvent(task) through a heap-allocated callback.
255  auto cb = new DmaVirtCallback<int>(
256  [ = ] (const int &) { MQDDmaEvent(task); });
257 
258  dmaReadVirt(task->hostAMDQueueAddr, sizeof(_amd_queue_t), cb,
259  &task->amdQueue);
260  } else {
// Address of the signal's value field; the handle argument is on missing
// line 266.
265  Addr value_addr = getHsaSignalValueAddr(
267  DPRINTF(GPUCommandProc, "Polling queue inactive signal at "
268  "%p.\n", value_addr);
// The callback re-enters this function with the value that was read, so the
// poll repeats until that value is 0.
269  auto cb = new DmaVirtCallback<uint64_t>(
270  [ = ] (const uint64_t &dma_buffer)
271  { WaitScratchDmaEvent(task, dma_buffer); } );
272 
// Read sizeof(Addr) bytes from value_addr into the callback's own dmaBuffer.
// The final argument (1e9) is dmaReadVirt's `delay` parameter, in ticks.
280  dmaReadVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer, 1e9);
281  }
282  }
283 };
284 
285 } // namespace gem5
286 
287 #endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
#define DPRINTF(x,...)
Definition: trace.hh:186
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
Device model for an AMD GPU.
DmaDeviceParams Params
Definition: dma_device.hh:209
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
Addr getHsaSignalMailboxAddr(Addr signal_handle)
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
void setGPUDevice(AMDGPUDevice *gpu_device)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void signalWakeupEvent(uint32_t event_id)
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
HSAPacketProcessor & hsaPacketProc()
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Addr getHsaSignalValueAddr(Addr signal_handle)
void MQDDmaEvent(HSAQueueEntry *task)
Perform a DMA read of the MQD that corresponds to a hardware queue descriptor (HQD).
void attachDriver(GPUComputeDriver *driver)
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Addr getHsaSignalEventAddr(Addr signal_handle)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
GPUCommandProcessorParams Params
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
uint64_t functionalReadHsaSignal(Addr signal_handle)
void WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
Poll on queue_inactive signal until the runtime can get around to taking care of our lack of scratch ...
uint64_t inFlightPkts(uint32_t queId)
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
uint32_t queueId() const
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
int privMemPerItem() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
Nop class.
Definition: nop.hh:49
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUComputeDriver implements an HSADriver for an HSA AMD GPU agent.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 54 > p
Definition: pagetable.hh:70
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
uint64_t Tick
Tick count type.
Definition: types.hh:58
uint16_t RequestorID
Definition: request.hh:95
std::unique_ptr< TranslationGen > TranslationGenPtr
_hsa_signal_t queue_inactive_signal
Definition: hsa_queue.hh:87
uint32_t compute_tmpring_size_wavesize
Definition: hsa_queue.hh:79

Generated on Wed Dec 21 2022 10:22:35 for gem5 by doxygen 1.9.1