gem5 [DEVELOP-FOR-25.0]
Loading...
Searching...
No Matches
gpu_command_processor.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
42
43#ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
44#define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
45
46#include <cstdint>
47#include <functional>
48
50#include "base/logging.hh"
51#include "base/trace.hh"
52#include "base/types.hh"
53#include "debug/GPUCommandProc.hh"
56#include "dev/hsa/hsa_signal.hh"
60#include "params/GPUCommandProcessor.hh"
61#include "sim/full_system.hh"
62
63namespace gem5
64{
65
66struct GPUCommandProcessorParams;
68class GPUDispatcher;
69class Shader;
70
72{
73 public:
74 typedef GPUCommandProcessorParams Params;
75 typedef std::function<void(const uint64_t &)> HsaSignalCallbackFunction;
76
79
82 GfxVersion getGfxVersion() const;
83
84 void setGPUDevice(AMDGPUDevice *gpu_device);
85 void setShader(Shader *shader);
86 Shader* shader();
88
90 {
91 AMDKernelCode *akc = nullptr;
92 void *raw_pkt = nullptr;
93 uint32_t queue_id = 0;
95 PacketPtr readPkt = nullptr;
96 HSAQueueEntry *task = nullptr;
97 };
98
100
102 {
103 Nop = 0,
105 };
106
107 void performTimingRead(PacketPtr pkt, int dispType);
108
109 void completeTimingRead(int dispType);
110
111 void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
112 Addr host_pkt_addr);
113 void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
114 Addr host_pkt_addr);
115 void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
116 Addr host_pkt_addr);
118
119 void dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt,
120 uint32_t queue_id, Addr host_pkt_addr);
121 void dispatchPkt(HSAQueueEntry *task);
122 void signalWakeupEvent(uint32_t event_id);
123
124 Tick write(PacketPtr pkt) override { return 0; }
125 Tick read(PacketPtr pkt) override { return 0; }
126 AddrRangeList getAddrRanges() const override;
127 System *system();
128
129 void sendCompletionSignal(Addr signal_handle);
130 void updateHsaSignal(Addr signal_handle, uint64_t signal_value,
132 [] (const uint64_t &) { });
133 void updateHsaSignalAsync(Addr signal_handle, int64_t diff);
134 void updateHsaSignalData(Addr value_addr, int64_t diff,
135 uint64_t *prev_value);
136 void updateHsaSignalDone(uint64_t *signal_value);
137 void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value);
138 void updateHsaEventData(Addr signal_handle, uint64_t *event_value);
139 void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value);
140
141 uint64_t functionalReadHsaSignal(Addr signal_handle);
142
144 {
145 return signal_handle + offsetof(amd_signal_t, value);
146 }
147
149 {
150 return signal_handle + offsetof(amd_signal_t, event_mailbox_ptr);
151 }
152
154 {
155 return signal_handle + offsetof(amd_signal_t, event_id);
156 }
157
158 private:
164
165 // Typedef for a pointer to the dmaRead and dmaWrite member functions
166 typedef void (DmaDevice::*DmaFnPtr)(Addr, int, Event*, uint8_t*, Tick);
167 void initABI(HSAQueueEntry *task);
168 void sanityCheckAKC(AMDKernelCode *akc);
170 TranslationGenPtr translate(Addr vaddr, Addr size) override;
171
172 // Running counter of dispatched tasks
174
175 // Running counter of dispatched user (non-blit) kernels
177
178 // Skip all user (non-blit) kernels until reaching this kernel
180
181 // Keep track of start times for task dispatches.
182 std::unordered_map<Addr, Tick> dispatchStartTime;
183
196 void
198 const uint32_t &readDispIdOffset)
199 {
207 task->hostAMDQueueAddr = hsaPP->getQueueDesc(
208 task->queueId())->hostReadIndexPtr - readDispIdOffset;
209
214 auto *mqdDmaEvent = new DmaVirtCallback<int>(
215 [ = ] (const int &) { MQDDmaEvent(task); });
216
218 sizeof(_amd_queue_t), mqdDmaEvent, &task->amdQueue);
219 }
220
228 void
230 {
245 // TODO: Raising this signal will potentially nuke scratch
246 // space for in-flight kernels that were launched from this
247 // queue. We need to drain all kernels and deschedule the
248 // queue before raising this signal. For now, just assert if
249 // there are any in-flight kernels and tell the user that this
250 // feature still needs to be implemented.
251 fatal_if(hsaPP->inFlightPkts(task->queueId()) > 1,
252 "Needed more scratch, but kernels are in flight for "
253 "this queue and it is unsafe to reallocate scratch. "
254 "We need to implement additional intelligence in the "
255 "hardware scheduling logic to support CP-driven "
256 "queue draining and scheduling.");
257 DPRINTF(GPUCommandProc, "Not enough scratch space to launch "
258 "kernel (%x available, %x requested bytes per "
259 "workitem). Asking host runtime to allocate more "
260 "space.\n",
262 task->privMemPerItem());
263
265 [ = ] (const uint64_t &dma_buffer)
266 { WaitScratchDmaEvent(task, dma_buffer); });
267
268 } else {
269 DPRINTF(GPUCommandProc, "Sufficient scratch space, launching "
270 "kernel (%x available, %x requested bytes per "
271 "workitem).\n",
273 task->privMemPerItem());
274 dispatchPkt(task);
275 }
276 }
277
282 void
283 WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
284 {
285 if (dmaBuffer == 0) {
286 DPRINTF(GPUCommandProc, "Host scratch allocation complete. "
287 "Attempting to re-read MQD\n");
296 auto cb = new DmaVirtCallback<int>(
297 [ = ] (const int &) { MQDDmaEvent(task); });
298
299 dmaReadVirt(task->hostAMDQueueAddr, sizeof(_amd_queue_t), cb,
300 &task->amdQueue);
301 } else {
306 Addr value_addr = getHsaSignalValueAddr(
308 DPRINTF(GPUCommandProc, "Polling queue inactive signal at "
309 "%p.\n", value_addr);
310 auto cb = new DmaVirtCallback<uint64_t>(
311 [ = ] (const uint64_t &dma_buffer)
312 { WaitScratchDmaEvent(task, dma_buffer); } );
313
321 dmaReadVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer, 1e9);
322 }
323 }
324
325 void readPreload(AMDKernelCode *akc, HSAQueueEntry *task);
326 void initPreload(AMDKernelCode *akc, HSAQueueEntry *task);
327};
328
329} // namespace gem5
330
331#endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
#define DPRINTF(x,...)
Definition trace.hh:209
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
Device model for an AMD GPU.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
DmaVirtDevice(const Params &p)
void sendCompletionSignal(Addr signal_handle)
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
Addr getHsaSignalMailboxAddr(Addr signal_handle)
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
void setGPUDevice(AMDGPUDevice *gpu_device)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void signalWakeupEvent(uint32_t event_id)
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
void updateHsaSignalDone(uint64_t *signal_value)
HSAPacketProcessor & hsaPacketProc()
void performTimingRead(PacketPtr pkt, int dispType)
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
std::list< struct KernelDispatchData > kernelDispatchList
Addr getHsaSignalValueAddr(Addr signal_handle)
void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value)
void dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
void MQDDmaEvent(HSAQueueEntry *task)
Perform a DMA read of the MQD that corresponds to a hardware queue descriptor (HQD).
void attachDriver(GPUComputeDriver *driver)
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
void updateHsaSignalAsync(Addr signal_handle, int64_t diff)
std::unordered_map< Addr, Tick > dispatchStartTime
Addr getHsaSignalEventAddr(Addr signal_handle)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
void sanityCheckAKC(AMDKernelCode *akc)
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
void initPreload(AMDKernelCode *akc, HSAQueueEntry *task)
GPUCommandProcessorParams Params
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
void readPreload(AMDKernelCode *akc, HSAQueueEntry *task)
void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value)
void updateHsaEventData(Addr signal_handle, uint64_t *event_value)
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
uint64_t functionalReadHsaSignal(Addr signal_handle)
void WaitScratchDmaEvent(HSAQueueEntry *task, const uint64_t &dmaBuffer)
Poll on queue_inactive signal until the runtime can get around to taking care of our lack of scratch ...
void updateHsaSignalData(Addr value_addr, int64_t diff, uint64_t *prev_value)
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
uint32_t queueId() const
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
STL list class.
Definition stl.hh:51
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
The GPUComputeDriver implements an HSADriver for an HSA AMD GPU agent.
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
Definition addr_range.hh:64
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:268
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 0 > p
const int NumVecElemPerVecReg(64)
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
struct gem5::GEM5_PACKED AMDKernelCode
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
uint16_t RequestorID
Definition request.hh:95
Packet * PacketPtr
std::unique_ptr< TranslationGen > TranslationGenPtr
struct gem5::amd_signal_s amd_signal_t
_hsa_signal_t queue_inactive_signal
Definition hsa_queue.hh:87
uint32_t compute_tmpring_size_wavesize
Definition hsa_queue.hh:79

Generated on Mon May 26 2025 09:19:10 for gem5 by doxygen 1.13.2