gem5 v23.0.0.1
Loading...
Searching...
No Matches
gpu_compute_driver.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
40#ifndef __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
41#define __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
42
43#include <cassert>
44#include <cstdint>
45#include <set>
46#include <unordered_map>
47
49#include "base/types.hh"
50#include "enums/GfxVersion.hh"
51#include "mem/request.hh"
52#include "sim/emul_driver.hh"
53
54namespace gem5
55{
56
57struct GPUComputeDriverParams;
58class GPUCommandProcessor;
59class PortProxy;
60class ThreadContext;
61
62class GPUComputeDriver final : public EmulatedDriver
63{
64 public:
65 typedef GPUComputeDriverParams Params;
67 int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override;
68
69 int open(ThreadContext *tc, int mode, int flags) override;
70 Addr mmap(ThreadContext *tc, Addr start, uint64_t length,
71 int prot, int tgt_flags, int tgt_fd, off_t offset) override;
72 virtual void signalWakeupEvent(uint32_t event_id);
73 void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout);
83 void setMtype(RequestPtr req);
84
85 int
87 {
88 switch (gfxVersion) {
89 case GfxVersion::gfx801:
90 case GfxVersion::gfx803:
91 case GfxVersion::gfx902:
92 return 4;
93 case GfxVersion::gfx900:
94 // gfx900 supports large BAR, so it has a larger doorbell
95 return 8;
96 default:
97 fatal("Invalid GPU type\n");
98 }
99 return 4;
100 }
101
103 {
104 public:
106 ThreadContext *thrd_cntxt)
107 : driver(gpu_driver), tc(thrd_cntxt) {}
108 void process() override;
109 const char *description() const override;
110 void scheduleWakeup(Tick wakeup_delay);
111 private:
114 };
115
117 {
118 public:
120 mailBoxPtr(0), tc(nullptr), threadWaiting(false), setEvent(false)
121 {}
122 // Mail box pointer for this address. Current implementation does not
123 // use this mailBoxPtr to notify events but directly calls
124 // signalWakeupEvent from dispatcher (GPU) to notify events. So,
125 // currently this mailBoxPtr is not used. But a future implementation
126 // may communicate to the driver using mailBoxPtr.
128 // Thread context waiting on this event. We do not support multiple
129 // threads waiting on an event currently.
131 // threadWaiting = true, if some thread context is waiting on this
132 // event. A thread context waiting on this event is put to sleep.
134 // setEvent = true, if this event is triggered but when this event
135 // triggered, no thread context was waiting on it. In the future, some
136 // thread context will try to wait on this event but since event has
137 // already happened, we will not allow that thread context to go to
138 // sleep. The above-mentioned scenario can happen when the waiting
139 // thread and wakeup thread race on this event and the wakeup thread
140 // beats the waiting thread at the driver.
142 };
144
145 GfxVersion getGfxVersion() const { return gfxVersion; }
146
147 private:
152 uint32_t queueId;
153 bool isdGPU;
154 GfxVersion gfxVersion;
158 // Event table that keeps track of events. It is indexed with event ID.
159 std::unordered_map<uint32_t, ETEntry> ETable;
160
165
170 {
175 };
176
178
179 // TCEvents map keeps track of the events that can wake up this thread. When
180 // multiple events can wake up this thread, this data structure helps to
181 // reset all events when one of those events wakes up this thread. The
182 // signal events that can wake up this thread are stored in signalEvents
183 // whereas the timer wakeup event is stored in timerEvent.
185 {
186 public:
187 EventList() : driver(nullptr), timerEvent(nullptr, nullptr) {}
188 EventList(GPUComputeDriver *gpu_driver, ThreadContext *thrd_cntxt)
189 : driver(gpu_driver), timerEvent(gpu_driver, thrd_cntxt)
190 { }
191 void clearEvents() {
192 assert(driver);
193 for (auto event : signalEvents) {
194 assert(event < driver->eventSlotIndex);
195 driver->ETable[event].tc = nullptr;
196 driver->ETable[event].threadWaiting = false;
197 }
198 signalEvents.clear();
199 if (timerEvent.scheduled()) {
201 }
202 }
205 // The set of events that can wake up the same thread.
206 std::set<uint32_t> signalEvents;
207 };
208 std::unordered_map<ThreadContext *, EventList> TCEvents;
209
215
231 Addr gpuVmApeBase(int gpuNum) const;
232 Addr gpuVmApeLimit(Addr apeBase) const;
233 Addr scratchApeBase(int gpuNum) const;
234 Addr scratchApeBaseV9() const;
235 Addr scratchApeLimit(Addr apeBase) const;
236 Addr ldsApeBase(int gpuNum) const;
237 Addr ldsApeBaseV9() const;
238 Addr ldsApeLimit(Addr apeBase) const;
239
247 Addr length);
249
250 void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr);
251
252};
253
254} // namespace gem5
255
256#endif // __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
The AddrRangeMap uses an STL map to implement an interval tree for address decoding.
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
const char * description() const override
Return a C string describing the event.
DriverWakeupEvent(GPUComputeDriver *gpu_driver, ThreadContext *thrd_cntxt)
EventList(GPUComputeDriver *gpu_driver, ThreadContext *thrd_cntxt)
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length)
Allocate/deallocate GPUVM VMAs for tracking virtual address allocations and properties on DGPUs.
void setMtype(RequestPtr req)
Called by the compute units right before a request is issued to ruby.
MtypeFlags
Mtype bits {Cached, Read Write, Shared} for caches.
GfxVersion getGfxVersion() const
virtual void signalWakeupEvent(uint32_t event_id)
void registerUncacheableMemory(Addr start, Addr length)
Register a region of host memory as uncacheable from the perspective of the dGPU.
int open(ThreadContext *tc, int mode, int flags) override
Create an FD entry for the KFD inside of the owning process.
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
Addr scratchApeLimit(Addr apeBase) const
Addr deallocateGpuVma(Addr start)
Addr scratchApeBase(int gpuNum) const
std::unordered_map< ThreadContext *, EventList > TCEvents
Addr gpuVmApeBase(int gpuNum) const
The aperture (APE) base/limit pairs are set statically at startup by the real KFD.
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
Forward relevant parameters to packet processor; queueId is used to link doorbell.
Request::CacheCoherenceFlags defaultMtype
GPUComputeDriverParams Params
class EventTableEntry ETEntry
std::unordered_map< uint32_t, ETEntry > ETable
Addr mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) override
Currently, mmap() will simply setup a mapping for the associated device's packet processor's doorbell...
GPUCommandProcessor * device
GPU that is controlled by this driver.
AddrRangeMap< Request::CacheCoherenceFlags, 1 > gpuVmas
VMA structures for GPUVM memory.
Addr ldsApeBase(int gpuNum) const
Addr ldsApeLimit(Addr apeBase) const
Addr gpuVmApeLimit(Addr apeBase) const
This object is a proxy for a port or other object which implements the functional response protocol,...
Definition port_proxy.hh:87
ThreadContext is the external interface to all thread state for anything outside of the CPU.
void deschedule(Event &event)
Definition eventq.hh:1021
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
uint8_t flags
Definition helpers.cc:66
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 10, 5 > event
Bitfield< 0 > p
Bitfield< 7 > prot
Definition misc.hh:587
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
Declaration of a request, the overall memory request consisting of the parts of the request that are ...

Generated on Mon Jul 10 2023 15:32:03 for gem5 by doxygen 1.9.7