40#ifndef __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
41#define __GPU_COMPUTE_GPU_COMPUTE_DRIVER_HH__
46#include <unordered_map>
50#include "enums/GfxVersion.hh"
57struct GPUComputeDriverParams;
58class GPUCommandProcessor;
65 typedef GPUComputeDriverParams
Params;
71 int prot,
int tgt_flags,
int tgt_fd, off_t
offset)
override;
89 case GfxVersion::gfx902:
91 case GfxVersion::gfx900:
95 fatal(
"Invalid GPU type\n");
105 :
driver(gpu_driver),
tc(thrd_cntxt) {}
157 std::unordered_map<uint32_t, ETEntry>
ETable;
206 std::unordered_map<ThreadContext *, EventList>
TCEvents;
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
The AddrRangeMap uses an STL map to implement an interval tree for address decoding.
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
void scheduleWakeup(Tick wakeup_delay)
GPUComputeDriver * driver
const char * description() const override
Return a C string describing the event.
DriverWakeupEvent(GPUComputeDriver *gpu_driver, ThreadContext *thrd_cntxt)
GPUComputeDriver * driver
DriverWakeupEvent timerEvent
EventList(GPUComputeDriver *gpu_driver, ThreadContext *thrd_cntxt)
std::set< uint32_t > signalEvents
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length)
Allocate/deallocate GPUVM VMAs for tracking virtual address allocations and properties on DGPUs.
void setMtype(RequestPtr req)
Called by the compute units right before a request is issued to ruby.
MtypeFlags
Mtype bits {Cached, Read Write, Shared} for caches.
GfxVersion getGfxVersion() const
virtual void signalWakeupEvent(uint32_t event_id)
void registerUncacheableMemory(Addr start, Addr length)
Register a region of host memory as uncacheable from the perspective of the dGPU.
int open(ThreadContext *tc, int mode, int flags) override
Create an FD entry for the KFD inside of the owning process.
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
Addr scratchApeLimit(Addr apeBase) const
GPUComputeDriver(const Params &p)
Addr deallocateGpuVma(Addr start)
Addr scratchApeBase(int gpuNum) const
Addr scratchApeBaseV9() const
std::unordered_map< ThreadContext *, EventList > TCEvents
Addr gpuVmApeBase(int gpuNum) const
The aperture (APE) base/limit pairs are set statically at startup by the real KFD.
Addr ldsApeBaseV9() const
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
Forward relevant parameters to packet processor; queueId is used to link doorbell.
Request::CacheCoherenceFlags defaultMtype
GPUComputeDriverParams Params
class EventTableEntry ETEntry
std::unordered_map< uint32_t, ETEntry > ETable
Addr mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) override
Currently, mmap() will simply set up a mapping for the associated device's packet processor's doorbell...
GPUCommandProcessor * device
GPU that is controlled by this driver.
AddrRangeMap< Request::CacheCoherenceFlags, 1 > gpuVmas
VMA structures for GPUVM memory.
Addr ldsApeBase(int gpuNum) const
Addr ldsApeLimit(Addr apeBase) const
Addr gpuVmApeLimit(Addr apeBase) const
This object is a proxy for a port or other object which implements the functional response protocol,...
ThreadContext is the external interface to all thread state for anything outside of the CPU.
void deschedule(Event &event)
bool scheduled() const
Determine if the current event is scheduled.
#define fatal(...)
This implements a cprintf based fatal() function.
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< Request > RequestPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...