Go to the documentation of this file.
42 #include "debug/GPUDriver.hh"
43 #include "debug/GPUShader.hh"
50 #include "params/GPUComputeDriver.hh"
59 isdGPU(
p.isdGPU), gfxVersion(
p.gfxVersion), dGPUPoolID(
p.dGPUPoolID),
60 eventPage(0), eventSlotIndex(0)
63 DPRINTF(GPUDriver,
"Constructing KFD: device\n");
67 if (MtypeFlags::SHARED &
p.m_type)
70 if (MtypeFlags::READ_WRITE &
p.m_type)
73 if (MtypeFlags::CACHED &
p.m_type)
80 return "DriverWakeupEvent";
91 auto device_fd_entry = std::make_shared<DeviceFDEntry>(
this,
filename);
92 int tgt_fd = process->fds->allocFD(device_fd_entry);
102 int prot,
int tgt_flags,
int tgt_fd, off_t
offset)
109 DPRINTF(GPUDriver,
"amdkfd mmap (start: %p, length: 0x%x,"
110 "offset: 0x%x)\n", start, length,
offset);
114 DPRINTF(GPUDriver,
"amdkfd mmap type DOORBELL offset\n");
115 start = mem_state->extendMmap(length);
120 DPRINTF(GPUDriver,
"amdkfd mmap type EVENTS offset\n");
122 "Start address should be provided by KFD\n");
124 "Requested length %d, expected length %d; length "
132 eventPage = mem_state->extendMmap(length);
137 warn_once(
"Unrecognized kfd mmap type %llx\n", mmap_type);
158 fatal(
"%s: Exceeded maximum number of HSA queues allowed\n",
name());
171 args->ring_base_address, args->queue_id,
180 driver->schedule(
this,
curTick() + wakeup_delay);
187 "Trying wakeup on an event that is not yet created\n");
188 if (
ETable[event_id].threadWaiting) {
190 "No thread context to wake up\n");
193 "Signal event: Waking up CPU %d\n", tc->
cpuId());
206 ETable[event_id].setEvent =
true;
214 "Timer event: Waking up CPU %d\n", tc->cpuId());
216 driver->TCEvents[tc].clearEvents();
231 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_VERSION\n");
242 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
253 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
254 "queue offset %d\n", args->queue_id);
274 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
279 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
285 args->system_clock_freq = 1000000000;
292 args->gpu_clock_counter = elapsed_nsec;
293 args->cpu_clock_counter = elapsed_nsec;
294 args->system_clock_counter = elapsed_nsec;
301 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
304 args->num_of_nodes = 1;
312 for (
int i = 0;
i < args->num_of_nodes; ++
i) {
321 case GfxVersion::gfx801:
322 case GfxVersion::gfx803:
323 args->process_apertures[
i].scratch_base =
325 args->process_apertures[
i].lds_base =
328 case GfxVersion::gfx900:
329 args->process_apertures[
i].scratch_base =
331 args->process_apertures[
i].lds_base =
335 fatal(
"Invalid gfx version\n");
339 args->process_apertures[
i].scratch_limit =
342 args->process_apertures[
i].lds_limit =
346 case GfxVersion::gfx801:
347 args->process_apertures[
i].gpuvm_base =
349 args->process_apertures[
i].gpuvm_limit =
352 case GfxVersion::gfx803:
353 case GfxVersion::gfx900:
354 case GfxVersion::gfx902:
356 args->process_apertures[
i].gpuvm_base = 0x1000000ull;
358 args->process_apertures[
i].gpuvm_limit =
359 0x0000800000000000ULL - 1;
362 fatal(
"Invalid gfx version");
376 case GfxVersion::gfx803:
377 args->process_apertures[
i].gpu_id = 50156;
379 case GfxVersion::gfx900:
380 args->process_apertures[
i].gpu_id = 22124;
383 fatal(
"Invalid gfx version for dGPU\n");
387 case GfxVersion::gfx801:
388 case GfxVersion::gfx902:
389 args->process_apertures[
i].gpu_id = 2765;
392 fatal(
"Invalid gfx version for APU\n");
396 DPRINTF(GPUDriver,
"GPUVM base for node[%i] = %#x\n",
i,
397 args->process_apertures[
i].gpuvm_base);
398 DPRINTF(GPUDriver,
"GPUVM limit for node[%i] = %#x\n",
i,
399 args->process_apertures[
i].gpuvm_limit);
401 DPRINTF(GPUDriver,
"LDS base for node[%i] = %#x\n",
i,
402 args->process_apertures[
i].lds_base);
403 DPRINTF(GPUDriver,
"LDS limit for node[%i] = %#x\n",
i,
404 args->process_apertures[
i].lds_limit);
406 DPRINTF(GPUDriver,
"Scratch base for node[%i] = %#x\n",
i,
407 args->process_apertures[
i].scratch_base);
408 DPRINTF(GPUDriver,
"Scratch limit for node[%i] = %#x\n",
i,
409 args->process_apertures[
i].scratch_limit);
417 assert(bits<Addr>(args->process_apertures[
i].scratch_base, 63,
419 assert(bits<Addr>(args->process_apertures[
i].scratch_base, 63,
421 assert(bits<Addr>(args->process_apertures[
i].scratch_limit, 63,
423 assert(bits<Addr>(args->process_apertures[
i].scratch_limit, 63,
425 assert(bits<Addr>(args->process_apertures[
i].lds_base, 63,
427 assert(bits<Addr>(args->process_apertures[
i].lds_base, 63,
429 assert(bits<Addr>(args->process_apertures[
i].lds_limit, 63,
431 assert(bits<Addr>(args->process_apertures[
i].lds_limit, 63,
440 warn(
"unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
445 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_CREATE_EVENT\n");
450 warn(
"Signal events are only supported currently\n");
452 fatal(
"Signal event wasn't created; signal limit reached\n");
456 uint64_t page_index = 0;
463 args->event_trigger_data = args->event_id;
464 DPRINTF(GPUDriver,
"amdkfd create events"
465 "(event_id: 0x%x, offset: 0x%x)\n",
466 args->event_id, args->event_page_offset);
479 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
482 DPRINTF(GPUDriver,
"amdkfd destroying event %d\n", args->event_id);
484 "Event ID invalid, cannot destroy this event\n");
485 ETable.erase(args->event_id);
490 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_SET_EVENTS\n");
493 DPRINTF(GPUDriver,
"amdkfd set event %d\n", args->event_id);
495 "Event ID invlaid, cannot set this event\n");
496 ETable[args->event_id].setEvent =
true;
502 warn(
"unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
507 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
512 DPRINTF(GPUDriver,
"amdkfd wait for events"
513 "(wait on all: %d, timeout : %d, num_events: %s)\n",
514 args->wait_for_all, args->timeout, args->num_events);
515 panic_if(args->wait_for_all != 0 && args->num_events > 1,
516 "Wait for all events not supported\n");
517 bool should_sleep =
true;
521 TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
522 std::make_tuple(
this, tc));
523 DPRINTF(GPUDriver,
"\tamdkfd creating event list"
524 " for thread %d\n", tc->
cpuId());
527 "There are %d events that put this thread to sleep,"
528 " this thread should not be running\n",
530 for (
int i = 0;
i < args->num_events;
i++) {
532 "Event pointer invalid\n");
536 EventData.
copyIn(virt_proxy);
538 "\tamdkfd wait for event %d\n", EventData->event_id);
540 "Event ID invalid, cannot set this event\n");
541 if (
ETable[EventData->event_id].threadWaiting)
542 warn(
"Multiple threads waiting on the same event\n");
543 if (
ETable[EventData->event_id].setEvent) {
546 ETable[EventData->event_id].setEvent =
false;
547 should_sleep =
false;
551 ETable[EventData->event_id].threadWaiting =
true;
552 ETable[EventData->event_id].tc = tc;
553 TCEvents[tc].signalEvents.insert(EventData->event_id);
561 args->wait_result = 0;
574 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
579 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
584 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
589 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
594 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
599 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
604 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
610 "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
615 ioc_args.
copyIn(virt_proxy);
616 ioc_args->num_of_nodes = 1;
618 for (
int i = 0;
i < ioc_args->num_of_nodes; ++
i) {
620 (ioc_args->kfd_process_device_apertures_ptr);
623 case GfxVersion::gfx801:
624 case GfxVersion::gfx803:
628 case GfxVersion::gfx900:
633 fatal(
"Invalid gfx version\n");
637 ape_args->scratch_limit =
639 ape_args->lds_limit =
ldsApeLimit(ape_args->lds_base);
642 case GfxVersion::gfx801:
644 ape_args->gpuvm_limit =
647 case GfxVersion::gfx803:
648 case GfxVersion::gfx900:
649 case GfxVersion::gfx902:
651 ape_args->gpuvm_base = 0x1000000ull;
653 ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
656 fatal(
"Invalid gfx version\n");
662 case GfxVersion::gfx803:
663 ape_args->gpu_id = 50156;
665 case GfxVersion::gfx900:
666 ape_args->gpu_id = 22124;
669 fatal(
"Invalid gfx version for dGPU\n");
673 case GfxVersion::gfx801:
674 case GfxVersion::gfx902:
675 ape_args->gpu_id = 2765;
678 fatal(
"Invalid gfx version for APU\n");
682 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
683 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
684 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
685 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
686 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
687 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
688 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
689 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
699 warn(
"unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
720 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
726 GEM5_VAR_USED
Addr mmap_offset = 0;
732 bool cacheable =
true;
735 DPRINTF(GPUDriver,
"amdkfd allocation type: VRAM\n");
736 args->mmap_offset = args->va_addr;
756 pa_addr = process->system->allocPhysPages(npages,
dGPUPoolID);
763 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
764 "%d\n", args->va_addr, pa_addr, args->
size);
767 DPRINTF(GPUDriver,
"amdkfd allocation type: USERPTR\n");
768 mmap_offset = args->mmap_offset;
771 pa_addr = process->system->allocPhysPages(npages);
773 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
774 "%d\n", args->va_addr, pa_addr, args->
size);
783 DPRINTF(GPUDriver,
"amdkfd allocation type: GTT\n");
784 args->mmap_offset = args->va_addr;
792 pa_addr = process->system->allocPhysPages(npages);
794 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
795 "%d\n", args->va_addr, pa_addr, args->
size);
807 DPRINTF(GPUDriver,
"amdkfd allocation type: DOORBELL\n");
817 DPRINTF(GPUDriver,
"amdkfd allocation arguments: va_addr %p "
818 "size %lu, mmap_offset %p, gpu_id %d\n",
819 args->va_addr, args->
size, mmap_offset, args->gpu_id);
823 process->pTable->map(args->va_addr, pa_addr, args->
size,
840 args->handle= args->va_addr;
846 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
851 DPRINTF(GPUDriver,
"amdkfd free arguments: handle %p ",
856 process->pTable->unmap(args->handle, size);
875 warn(
"unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
880 warn(
"unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
885 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
890 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
895 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
900 warn(
"unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
905 warn(
"unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
910 warn(
"unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
914 fatal(
"%s: bad ioctl %d\n", req);
924 Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
926 TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
929 "CPU %d is put to sleep\n", tc->
cpuId());
935 return ((
Addr)gpuNum << 61) + 0x1000000000000
L;
941 return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFF
L;
947 return ((
Addr)gpuNum << 61) + 0x100000000
L;
955 return ((
Addr)0x1 << 48);
961 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
967 return ((
Addr)gpuNum << 61) + 0x0;
975 return ((
Addr)0x2 << 48);
981 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
989 DPRINTF(GPUDriver,
"Registering [%p - %p] with MTYPE %d\n",
992 "Attempted to double register Mtypes for [%p - %p]\n",
999 auto vma =
gpuVmas.contains(start);
1001 assert((vma->first.start() == start));
1002 Addr size = vma->first.size();
1003 DPRINTF(GPUDriver,
"Unregistering [%p - %p]\n", vma->first.start(),
1015 auto vma =
gpuVmas.contains(range);
1017 DPRINTF(GPUShader,
"Setting req from [%p - %p] MTYPE %d\n"
1018 "%d\n", range.
start(), range.
end(), vma->second);
1019 req->setCacheCoherenceFlags(vma->second);
Tick curTick()
The universal simulation clock.
#define fatal(...)
This implements a cprintf based fatal() function.
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU
std::unordered_map< ThreadContext *, EventList > TCEvents
virtual void signalWakeupEvent(uint32_t event_id)
AddrRangeMap< Request::CacheCoherenceFlags, 1 > gpuVmas
VMA structures for GPUVM memory.
#define KFD_SIGNAL_EVENT_LIMIT
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
Forward relevant parameters to packet processor; queueId is used to link doorbell.
Addr start() const
Get the start address of the range.
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
Addr deallocateGpuVma(Addr start)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
void attachDriver(GPUComputeDriver *driver)
#define AMDKFD_IOC_GET_TILE_CONFIG
AddrRange RangeSize(Addr start, Addr size)
#define AMDKFD_IOC_DBG_WAVE_CONTROL
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU
#define AMDKFD_IOC_DESTROY_EVENT
void set(Type mask)
Set all flag's bits matching the given mask.
virtual BaseMMU * getMMUPtr()=0
void clear()
Clear all flag's bits.
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
Addr gpuVmApeLimit(Addr apeBase) const
virtual void activate()=0
Set the status to Active.
HSAPacketProcessor & hsaPacketProc()
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM
#define KFD_IOCTL_MINOR_VERSION
#define KFD_MMAP_GPU_ID(gpu_id)
virtual int cpuId() const =0
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize)
#define AMDKFD_IOC_GET_VERSION
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length)
Allocate/deallocate GPUVM VMAs for tracking virtual address allocations and properties on DGPUs.
std::shared_ptr< MemState > memState
void setMtype(RequestPtr req)
Called by the compute units right before a request is issued to ruby.
#define KFD_MMAP_TYPE_MASK
std::unordered_map< uint32_t, ETEntry > ETable
#define AMDKFD_IOC_SET_EVENT
#define AMDKFD_IOC_GET_PROCESS_APERTURES
#define AMDKFD_IOC_SET_TRAP_HANDLER
#define AMDKFD_IOC_DBG_REGISTER
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Request::CacheCoherenceFlags defaultMtype
virtual std::string name() const
virtual PortProxy & getVirtProxy()=0
const int size
buffer size
#define AMDKFD_IOC_ACQUIRE_VM
#define AMDKFD_IOC_SET_CU_MASK
uint64_t Tick
Tick count type.
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE
std::shared_ptr< Request > RequestPtr
Addr ldsApeBaseV9() const
#define KFD_IOCTL_MAJOR_VERSION
This object is a proxy for a port or other object which implements the functional response protocol,...
Addr scratchApeBase(int gpuNum) const
#define AMDKFD_IOC_RESET_EVENT
GPUComputeDriver(const Params &p)
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT
Addr end() const
Get the end address of the range.
#define KFD_IOC_EVENT_SIGNAL
#define AMDKFD_IOC_DBG_ADDRESS_WATCH
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int open(ThreadContext *tc, int mode, int flags) override
Create an FD entry for the KFD inside of the owning process.
Addr scratchApeBaseV9() const
#define AMDKFD_IOC_IMPORT_DMABUF
#define KFD_MMAP_TYPE_EVENTS
bool copyIn(const PortProxy &memproxy)
copy data into simulator space (read from target memory)
static constexpr T divCeil(const T &a, const U &b)
virtual Process * getProcessPtr()=0
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
void scheduleWakeup(Tick wakeup_delay)
virtual void suspend()=0
Set the status to Suspended.
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
GPUCommandProcessor * device
GPU that is controlled by this driver.
#define AMDKFD_IOC_CREATE_EVENT
#define AMDKFD_IOC_DESTROY_QUEUE
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
bool copyOut(const PortProxy &memproxy)
copy data out of simulator space (write to target memory)
#define AMDKFD_IOC_SET_MEMORY_POLICY
Addr mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) override
Currently, mmap() will simply set up a mapping for the associated device's packet processor's doorbell...
#define AMDKFD_IOC_WAIT_EVENTS
#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA
#define AMDKFD_IOC_DBG_UNREGISTER
#define AMDKFD_IOC_SMI_EVENTS
GPUComputeDriverParams Params
#define AMDKFD_IOC_UPDATE_QUEUE
Addr ldsApeLimit(Addr apeBase) const
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
const char * description() const override
Return a C string describing the event.
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define AMDKFD_IOC_ALLOC_QUEUE_GWS
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Addr gpuVmApeBase(int gpuNum) const
The aperture (APE) base/limit pairs are set statically at startup by the real KFD.
Addr scratchApeLimit(Addr apeBase) const
Addr ldsApeBase(int gpuNum) const
const std::string & filename
filename for opening this driver (under /dev)
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
#define AMDKFD_IOC_CREATE_QUEUE
#define AMDKFD_IOC_GET_CLOCK_COUNTERS
#define KFD_MMAP_TYPE_DOORBELL
#define AMDKFD_IOC_GET_DMABUF_INFO
Generated on Tue Sep 21 2021 12:25:23 for gem5 by doxygen 1.8.17