Go to the documentation of this file.
41 #include "debug/GPUDriver.hh"
42 #include "debug/GPUShader.hh"
51 #include "params/GPUComputeDriver.hh"
62 isdGPU(
p.isdGPU), gfxVersion(
p.gfxVersion), dGPUPoolID(
p.dGPUPoolID),
63 eventPage(0), eventSlotIndex(0)
66 DPRINTF(GPUDriver,
"Constructing KFD: device\n");
70 if (MtypeFlags::SHARED &
p.m_type)
73 if (MtypeFlags::READ_WRITE &
p.m_type)
76 if (MtypeFlags::CACHED &
p.m_type)
83 return "DriverWakeupEvent";
94 auto device_fd_entry = std::make_shared<DeviceFDEntry>(
this,
filename);
95 int tgt_fd = process->fds->allocFD(device_fd_entry);
105 int prot,
int tgt_flags,
int tgt_fd, off_t
offset)
112 DPRINTF(GPUDriver,
"amdkfd mmap (start: %p, length: 0x%x,"
113 "offset: 0x%x)\n", start, length,
offset);
117 DPRINTF(GPUDriver,
"amdkfd mmap type DOORBELL offset\n");
118 start = mem_state->extendMmap(length);
123 DPRINTF(GPUDriver,
"amdkfd mmap type EVENTS offset\n");
125 "Start address should be provided by KFD\n");
127 "Requested length %d, expected length %d; length "
135 eventPage = mem_state->extendMmap(length);
140 warn_once(
"Unrecognized kfd mmap type %llx\n", mmap_type);
161 fatal(
"%s: Exceeded maximum number of HSA queues allowed\n",
name());
174 args->ring_base_address, args->queue_id,
183 driver->schedule(
this,
curTick() + wakeup_delay);
190 "Trying wakeup on an event that is not yet created\n");
191 if (
ETable[event_id].threadWaiting) {
193 "No thread context to wake up\n");
196 "Signal event: Waking up CPU %d\n", tc->
cpuId());
209 ETable[event_id].setEvent =
true;
217 "Timer event: Waking up CPU %d\n", tc->cpuId());
219 driver->TCEvents[tc].clearEvents();
236 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_VERSION\n");
247 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
258 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
259 "queue offset %d\n", args->queue_id);
279 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
284 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
290 args->system_clock_freq = 1000000000;
297 args->gpu_clock_counter = elapsed_nsec;
298 args->cpu_clock_counter = elapsed_nsec;
299 args->system_clock_counter = elapsed_nsec;
306 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
309 args->num_of_nodes = 1;
317 for (
int i = 0;
i < args->num_of_nodes; ++
i) {
326 case GfxVersion::gfx801:
327 case GfxVersion::gfx803:
328 args->process_apertures[
i].scratch_base =
330 args->process_apertures[
i].lds_base =
333 case GfxVersion::gfx900:
334 args->process_apertures[
i].scratch_base =
336 args->process_apertures[
i].lds_base =
340 fatal(
"Invalid gfx version\n");
344 args->process_apertures[
i].scratch_limit =
347 args->process_apertures[
i].lds_limit =
351 case GfxVersion::gfx801:
352 args->process_apertures[
i].gpuvm_base =
354 args->process_apertures[
i].gpuvm_limit =
357 case GfxVersion::gfx803:
358 case GfxVersion::gfx900:
359 case GfxVersion::gfx902:
361 args->process_apertures[
i].gpuvm_base = 0x1000000ull;
363 args->process_apertures[
i].gpuvm_limit =
364 0x0000800000000000ULL - 1;
367 fatal(
"Invalid gfx version");
381 case GfxVersion::gfx803:
382 args->process_apertures[
i].gpu_id = 50156;
384 case GfxVersion::gfx900:
385 args->process_apertures[
i].gpu_id = 22124;
388 fatal(
"Invalid gfx version for dGPU\n");
392 case GfxVersion::gfx801:
393 case GfxVersion::gfx902:
394 args->process_apertures[
i].gpu_id = 2765;
397 fatal(
"Invalid gfx version for APU\n");
401 DPRINTF(GPUDriver,
"GPUVM base for node[%i] = %#x\n",
i,
402 args->process_apertures[
i].gpuvm_base);
403 DPRINTF(GPUDriver,
"GPUVM limit for node[%i] = %#x\n",
i,
404 args->process_apertures[
i].gpuvm_limit);
406 DPRINTF(GPUDriver,
"LDS base for node[%i] = %#x\n",
i,
407 args->process_apertures[
i].lds_base);
408 DPRINTF(GPUDriver,
"LDS limit for node[%i] = %#x\n",
i,
409 args->process_apertures[
i].lds_limit);
411 DPRINTF(GPUDriver,
"Scratch base for node[%i] = %#x\n",
i,
412 args->process_apertures[
i].scratch_base);
413 DPRINTF(GPUDriver,
"Scratch limit for node[%i] = %#x\n",
i,
414 args->process_apertures[
i].scratch_limit);
422 assert(bits<Addr>(args->process_apertures[
i].scratch_base, 63,
424 assert(bits<Addr>(args->process_apertures[
i].scratch_base, 63,
426 assert(bits<Addr>(args->process_apertures[
i].scratch_limit, 63,
428 assert(bits<Addr>(args->process_apertures[
i].scratch_limit, 63,
430 assert(bits<Addr>(args->process_apertures[
i].lds_base, 63,
432 assert(bits<Addr>(args->process_apertures[
i].lds_base, 63,
434 assert(bits<Addr>(args->process_apertures[
i].lds_limit, 63,
436 assert(bits<Addr>(args->process_apertures[
i].lds_limit, 63,
445 warn(
"unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
450 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_CREATE_EVENT\n");
455 warn(
"Signal events are only supported currently\n");
457 fatal(
"Signal event wasn't created; signal limit reached\n");
461 uint64_t page_index = 0;
468 args->event_trigger_data = args->event_id;
469 DPRINTF(GPUDriver,
"amdkfd create events"
470 "(event_id: 0x%x, offset: 0x%x)\n",
471 args->event_id, args->event_page_offset);
484 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
487 DPRINTF(GPUDriver,
"amdkfd destroying event %d\n", args->event_id);
489 "Event ID invalid, cannot destroy this event\n");
490 ETable.erase(args->event_id);
495 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_SET_EVENTS\n");
498 DPRINTF(GPUDriver,
"amdkfd set event %d\n", args->event_id);
500 "Event ID invlaid, cannot set this event\n");
501 ETable[args->event_id].setEvent =
true;
507 warn(
"unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
512 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
517 DPRINTF(GPUDriver,
"amdkfd wait for events"
518 "(wait on all: %d, timeout : %d, num_events: %s)\n",
519 args->wait_for_all, args->timeout, args->num_events);
520 panic_if(args->wait_for_all != 0 && args->num_events > 1,
521 "Wait for all events not supported\n");
522 bool should_sleep =
true;
526 TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
527 std::make_tuple(
this, tc));
528 DPRINTF(GPUDriver,
"\tamdkfd creating event list"
529 " for thread %d\n", tc->
cpuId());
532 "There are %d events that put this thread to sleep,"
533 " this thread should not be running\n",
535 for (
int i = 0;
i < args->num_events;
i++) {
537 "Event pointer invalid\n");
541 EventData.
copyIn(virt_proxy);
543 "\tamdkfd wait for event %d\n", EventData->event_id);
545 "Event ID invalid, cannot set this event\n");
546 if (
ETable[EventData->event_id].threadWaiting)
547 warn(
"Multiple threads waiting on the same event\n");
548 if (
ETable[EventData->event_id].setEvent) {
551 ETable[EventData->event_id].setEvent =
false;
552 should_sleep =
false;
556 ETable[EventData->event_id].threadWaiting =
true;
557 ETable[EventData->event_id].tc = tc;
558 TCEvents[tc].signalEvents.insert(EventData->event_id);
566 args->wait_result = 0;
579 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
584 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
589 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
594 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
599 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
604 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
609 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
615 "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
620 ioc_args.
copyIn(virt_proxy);
621 ioc_args->num_of_nodes = 1;
623 for (
int i = 0;
i < ioc_args->num_of_nodes; ++
i) {
625 (ioc_args->kfd_process_device_apertures_ptr);
628 case GfxVersion::gfx801:
629 case GfxVersion::gfx803:
633 case GfxVersion::gfx900:
638 fatal(
"Invalid gfx version\n");
642 ape_args->scratch_limit =
644 ape_args->lds_limit =
ldsApeLimit(ape_args->lds_base);
647 case GfxVersion::gfx801:
649 ape_args->gpuvm_limit =
652 case GfxVersion::gfx803:
653 case GfxVersion::gfx900:
654 case GfxVersion::gfx902:
656 ape_args->gpuvm_base = 0x1000000ull;
658 ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
661 fatal(
"Invalid gfx version\n");
667 case GfxVersion::gfx803:
668 ape_args->gpu_id = 50156;
670 case GfxVersion::gfx900:
671 ape_args->gpu_id = 22124;
674 fatal(
"Invalid gfx version for dGPU\n");
678 case GfxVersion::gfx801:
679 case GfxVersion::gfx902:
680 ape_args->gpu_id = 2765;
683 fatal(
"Invalid gfx version for APU\n");
687 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
688 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
689 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
690 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
691 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
692 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
693 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
694 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
704 warn(
"unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
725 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
731 [[maybe_unused]]
Addr mmap_offset = 0;
737 bool cacheable =
true;
740 DPRINTF(GPUDriver,
"amdkfd allocation type: VRAM\n");
741 args->mmap_offset = args->va_addr;
761 pa_addr = process->seWorkload->allocPhysPages(
769 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
770 "%d\n", args->va_addr, pa_addr, args->
size);
773 DPRINTF(GPUDriver,
"amdkfd allocation type: USERPTR\n");
774 mmap_offset = args->mmap_offset;
777 pa_addr = process->seWorkload->allocPhysPages(npages);
779 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
780 "%d\n", args->va_addr, pa_addr, args->
size);
789 DPRINTF(GPUDriver,
"amdkfd allocation type: GTT\n");
790 args->mmap_offset = args->va_addr;
798 pa_addr = process->seWorkload->allocPhysPages(npages);
800 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
801 "%d\n", args->va_addr, pa_addr, args->
size);
813 DPRINTF(GPUDriver,
"amdkfd allocation type: DOORBELL\n");
823 DPRINTF(GPUDriver,
"amdkfd allocation arguments: va_addr %p "
824 "size %lu, mmap_offset %p, gpu_id %d\n",
825 args->va_addr, args->
size, mmap_offset, args->gpu_id);
829 process->pTable->map(args->va_addr, pa_addr, args->
size,
846 args->handle= args->va_addr;
852 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
857 DPRINTF(GPUDriver,
"amdkfd free arguments: handle %p ",
862 process->pTable->unmap(args->handle, size);
881 warn(
"unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
886 warn(
"unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
891 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
896 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
901 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
906 warn(
"unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
911 warn(
"unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
916 warn(
"unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
920 fatal(
"%s: bad ioctl %d\n", req);
930 Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
932 TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
935 "CPU %d is put to sleep\n", tc->
cpuId());
941 return ((
Addr)gpuNum << 61) + 0x1000000000000
L;
947 return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFF
L;
953 return ((
Addr)gpuNum << 61) + 0x100000000
L;
961 return ((
Addr)0x1 << 48);
967 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
973 return ((
Addr)gpuNum << 61) + 0x0;
981 return ((
Addr)0x2 << 48);
987 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
995 DPRINTF(GPUDriver,
"Registering [%p - %p] with MTYPE %d\n",
998 "Attempted to double register Mtypes for [%p - %p]\n",
1005 auto vma =
gpuVmas.contains(start);
1007 assert((vma->first.start() == start));
1008 Addr size = vma->first.size();
1009 DPRINTF(GPUDriver,
"Unregistering [%p - %p]\n", vma->first.start(),
1021 auto vma =
gpuVmas.contains(range);
1023 DPRINTF(GPUShader,
"Setting req from [%p - %p] MTYPE %d\n"
1024 "%d\n", range.
start(), range.
end(), vma->second);
1025 req->setCacheCoherenceFlags(vma->second);
Tick curTick()
The universal simulation clock.
#define fatal(...)
This implements a cprintf based fatal() function.
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU
std::unordered_map< ThreadContext *, EventList > TCEvents
virtual void signalWakeupEvent(uint32_t event_id)
AddrRangeMap< Request::CacheCoherenceFlags, 1 > gpuVmas
VMA structures for GPUVM memory.
#define KFD_SIGNAL_EVENT_LIMIT
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
Forward relevant parameters to packet processor; queueId is used to link doorbell.
Addr start() const
Get the start address of the range.
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
Addr deallocateGpuVma(Addr start)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
void attachDriver(GPUComputeDriver *driver)
#define AMDKFD_IOC_GET_TILE_CONFIG
AddrRange RangeSize(Addr start, Addr size)
#define AMDKFD_IOC_DBG_WAVE_CONTROL
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU
#define AMDKFD_IOC_DESTROY_EVENT
void set(Type mask)
Set all flag's bits matching the given mask.
virtual BaseMMU * getMMUPtr()=0
void clear()
Clear all flag's bits.
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
Addr gpuVmApeLimit(Addr apeBase) const
virtual void activate()=0
Set the status to Active.
HSAPacketProcessor & hsaPacketProc()
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM
#define KFD_IOCTL_MINOR_VERSION
#define KFD_MMAP_GPU_ID(gpu_id)
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion)
virtual int cpuId() const =0
#define AMDKFD_IOC_GET_VERSION
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length)
Allocate/deallocate GPUVM VMAs for tracking virtual address allocations and properties on DGPUs.
std::shared_ptr< MemState > memState
void setMtype(RequestPtr req)
Called by the compute units right before a request is issued to ruby.
This proxy attempts to translate virtual addresses using the TLBs.
#define KFD_MMAP_TYPE_MASK
std::unordered_map< uint32_t, ETEntry > ETable
#define AMDKFD_IOC_SET_EVENT
#define AMDKFD_IOC_GET_PROCESS_APERTURES
#define AMDKFD_IOC_SET_TRAP_HANDLER
#define AMDKFD_IOC_DBG_REGISTER
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Request::CacheCoherenceFlags defaultMtype
virtual std::string name() const
const int size
buffer size
#define AMDKFD_IOC_ACQUIRE_VM
#define AMDKFD_IOC_SET_CU_MASK
uint64_t Tick
Tick count type.
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE
std::shared_ptr< Request > RequestPtr
Addr ldsApeBaseV9() const
#define KFD_IOCTL_MAJOR_VERSION
This object is a proxy for a port or other object which implements the functional response protocol,...
Addr scratchApeBase(int gpuNum) const
#define AMDKFD_IOC_RESET_EVENT
GPUComputeDriver(const Params &p)
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT
Addr end() const
Get the end address of the range.
#define KFD_IOC_EVENT_SIGNAL
#define AMDKFD_IOC_DBG_ADDRESS_WATCH
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int open(ThreadContext *tc, int mode, int flags) override
Create an FD entry for the KFD inside of the owning process.
Addr scratchApeBaseV9() const
#define AMDKFD_IOC_IMPORT_DMABUF
#define KFD_MMAP_TYPE_EVENTS
bool copyIn(const PortProxy &memproxy)
copy data into simulator space (read from target memory)
static constexpr T divCeil(const T &a, const U &b)
virtual Process * getProcessPtr()=0
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
void scheduleWakeup(Tick wakeup_delay)
virtual void suspend()=0
Set the status to Suspended.
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
GPUCommandProcessor * device
GPU that is controlled by this driver.
#define AMDKFD_IOC_CREATE_EVENT
#define AMDKFD_IOC_DESTROY_QUEUE
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
bool copyOut(const PortProxy &memproxy)
copy data out of simulator space (write to target memory)
#define AMDKFD_IOC_SET_MEMORY_POLICY
Addr mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) override
Currently, mmap() will simply set up a mapping for the associated device's packet processor's doorbell...
#define AMDKFD_IOC_WAIT_EVENTS
#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA
#define AMDKFD_IOC_DBG_UNREGISTER
#define AMDKFD_IOC_SMI_EVENTS
GPUComputeDriverParams Params
#define AMDKFD_IOC_UPDATE_QUEUE
Addr ldsApeLimit(Addr apeBase) const
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
const char * description() const override
Return a C string describing the event.
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define AMDKFD_IOC_ALLOC_QUEUE_GWS
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Addr gpuVmApeBase(int gpuNum) const
The aperture (APE) base/limit pairs are set statically at startup by the real KFD.
Addr scratchApeLimit(Addr apeBase) const
Addr ldsApeBase(int gpuNum) const
const std::string & filename
filename for opening this driver (under /dev)
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
#define AMDKFD_IOC_CREATE_QUEUE
#define AMDKFD_IOC_GET_CLOCK_COUNTERS
#define KFD_MMAP_TYPE_DOORBELL
#define AMDKFD_IOC_GET_DMABUF_INFO
Generated on Tue Feb 8 2022 11:47:09 for gem5 by doxygen 1.8.17