Go to the documentation of this file.
41 #include "debug/GPUDriver.hh"
42 #include "debug/GPUShader.hh"
51 #include "params/GPUComputeDriver.hh"
62 isdGPU(
p.isdGPU), gfxVersion(
p.gfxVersion), dGPUPoolID(
p.dGPUPoolID),
63 eventPage(0), eventSlotIndex(0)
66 DPRINTF(GPUDriver,
"Constructing KFD: device\n");
70 std::bitset<MtypeFlags::NUM_MTYPE_BITS> mtype(
p.m_type);
71 if (mtype.test(MtypeFlags::SHARED)) {
75 if (mtype.test(MtypeFlags::READ_WRITE)) {
79 if (mtype.test(MtypeFlags::CACHED)) {
87 return "DriverWakeupEvent";
98 auto device_fd_entry = std::make_shared<DeviceFDEntry>(
this,
filename);
99 int tgt_fd = process->fds->allocFD(device_fd_entry);
109 int prot,
int tgt_flags,
int tgt_fd, off_t
offset)
116 DPRINTF(GPUDriver,
"amdkfd mmap (start: %p, length: 0x%x,"
117 "offset: 0x%x)\n", start, length,
offset);
121 DPRINTF(GPUDriver,
"amdkfd mmap type DOORBELL offset\n");
122 start = mem_state->extendMmap(length);
127 DPRINTF(GPUDriver,
"amdkfd mmap type EVENTS offset\n");
129 "Start address should be provided by KFD\n");
131 "Requested length %d, expected length %d; length "
139 eventPage = mem_state->extendMmap(length);
144 warn_once(
"Unrecognized kfd mmap type %llx\n", mmap_type);
165 fatal(
"%s: Exceeded maximum number of HSA queues allowed\n",
name());
178 args->ring_base_address, args->queue_id,
187 driver->schedule(
this,
curTick() + wakeup_delay);
194 "Trying wakeup on an event that is not yet created\n");
195 if (
ETable[event_id].threadWaiting) {
197 "No thread context to wake up\n");
200 "Signal event: Waking up CPU %d\n", tc->
cpuId());
213 ETable[event_id].setEvent =
true;
221 "Timer event: Waking up CPU %d\n", tc->cpuId());
223 driver->TCEvents[tc].clearEvents();
240 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_VERSION\n");
251 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
262 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
263 "queue offset %d\n", args->queue_id);
283 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
288 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
294 args->system_clock_freq = 1000000000;
301 args->gpu_clock_counter = elapsed_nsec;
302 args->cpu_clock_counter = elapsed_nsec;
303 args->system_clock_counter = elapsed_nsec;
310 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
313 args->num_of_nodes = 1;
321 for (
int i = 0;
i < args->num_of_nodes; ++
i) {
330 case GfxVersion::gfx801:
331 case GfxVersion::gfx803:
332 args->process_apertures[
i].scratch_base =
334 args->process_apertures[
i].lds_base =
337 case GfxVersion::gfx900:
338 case GfxVersion::gfx902:
339 args->process_apertures[
i].scratch_base =
341 args->process_apertures[
i].lds_base =
345 fatal(
"Invalid gfx version\n");
349 args->process_apertures[
i].scratch_limit =
352 args->process_apertures[
i].lds_limit =
356 case GfxVersion::gfx801:
357 args->process_apertures[
i].gpuvm_base =
359 args->process_apertures[
i].gpuvm_limit =
362 case GfxVersion::gfx803:
363 case GfxVersion::gfx900:
364 case GfxVersion::gfx902:
366 args->process_apertures[
i].gpuvm_base = 0x1000000ull;
368 args->process_apertures[
i].gpuvm_limit =
369 0x0000800000000000ULL - 1;
372 fatal(
"Invalid gfx version");
386 case GfxVersion::gfx803:
387 args->process_apertures[
i].gpu_id = 50156;
389 case GfxVersion::gfx900:
390 args->process_apertures[
i].gpu_id = 22124;
393 fatal(
"Invalid gfx version for dGPU\n");
397 case GfxVersion::gfx801:
398 case GfxVersion::gfx902:
399 args->process_apertures[
i].gpu_id = 2765;
402 fatal(
"Invalid gfx version for APU\n");
406 DPRINTF(GPUDriver,
"GPUVM base for node[%i] = %#x\n",
i,
407 args->process_apertures[
i].gpuvm_base);
408 DPRINTF(GPUDriver,
"GPUVM limit for node[%i] = %#x\n",
i,
409 args->process_apertures[
i].gpuvm_limit);
411 DPRINTF(GPUDriver,
"LDS base for node[%i] = %#x\n",
i,
412 args->process_apertures[
i].lds_base);
413 DPRINTF(GPUDriver,
"LDS limit for node[%i] = %#x\n",
i,
414 args->process_apertures[
i].lds_limit);
416 DPRINTF(GPUDriver,
"Scratch base for node[%i] = %#x\n",
i,
417 args->process_apertures[
i].scratch_base);
418 DPRINTF(GPUDriver,
"Scratch limit for node[%i] = %#x\n",
i,
419 args->process_apertures[
i].scratch_limit);
427 assert(bits<Addr>(args->process_apertures[
i].scratch_base, 63,
429 assert(bits<Addr>(args->process_apertures[
i].scratch_base, 63,
431 assert(bits<Addr>(args->process_apertures[
i].scratch_limit, 63,
433 assert(bits<Addr>(args->process_apertures[
i].scratch_limit, 63,
435 assert(bits<Addr>(args->process_apertures[
i].lds_base, 63,
437 assert(bits<Addr>(args->process_apertures[
i].lds_base, 63,
439 assert(bits<Addr>(args->process_apertures[
i].lds_limit, 63,
441 assert(bits<Addr>(args->process_apertures[
i].lds_limit, 63,
450 warn(
"unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
455 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_CREATE_EVENT\n");
460 warn(
"Signal events are only supported currently\n");
462 fatal(
"Signal event wasn't created; signal limit reached\n");
466 uint64_t page_index = 0;
473 args->event_trigger_data = args->event_id;
474 DPRINTF(GPUDriver,
"amdkfd create events"
475 "(event_id: 0x%x, offset: 0x%x)\n",
476 args->event_id, args->event_page_offset);
489 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
492 DPRINTF(GPUDriver,
"amdkfd destroying event %d\n", args->event_id);
494 "Event ID invalid, cannot destroy this event\n");
495 ETable.erase(args->event_id);
500 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_SET_EVENTS\n");
503 DPRINTF(GPUDriver,
"amdkfd set event %d\n", args->event_id);
505 "Event ID invlaid, cannot set this event\n");
506 ETable[args->event_id].setEvent =
true;
512 warn(
"unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
517 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
522 DPRINTF(GPUDriver,
"amdkfd wait for events"
523 "(wait on all: %d, timeout : %d, num_events: %s)\n",
524 args->wait_for_all, args->timeout, args->num_events);
525 panic_if(args->wait_for_all != 0 && args->num_events > 1,
526 "Wait for all events not supported\n");
527 bool should_sleep =
true;
531 TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
532 std::make_tuple(
this, tc));
533 DPRINTF(GPUDriver,
"\tamdkfd creating event list"
534 " for thread %d\n", tc->
cpuId());
537 "There are %d events that put this thread to sleep,"
538 " this thread should not be running\n",
540 for (
int i = 0;
i < args->num_events;
i++) {
542 "Event pointer invalid\n");
546 EventData.
copyIn(virt_proxy);
548 "\tamdkfd wait for event %d\n", EventData->event_id);
550 "Event ID invalid, cannot set this event\n");
551 if (
ETable[EventData->event_id].threadWaiting)
552 warn(
"Multiple threads waiting on the same event\n");
553 if (
ETable[EventData->event_id].setEvent) {
556 ETable[EventData->event_id].setEvent =
false;
557 should_sleep =
false;
561 ETable[EventData->event_id].threadWaiting =
true;
562 ETable[EventData->event_id].tc = tc;
563 TCEvents[tc].signalEvents.insert(EventData->event_id);
571 args->wait_result = 0;
584 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
589 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
594 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
599 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
604 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
609 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
614 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
620 "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
625 ioc_args.
copyIn(virt_proxy);
626 ioc_args->num_of_nodes = 1;
628 for (
int i = 0;
i < ioc_args->num_of_nodes; ++
i) {
630 (ioc_args->kfd_process_device_apertures_ptr);
633 case GfxVersion::gfx801:
634 case GfxVersion::gfx803:
638 case GfxVersion::gfx900:
639 case GfxVersion::gfx902:
644 fatal(
"Invalid gfx version\n");
648 ape_args->scratch_limit =
650 ape_args->lds_limit =
ldsApeLimit(ape_args->lds_base);
653 case GfxVersion::gfx801:
655 ape_args->gpuvm_limit =
658 case GfxVersion::gfx803:
659 case GfxVersion::gfx900:
660 case GfxVersion::gfx902:
662 ape_args->gpuvm_base = 0x1000000ull;
664 ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
667 fatal(
"Invalid gfx version\n");
673 case GfxVersion::gfx803:
674 ape_args->gpu_id = 50156;
676 case GfxVersion::gfx900:
677 ape_args->gpu_id = 22124;
680 fatal(
"Invalid gfx version for dGPU\n");
684 case GfxVersion::gfx801:
685 case GfxVersion::gfx902:
686 ape_args->gpu_id = 2765;
689 fatal(
"Invalid gfx version for APU\n");
693 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
694 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
695 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
696 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
697 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
698 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
699 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
700 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
710 warn(
"unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
731 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
737 [[maybe_unused]]
Addr mmap_offset = 0;
743 bool cacheable =
true;
746 DPRINTF(GPUDriver,
"amdkfd allocation type: VRAM\n");
747 args->mmap_offset = args->va_addr;
767 pa_addr = process->seWorkload->allocPhysPages(
775 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
776 "%d\n", args->va_addr, pa_addr, args->
size);
779 DPRINTF(GPUDriver,
"amdkfd allocation type: USERPTR\n");
780 mmap_offset = args->mmap_offset;
783 pa_addr = process->seWorkload->allocPhysPages(npages);
785 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
786 "%d\n", args->va_addr, pa_addr, args->
size);
795 DPRINTF(GPUDriver,
"amdkfd allocation type: GTT\n");
796 args->mmap_offset = args->va_addr;
804 pa_addr = process->seWorkload->allocPhysPages(npages);
806 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
807 "%d\n", args->va_addr, pa_addr, args->
size);
819 DPRINTF(GPUDriver,
"amdkfd allocation type: DOORBELL\n");
829 DPRINTF(GPUDriver,
"amdkfd allocation arguments: va_addr %p "
830 "size %lu, mmap_offset %p, gpu_id %d\n",
831 args->va_addr, args->
size, mmap_offset, args->gpu_id);
835 process->pTable->map(args->va_addr, pa_addr, args->
size,
852 args->handle= args->va_addr;
858 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
863 DPRINTF(GPUDriver,
"amdkfd free arguments: handle %p ",
868 process->pTable->unmap(args->handle, size);
887 warn(
"unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
892 warn(
"unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
897 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
902 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
907 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
912 warn(
"unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
917 warn(
"unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
922 warn(
"unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
926 fatal(
"%s: bad ioctl %d\n", req);
936 Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
938 TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
941 "CPU %d is put to sleep\n", tc->
cpuId());
947 return ((
Addr)gpuNum << 61) + 0x1000000000000
L;
953 return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFF
L;
959 return ((
Addr)gpuNum << 61) + 0x100000000
L;
967 return ((
Addr)0x1 << 48);
973 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
979 return ((
Addr)gpuNum << 61) + 0x0;
987 return ((
Addr)0x2 << 48);
993 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
1001 DPRINTF(GPUDriver,
"Registering [%p - %p] with MTYPE %d\n",
1002 range.
start(), range.
end(), mtype);
1004 "Attempted to double register Mtypes for [%p - %p]\n",
1011 auto vma =
gpuVmas.contains(start);
1013 assert((vma->first.start() == start));
1014 Addr size = vma->first.size();
1015 DPRINTF(GPUDriver,
"Unregistering [%p - %p]\n", vma->first.start(),
1028 auto vma =
gpuVmas.contains(range);
1030 DPRINTF(GPUShader,
"Setting req from [%p - %p] MTYPE %d\n"
1031 "%d\n", range.
start(), range.
end(), vma->second);
1032 req->setCacheCoherenceFlags(vma->second);
Tick curTick()
The universal simulation clock.
#define fatal(...)
This implements a cprintf based fatal() function.
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU
std::unordered_map< ThreadContext *, EventList > TCEvents
virtual void signalWakeupEvent(uint32_t event_id)
AddrRangeMap< Request::CacheCoherenceFlags, 1 > gpuVmas
VMA structures for GPUVM memory.
#define KFD_SIGNAL_EVENT_LIMIT
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
Forward relevant parameters to the packet processor; queueId is used to link the doorbell.
Addr start() const
Get the start address of the range.
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
Addr deallocateGpuVma(Addr start)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
void attachDriver(GPUComputeDriver *driver)
#define AMDKFD_IOC_GET_TILE_CONFIG
AddrRange RangeSize(Addr start, Addr size)
#define AMDKFD_IOC_DBG_WAVE_CONTROL
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU
#define AMDKFD_IOC_DESTROY_EVENT
void set(Type mask)
Set all flag's bits matching the given mask.
virtual BaseMMU * getMMUPtr()=0
void clear()
Clear all flag's bits.
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
Addr gpuVmApeLimit(Addr apeBase) const
virtual void activate()=0
Set the status to Active.
HSAPacketProcessor & hsaPacketProc()
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM
#define KFD_IOCTL_MINOR_VERSION
#define KFD_MMAP_GPU_ID(gpu_id)
virtual int cpuId() const =0
#define AMDKFD_IOC_GET_VERSION
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length)
Allocate/deallocate GPUVM VMAs for tracking virtual address allocations and properties on DGPUs.
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
std::shared_ptr< MemState > memState
void setMtype(RequestPtr req)
Called by the compute units right before a request is issued to ruby.
This proxy attempts to translate virtual addresses using the TLBs.
#define KFD_MMAP_TYPE_MASK
std::unordered_map< uint32_t, ETEntry > ETable
#define AMDKFD_IOC_SET_EVENT
#define AMDKFD_IOC_GET_PROCESS_APERTURES
#define AMDKFD_IOC_SET_TRAP_HANDLER
#define AMDKFD_IOC_DBG_REGISTER
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Request::CacheCoherenceFlags defaultMtype
virtual std::string name() const
const int size
buffer size
#define AMDKFD_IOC_ACQUIRE_VM
#define AMDKFD_IOC_SET_CU_MASK
uint64_t Tick
Tick count type.
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE
std::shared_ptr< Request > RequestPtr
Addr ldsApeBaseV9() const
#define KFD_IOCTL_MAJOR_VERSION
This object is a proxy for a port or other object which implements the functional response protocol,...
Addr scratchApeBase(int gpuNum) const
#define AMDKFD_IOC_RESET_EVENT
GPUComputeDriver(const Params &p)
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT
Addr end() const
Get the end address of the range.
#define KFD_IOC_EVENT_SIGNAL
#define AMDKFD_IOC_DBG_ADDRESS_WATCH
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int open(ThreadContext *tc, int mode, int flags) override
Create an FD entry for the KFD inside of the owning process.
Addr scratchApeBaseV9() const
#define AMDKFD_IOC_IMPORT_DMABUF
#define KFD_MMAP_TYPE_EVENTS
bool copyIn(const PortProxy &memproxy)
copy data into simulator space (read from target memory)
static constexpr T divCeil(const T &a, const U &b)
virtual Process * getProcessPtr()=0
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
void scheduleWakeup(Tick wakeup_delay)
virtual void suspend()=0
Set the status to Suspended.
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
GPUCommandProcessor * device
GPU that is controlled by this driver.
#define AMDKFD_IOC_CREATE_EVENT
#define AMDKFD_IOC_DESTROY_QUEUE
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
bool copyOut(const PortProxy &memproxy)
copy data out of simulator space (write to target memory)
#define AMDKFD_IOC_SET_MEMORY_POLICY
Addr mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) override
Currently, mmap() will simply set up a mapping for the associated device's packet processor's doorbell...
#define AMDKFD_IOC_WAIT_EVENTS
#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA
#define AMDKFD_IOC_DBG_UNREGISTER
#define AMDKFD_IOC_SMI_EVENTS
GPUComputeDriverParams Params
#define AMDKFD_IOC_UPDATE_QUEUE
Addr ldsApeLimit(Addr apeBase) const
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
const char * description() const override
Return a C string describing the event.
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define AMDKFD_IOC_ALLOC_QUEUE_GWS
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Addr gpuVmApeBase(int gpuNum) const
The aperture (APE) base/limit pairs are set statically at startup by the real KFD.
Addr scratchApeLimit(Addr apeBase) const
Addr ldsApeBase(int gpuNum) const
const std::string & filename
filename for opening this driver (under /dev)
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
#define AMDKFD_IOC_CREATE_QUEUE
#define AMDKFD_IOC_GET_CLOCK_COUNTERS
#define KFD_MMAP_TYPE_DOORBELL
#define AMDKFD_IOC_GET_DMABUF_INFO
Generated on Sun Jul 30 2023 01:56:57 for gem5 by doxygen 1.8.17