42 #include "debug/GPUDriver.hh"
43 #include "debug/GPUShader.hh"
50 #include "params/GPUComputeDriver.hh"
59 isdGPU(
p.isdGPU), gfxVersion(
p.gfxVersion), dGPUPoolID(
p.dGPUPoolID),
60 eventPage(0), eventSlotIndex(0)
63 DPRINTF(GPUDriver,
"Constructing KFD: device\n");
67 if (MtypeFlags::SHARED &
p.m_type)
70 if (MtypeFlags::READ_WRITE &
p.m_type)
73 if (MtypeFlags::CACHED &
p.m_type)
80 return "DriverWakeupEvent";
91 auto device_fd_entry = std::make_shared<DeviceFDEntry>(
this,
filename);
92 int tgt_fd = process->fds->allocFD(device_fd_entry);
102 int prot,
int tgt_flags,
int tgt_fd, off_t
offset)
109 DPRINTF(GPUDriver,
"amdkfd mmap (start: %p, length: 0x%x,"
110 "offset: 0x%x)\n", start, length,
offset);
114 DPRINTF(GPUDriver,
"amdkfd mmap type DOORBELL offset\n");
115 start = mem_state->extendMmap(length);
120 DPRINTF(GPUDriver,
"amdkfd mmap type EVENTS offset\n");
122 "Start address should be provided by KFD\n");
124 "Requested length %d, expected length %d; length "
132 eventPage = mem_state->extendMmap(length);
137 warn_once(
"Unrecognized kfd mmap type %llx\n", mmap_type);
158 fatal(
"%s: Exceeded maximum number of HSA queues allowed\n",
name());
171 args->ring_base_address, args->queue_id,
180 driver->schedule(
this,
curTick() + wakeup_delay);
187 "Trying wakeup on an event that is not yet created\n");
188 if (
ETable[event_id].threadWaiting) {
190 "No thread context to wake up\n");
193 "Signal event: Waking up CPU %d\n", tc->
cpuId());
206 ETable[event_id].setEvent =
true;
214 "Timer event: Waking up CPU %d\n", tc->cpuId());
216 driver->TCEvents[tc].clearEvents();
231 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_VERSION\n");
242 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
253 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
254 "queue offset %d\n", args->queue_id);
274 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
279 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
285 args->system_clock_freq = 1000000000;
292 args->gpu_clock_counter = elapsed_nsec;
293 args->cpu_clock_counter = elapsed_nsec;
294 args->system_clock_counter = elapsed_nsec;
301 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
304 args->num_of_nodes = 1;
312 for (
int i = 0;
i < args->num_of_nodes; ++
i) {
321 case GfxVersion::gfx801:
322 case GfxVersion::gfx803:
323 args->process_apertures[
i].scratch_base =
325 args->process_apertures[
i].lds_base =
328 case GfxVersion::gfx900:
329 args->process_apertures[
i].scratch_base =
331 args->process_apertures[
i].lds_base =
335 fatal(
"Invalid gfx version\n");
339 args->process_apertures[
i].scratch_limit =
342 args->process_apertures[
i].lds_limit =
346 case GfxVersion::gfx801:
347 args->process_apertures[
i].gpuvm_base =
349 args->process_apertures[
i].gpuvm_limit =
352 case GfxVersion::gfx803:
353 case GfxVersion::gfx900:
354 case GfxVersion::gfx902:
356 args->process_apertures[
i].gpuvm_base = 0x1000000ull;
358 args->process_apertures[
i].gpuvm_limit =
359 0x0000800000000000ULL - 1;
362 fatal(
"Invalid gfx version");
376 case GfxVersion::gfx803:
377 args->process_apertures[
i].gpu_id = 50156;
379 case GfxVersion::gfx900:
380 args->process_apertures[
i].gpu_id = 22124;
383 fatal(
"Invalid gfx version for dGPU\n");
387 case GfxVersion::gfx801:
388 case GfxVersion::gfx902:
389 args->process_apertures[
i].gpu_id = 2765;
392 fatal(
"Invalid gfx version for APU\n");
396 DPRINTF(GPUDriver,
"GPUVM base for node[%i] = %#x\n",
i,
397 args->process_apertures[
i].gpuvm_base);
398 DPRINTF(GPUDriver,
"GPUVM limit for node[%i] = %#x\n",
i,
399 args->process_apertures[
i].gpuvm_limit);
401 DPRINTF(GPUDriver,
"LDS base for node[%i] = %#x\n",
i,
402 args->process_apertures[
i].lds_base);
403 DPRINTF(GPUDriver,
"LDS limit for node[%i] = %#x\n",
i,
404 args->process_apertures[
i].lds_limit);
406 DPRINTF(GPUDriver,
"Scratch base for node[%i] = %#x\n",
i,
407 args->process_apertures[
i].scratch_base);
408 DPRINTF(GPUDriver,
"Scratch limit for node[%i] = %#x\n",
i,
409 args->process_apertures[
i].scratch_limit);
417 assert(bits<Addr>(args->process_apertures[
i].scratch_base, 63,
419 assert(bits<Addr>(args->process_apertures[
i].scratch_base, 63,
421 assert(bits<Addr>(args->process_apertures[
i].scratch_limit, 63,
423 assert(bits<Addr>(args->process_apertures[
i].scratch_limit, 63,
425 assert(bits<Addr>(args->process_apertures[
i].lds_base, 63,
427 assert(bits<Addr>(args->process_apertures[
i].lds_base, 63,
429 assert(bits<Addr>(args->process_apertures[
i].lds_limit, 63,
431 assert(bits<Addr>(args->process_apertures[
i].lds_limit, 63,
440 warn(
"unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
445 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_CREATE_EVENT\n");
450 warn(
"Signal events are only supported currently\n");
452 fatal(
"Signal event wasn't created; signal limit reached\n");
456 uint64_t page_index = 0;
463 args->event_trigger_data = args->event_id;
464 DPRINTF(GPUDriver,
"amdkfd create events"
465 "(event_id: 0x%x, offset: 0x%x)\n",
466 args->event_id, args->event_page_offset);
479 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
482 DPRINTF(GPUDriver,
"amdkfd destroying event %d\n", args->event_id);
484 "Event ID invalid, cannot destroy this event\n");
485 ETable.erase(args->event_id);
490 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_SET_EVENTS\n");
493 DPRINTF(GPUDriver,
"amdkfd set event %d\n", args->event_id);
495 "Event ID invlaid, cannot set this event\n");
496 ETable[args->event_id].setEvent =
true;
502 warn(
"unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
507 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
512 DPRINTF(GPUDriver,
"amdkfd wait for events"
513 "(wait on all: %d, timeout : %d, num_events: %s)\n",
514 args->wait_for_all, args->timeout, args->num_events);
515 panic_if(args->wait_for_all != 0 && args->num_events > 1,
516 "Wait for all events not supported\n");
517 bool should_sleep =
true;
521 TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
522 std::make_tuple(
this, tc));
523 DPRINTF(GPUDriver,
"\tamdkfd creating event list"
524 " for thread %d\n", tc->
cpuId());
527 "There are %d events that put this thread to sleep,"
528 " this thread should not be running\n",
530 for (
int i = 0;
i < args->num_events;
i++) {
532 "Event pointer invalid\n");
536 EventData.
copyIn(virt_proxy);
538 "\tamdkfd wait for event %d\n", EventData->event_id);
540 "Event ID invalid, cannot set this event\n");
541 if (
ETable[EventData->event_id].threadWaiting)
542 warn(
"Multiple threads waiting on the same event\n");
543 if (
ETable[EventData->event_id].setEvent) {
546 ETable[EventData->event_id].setEvent =
false;
547 should_sleep =
false;
551 ETable[EventData->event_id].threadWaiting =
true;
552 ETable[EventData->event_id].tc = tc;
553 TCEvents[tc].signalEvents.insert(EventData->event_id);
561 args->wait_result = 0;
574 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
579 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
584 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
589 warn(
"unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
594 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
599 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
604 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
610 "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
615 ioc_args.
copyIn(virt_proxy);
616 ioc_args->num_of_nodes = 1;
618 for (
int i = 0;
i < ioc_args->num_of_nodes; ++
i) {
620 (ioc_args->kfd_process_device_apertures_ptr);
623 case GfxVersion::gfx801:
624 case GfxVersion::gfx803:
628 case GfxVersion::gfx900:
633 fatal(
"Invalid gfx version\n");
637 ape_args->scratch_limit =
639 ape_args->lds_limit =
ldsApeLimit(ape_args->lds_base);
642 case GfxVersion::gfx801:
644 ape_args->gpuvm_limit =
647 case GfxVersion::gfx803:
648 case GfxVersion::gfx900:
649 case GfxVersion::gfx902:
651 ape_args->gpuvm_base = 0x1000000ull;
653 ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
656 fatal(
"Invalid gfx version\n");
662 case GfxVersion::gfx803:
663 ape_args->gpu_id = 50156;
665 case GfxVersion::gfx900:
666 ape_args->gpu_id = 22124;
669 fatal(
"Invalid gfx version for dGPU\n");
673 case GfxVersion::gfx801:
674 case GfxVersion::gfx902:
675 ape_args->gpu_id = 2765;
678 fatal(
"Invalid gfx version for APU\n");
682 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
683 assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
684 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
685 assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
686 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
687 assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
688 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
689 assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
699 warn(
"unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
720 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
726 GEM5_VAR_USED
Addr mmap_offset = 0;
732 bool cacheable =
true;
735 DPRINTF(GPUDriver,
"amdkfd allocation type: VRAM\n");
736 args->mmap_offset = args->va_addr;
756 pa_addr = process->system->allocPhysPages(npages,
dGPUPoolID);
763 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
764 "%d\n", args->va_addr, pa_addr, args->
size);
767 DPRINTF(GPUDriver,
"amdkfd allocation type: USERPTR\n");
768 mmap_offset = args->mmap_offset;
771 pa_addr = process->system->allocPhysPages(npages);
773 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
774 "%d\n", args->va_addr, pa_addr, args->
size);
783 DPRINTF(GPUDriver,
"amdkfd allocation type: GTT\n");
784 args->mmap_offset = args->va_addr;
792 pa_addr = process->system->allocPhysPages(npages);
794 DPRINTF(GPUDriver,
"Mapping VA %p to framebuffer PA %p size "
795 "%d\n", args->va_addr, pa_addr, args->
size);
807 DPRINTF(GPUDriver,
"amdkfd allocation type: DOORBELL\n");
817 DPRINTF(GPUDriver,
"amdkfd allocation arguments: va_addr %p "
818 "size %lu, mmap_offset %p, gpu_id %d\n",
819 args->va_addr, args->
size, mmap_offset, args->gpu_id);
823 process->pTable->map(args->va_addr, pa_addr, args->
size,
840 args->handle= args->va_addr;
846 DPRINTF(GPUDriver,
"ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
851 DPRINTF(GPUDriver,
"amdkfd free arguments: handle %p ",
856 process->pTable->unmap(args->handle, size);
875 warn(
"unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
880 warn(
"unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
885 warn(
"unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
890 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
895 warn(
"unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
900 warn(
"unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
905 warn(
"unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
910 warn(
"unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
914 fatal(
"%s: bad ioctl %d\n", req);
924 Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
926 TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
929 "CPU %d is put to sleep\n", tc->
cpuId());
935 return ((
Addr)gpuNum << 61) + 0x1000000000000
L;
941 return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFF
L;
947 return ((
Addr)gpuNum << 61) + 0x100000000
L;
955 return ((
Addr)0x1 << 48);
961 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
967 return ((
Addr)gpuNum << 61) + 0x0;
975 return ((
Addr)0x2 << 48);
981 return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
989 DPRINTF(GPUDriver,
"Registering [%p - %p] with MTYPE %d\n",
992 "Attempted to double register Mtypes for [%p - %p]\n",
999 auto vma =
gpuVmas.contains(start);
1001 assert((vma->first.start() == start));
1002 Addr size = vma->first.size();
1003 DPRINTF(GPUDriver,
"Unregistering [%p - %p]\n", vma->first.start(),
1015 auto vma =
gpuVmas.contains(range);
1017 DPRINTF(GPUShader,
"Setting req from [%p - %p] MTYPE %d\n"
1018 "%d\n", range.
start(), range.
end(), vma->second);
1019 req->setCacheCoherenceFlags(vma->second);