gem5  v21.1.0.2
gpu_compute_driver.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include <memory>
37 
38 #include "base/compiler.hh"
39 #include "base/logging.hh"
40 #include "base/trace.hh"
41 #include "cpu/thread_context.hh"
42 #include "debug/GPUDriver.hh"
43 #include "debug/GPUShader.hh"
46 #include "dev/hsa/kfd_ioctl.h"
48 #include "gpu-compute/shader.hh"
49 #include "mem/port_proxy.hh"
50 #include "params/GPUComputeDriver.hh"
51 #include "sim/process.hh"
52 #include "sim/syscall_emul_buf.hh"
53 
54 namespace gem5
55 {
56 
58  : EmulatedDriver(p), device(p.device), queueId(0),
59  isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID),
60  eventPage(0), eventSlotIndex(0)
61 {
62  device->attachDriver(this);
63  DPRINTF(GPUDriver, "Constructing KFD: device\n");
64 
65  // Convert the 3 bit mtype specified in Shader.py to the proper type
66  // used for requests.
67  if (MtypeFlags::SHARED & p.m_type)
69 
70  if (MtypeFlags::READ_WRITE & p.m_type)
72 
73  if (MtypeFlags::CACHED & p.m_type)
75 }
76 
77 const char*
79 {
80  return "DriverWakeupEvent";
81 }
82 
86 int
88 {
89  DPRINTF(GPUDriver, "Opened %s\n", filename);
90  auto process = tc->getProcessPtr();
91  auto device_fd_entry = std::make_shared<DeviceFDEntry>(this, filename);
92  int tgt_fd = process->fds->allocFD(device_fd_entry);
93  return tgt_fd;
94 }
95 
100 Addr
101 GPUComputeDriver::mmap(ThreadContext *tc, Addr start, uint64_t length,
102  int prot, int tgt_flags, int tgt_fd, off_t offset)
103 {
104  auto process = tc->getProcessPtr();
105  auto mem_state = process->memState;
106 
107  Addr pg_off = offset >> PAGE_SHIFT;
108  Addr mmap_type = pg_off & KFD_MMAP_TYPE_MASK;
109  DPRINTF(GPUDriver, "amdkfd mmap (start: %p, length: 0x%x,"
110  "offset: 0x%x)\n", start, length, offset);
111 
112  switch(mmap_type) {
114  DPRINTF(GPUDriver, "amdkfd mmap type DOORBELL offset\n");
115  start = mem_state->extendMmap(length);
116  process->pTable->map(start, device->hsaPacketProc().pioAddr,
117  length, false);
118  break;
120  DPRINTF(GPUDriver, "amdkfd mmap type EVENTS offset\n");
121  panic_if(start != 0,
122  "Start address should be provided by KFD\n");
123  panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
124  "Requested length %d, expected length %d; length "
125  "mismatch\n", length, 8* KFD_SIGNAL_EVENT_LIMIT);
131  if (!eventPage) {
132  eventPage = mem_state->extendMmap(length);
133  start = eventPage;
134  }
135  break;
136  default:
137  warn_once("Unrecognized kfd mmap type %llx\n", mmap_type);
138  break;
139  }
140 
141  return start;
142 }
143 
151 void
153 {
155  args.copyIn(mem_proxy);
156 
157  if ((doorbellSize() * queueId) > 4096) {
158  fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
159  }
160 
161  args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
162  KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;
163 
164  // for vega offset needs to include exact value of doorbell
165  if (doorbellSize())
166  args->doorbell_offset += queueId * doorbellSize();
167 
168  args->queue_id = queueId++;
169  auto &hsa_pp = device->hsaPacketProc();
170  hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
171  args->ring_base_address, args->queue_id,
172  args->ring_size, doorbellSize());
173  args.copyOut(mem_proxy);
174 }
175 
176 void
178 {
179  assert(driver);
180  driver->schedule(this, curTick() + wakeup_delay);
181 }
182 
183 void
185 {
186  panic_if(event_id >= eventSlotIndex,
187  "Trying wakeup on an event that is not yet created\n");
188  if (ETable[event_id].threadWaiting) {
189  panic_if(!ETable[event_id].tc,
190  "No thread context to wake up\n");
191  ThreadContext *tc = ETable[event_id].tc;
192  DPRINTF(GPUDriver,
193  "Signal event: Waking up CPU %d\n", tc->cpuId());
194  // Remove events that can wakeup this thread
195  TCEvents[tc].clearEvents();
196  // Now wakeup this thread
197  tc->activate();
198  } else {
199  // This may be a race between an ioctl call asking to wait on this
200  // event and this signalWakeupEvent. We handle the race here by
201  // marking the event as set; the ioctl call should take the
202  // necessary action when waiting on an already-set event. However,
203  // this may also be a genuine case in which the runtime has decided
204  // not to wait on this event. Since we cannot distinguish that case
205  // from the race, we set the event anyway.
206  ETable[event_id].setEvent = true;
207  }
208 }
209 
210 void
212 {
213  DPRINTF(GPUDriver,
214  "Timer event: Waking up CPU %d\n", tc->cpuId());
215  // Remove events that can wakeup this thread
216  driver->TCEvents[tc].clearEvents();
217  // Now wakeup this thread
218  tc->activate();
219 }
220 
221 int
222 GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
223 {
224  auto &virt_proxy = tc->getVirtProxy();
225  auto process = tc->getProcessPtr();
226  auto mem_state = process->memState;
227 
228  switch (req) {
230  {
231  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_VERSION\n");
232 
234  args->major_version = KFD_IOCTL_MAJOR_VERSION;
235  args->minor_version = KFD_IOCTL_MINOR_VERSION;
236 
237  args.copyOut(virt_proxy);
238  }
239  break;
241  {
242  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
243 
244  allocateQueue(virt_proxy, ioc_buf);
245 
246  DPRINTF(GPUDriver, "Creating queue %d\n", queueId);
247  }
248  break;
250  {
252  args.copyIn(virt_proxy);
253  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
254  "queue offset %d\n", args->queue_id);
255  device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id,
256  doorbellSize());
257  }
258  break;
260  {
274  warn("unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
275  }
276  break;
278  {
279  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
280 
282  args.copyIn(virt_proxy);
283 
284  // Set nanosecond resolution
285  args->system_clock_freq = 1000000000;
286 
291  uint64_t elapsed_nsec = curTick() / sim_clock::as_int::ns;
292  args->gpu_clock_counter = elapsed_nsec;
293  args->cpu_clock_counter = elapsed_nsec;
294  args->system_clock_counter = elapsed_nsec;
295 
296  args.copyOut(virt_proxy);
297  }
298  break;
300  {
301  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
302 
304  args->num_of_nodes = 1;
305 
312  for (int i = 0; i < args->num_of_nodes; ++i) {
320  switch (gfxVersion) {
321  case GfxVersion::gfx801:
322  case GfxVersion::gfx803:
323  args->process_apertures[i].scratch_base =
324  scratchApeBase(i + 1);
325  args->process_apertures[i].lds_base =
326  ldsApeBase(i + 1);
327  break;
328  case GfxVersion::gfx900:
329  args->process_apertures[i].scratch_base =
331  args->process_apertures[i].lds_base =
332  ldsApeBaseV9();
333  break;
334  default:
335  fatal("Invalid gfx version\n");
336  }
337 
338  // GFX8 and GFX9 set lds and scratch limits the same way
339  args->process_apertures[i].scratch_limit =
340  scratchApeLimit(args->process_apertures[i].scratch_base);
341 
342  args->process_apertures[i].lds_limit =
343  ldsApeLimit(args->process_apertures[i].lds_base);
344 
345  switch (gfxVersion) {
346  case GfxVersion::gfx801:
347  args->process_apertures[i].gpuvm_base =
348  gpuVmApeBase(i + 1);
349  args->process_apertures[i].gpuvm_limit =
350  gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
351  break;
352  case GfxVersion::gfx803:
353  case GfxVersion::gfx900:
354  case GfxVersion::gfx902:
355  // Taken from SVM_USE_BASE in Linux kernel
356  args->process_apertures[i].gpuvm_base = 0x1000000ull;
357  // Taken from AMDGPU_GMC_HOLE_START in Linux kernel
358  args->process_apertures[i].gpuvm_limit =
359  0x0000800000000000ULL - 1;
360  break;
361  default:
362  fatal("Invalid gfx version");
363  }
364 
365  // NOTE: Must match ID populated by hsaTopology.py
366  //
367  // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/
368  // blob/6a986c0943e9acd8c4c0cf2a9d510ff42167b43f/include/uapi/
369  // linux/kfd_ioctl.h#L564
370  //
371  // The gpu_id is a device identifier used by the driver for
372  // ioctls that allocate arguments. Each device has a unique
373  // id composed out of a non-zero base and an offset.
374  if (isdGPU) {
375  switch (gfxVersion) {
376  case GfxVersion::gfx803:
377  args->process_apertures[i].gpu_id = 50156;
378  break;
379  case GfxVersion::gfx900:
380  args->process_apertures[i].gpu_id = 22124;
381  break;
382  default:
383  fatal("Invalid gfx version for dGPU\n");
384  }
385  } else {
386  switch (gfxVersion) {
387  case GfxVersion::gfx801:
388  case GfxVersion::gfx902:
389  args->process_apertures[i].gpu_id = 2765;
390  break;
391  default:
392  fatal("Invalid gfx version for APU\n");
393  }
394  }
395 
396  DPRINTF(GPUDriver, "GPUVM base for node[%i] = %#x\n", i,
397  args->process_apertures[i].gpuvm_base);
398  DPRINTF(GPUDriver, "GPUVM limit for node[%i] = %#x\n", i,
399  args->process_apertures[i].gpuvm_limit);
400 
401  DPRINTF(GPUDriver, "LDS base for node[%i] = %#x\n", i,
402  args->process_apertures[i].lds_base);
403  DPRINTF(GPUDriver, "LDS limit for node[%i] = %#x\n", i,
404  args->process_apertures[i].lds_limit);
405 
406  DPRINTF(GPUDriver, "Scratch base for node[%i] = %#x\n", i,
407  args->process_apertures[i].scratch_base);
408  DPRINTF(GPUDriver, "Scratch limit for node[%i] = %#x\n", i,
409  args->process_apertures[i].scratch_limit);
410 
417  assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
418  47) != 0x1ffff);
419  assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
420  47) != 0);
421  assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
422  47) != 0x1ffff);
423  assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
424  47) != 0);
425  assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
426  47) != 0x1ffff);
427  assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
428  47) != 0);
429  assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
430  47) != 0x1ffff);
431  assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
432  47) != 0);
433  }
434 
435  args.copyOut(virt_proxy);
436  }
437  break;
439  {
440  warn("unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
441  }
442  break;
444  {
445  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_EVENT\n");
446 
448  args.copyIn(virt_proxy);
449  if (args->event_type != KFD_IOC_EVENT_SIGNAL) {
450  warn("Signal events are only supported currently\n");
451  } else if (eventSlotIndex == SLOTS_PER_PAGE) {
452  fatal("Signal event wasn't created; signal limit reached\n");
453  }
454  // Currently, we allocate only one signal_page for events.
455  // Note that this signal page is of size 8 * KFD_SIGNAL_EVENT_LIMIT
456  uint64_t page_index = 0;
457  args->event_page_offset = (page_index | KFD_MMAP_TYPE_EVENTS);
458  args->event_page_offset <<= PAGE_SHIFT;
459  // TODO: Currently we support only signal events, hence using
460  // the same ID for both signal slot and event slot
461  args->event_slot_index = eventSlotIndex;
462  args->event_id = eventSlotIndex++;
463  args->event_trigger_data = args->event_id;
464  DPRINTF(GPUDriver, "amdkfd create events"
465  "(event_id: 0x%x, offset: 0x%x)\n",
466  args->event_id, args->event_page_offset);
467  // Since eventSlotIndex is incremented every time a new event is
468  // created, ETable at eventSlotIndex(event_id) is guaranteed to be
469  // empty. In a future implementation that reuses deleted event_ids,
470  // we should check that the event table at this
471  // eventSlotIndex(event_id) is empty before inserting a new event
472  // table entry.
473  ETable.emplace(std::pair<uint32_t, ETEntry>(args->event_id, {}));
474  args.copyOut(virt_proxy);
475  }
476  break;
478  {
479  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
481  args.copyIn(virt_proxy);
482  DPRINTF(GPUDriver, "amdkfd destroying event %d\n", args->event_id);
483  fatal_if(ETable.count(args->event_id) == 0,
484  "Event ID invalid, cannot destroy this event\n");
485  ETable.erase(args->event_id);
486  }
487  break;
489  {
490  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_SET_EVENTS\n");
492  args.copyIn(virt_proxy);
493  DPRINTF(GPUDriver, "amdkfd set event %d\n", args->event_id);
494  fatal_if(ETable.count(args->event_id) == 0,
495  "Event ID invlaid, cannot set this event\n");
496  ETable[args->event_id].setEvent = true;
497  signalWakeupEvent(args->event_id);
498  }
499  break;
501  {
502  warn("unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
503  }
504  break;
506  {
507  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
509  args.copyIn(virt_proxy);
510  kfd_event_data *events =
511  (kfd_event_data *)args->events_ptr;
512  DPRINTF(GPUDriver, "amdkfd wait for events"
513  "(wait on all: %d, timeout : %d, num_events: %s)\n",
514  args->wait_for_all, args->timeout, args->num_events);
515  panic_if(args->wait_for_all != 0 && args->num_events > 1,
516  "Wait for all events not supported\n");
517  bool should_sleep = true;
518  if (TCEvents.count(tc) == 0) {
519  // This thread context is waiting on an event for the first
520  // time; initialize its event list.
521  TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
522  std::make_tuple(this, tc));
523  DPRINTF(GPUDriver, "\tamdkfd creating event list"
524  " for thread %d\n", tc->cpuId());
525  }
526  panic_if(TCEvents[tc].signalEvents.size() != 0,
527  "There are %d events that put this thread to sleep,"
528  " this thread should not be running\n",
529  TCEvents[tc].signalEvents.size());
530  for (int i = 0; i < args->num_events; i++) {
531  panic_if(!events,
532  "Event pointer invalid\n");
533  Addr eventDataAddr = (Addr)(events + i);
535  eventDataAddr, sizeof(kfd_event_data));
536  EventData.copyIn(virt_proxy);
537  DPRINTF(GPUDriver,
538  "\tamdkfd wait for event %d\n", EventData->event_id);
539  panic_if(ETable.count(EventData->event_id) == 0,
540  "Event ID invalid, cannot set this event\n");
541  if (ETable[EventData->event_id].threadWaiting)
542  warn("Multiple threads waiting on the same event\n");
543  if (ETable[EventData->event_id].setEvent) {
544  // If event is already set, the event has already happened.
545  // Just unset the event and don't put this thread to sleep.
546  ETable[EventData->event_id].setEvent = false;
547  should_sleep = false;
548  }
549  if (should_sleep) {
550  // Put this thread to sleep
551  ETable[EventData->event_id].threadWaiting = true;
552  ETable[EventData->event_id].tc = tc;
553  TCEvents[tc].signalEvents.insert(EventData->event_id);
554  }
555  }
556 
557  // TODO: Return the correct wait_result back. Currently, returning
558  // success for both KFD_WAIT_TIMEOUT and KFD_WAIT_COMPLETE.
559  // Ideally, this needs to be done after the event is triggered and
560  // after the thread is woken up.
561  args->wait_result = 0;
562  args.copyOut(virt_proxy);
563  if (should_sleep) {
564  // Put this thread to sleep
565  sleepCPU(tc, args->timeout);
566  } else {
567  // Remove events that tried to put this thread to sleep
568  TCEvents[tc].clearEvents();
569  }
570  }
571  break;
573  {
574  warn("unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
575  }
576  break;
578  {
579  warn("unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
580  }
581  break;
583  {
584  warn("unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
585  }
586  break;
588  {
589  warn("unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
590  }
591  break;
593  {
594  warn("unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
595  }
596  break;
598  {
599  warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
600  }
601  break;
603  {
604  warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
605  }
606  break;
608  {
609  DPRINTF(GPUDriver,
610  "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
611 
613  ioc_args(ioc_buf);
614 
615  ioc_args.copyIn(virt_proxy);
616  ioc_args->num_of_nodes = 1;
617 
618  for (int i = 0; i < ioc_args->num_of_nodes; ++i) {
620  (ioc_args->kfd_process_device_apertures_ptr);
621 
622  switch (gfxVersion) {
623  case GfxVersion::gfx801:
624  case GfxVersion::gfx803:
625  ape_args->scratch_base = scratchApeBase(i + 1);
626  ape_args->lds_base = ldsApeBase(i + 1);
627  break;
628  case GfxVersion::gfx900:
629  ape_args->scratch_base = scratchApeBaseV9();
630  ape_args->lds_base = ldsApeBaseV9();
631  break;
632  default:
633  fatal("Invalid gfx version\n");
634  }
635 
636  // GFX8 and GFX9 set lds and scratch limits the same way
637  ape_args->scratch_limit =
638  scratchApeLimit(ape_args->scratch_base);
639  ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
640 
641  switch (gfxVersion) {
642  case GfxVersion::gfx801:
643  ape_args->gpuvm_base = gpuVmApeBase(i + 1);
644  ape_args->gpuvm_limit =
645  gpuVmApeLimit(ape_args->gpuvm_base);
646  break;
647  case GfxVersion::gfx803:
648  case GfxVersion::gfx900:
649  case GfxVersion::gfx902:
650  // Taken from SVM_USE_BASE in Linux kernel
651  ape_args->gpuvm_base = 0x1000000ull;
652  // Taken from AMDGPU_GMC_HOLE_START in Linux kernel
653  ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
654  break;
655  default:
656  fatal("Invalid gfx version\n");
657  }
658 
659  // NOTE: Must match ID populated by hsaTopology.py
660  if (isdGPU) {
661  switch (gfxVersion) {
662  case GfxVersion::gfx803:
663  ape_args->gpu_id = 50156;
664  break;
665  case GfxVersion::gfx900:
666  ape_args->gpu_id = 22124;
667  break;
668  default:
669  fatal("Invalid gfx version for dGPU\n");
670  }
671  } else {
672  switch (gfxVersion) {
673  case GfxVersion::gfx801:
674  case GfxVersion::gfx902:
675  ape_args->gpu_id = 2765;
676  break;
677  default:
678  fatal("Invalid gfx version for APU\n");
679  }
680  }
681 
682  assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
683  assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
684  assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
685  assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
686  assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
687  assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
688  assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
689  assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
690 
691  ape_args.copyOut(virt_proxy);
692  }
693 
694  ioc_args.copyOut(virt_proxy);
695  }
696  break;
698  {
699  warn("unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
700  }
701  break;
719  {
720  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
722  args.copyIn(virt_proxy);
723 
724  assert(isdGPU || gfxVersion == GfxVersion::gfx902);
725  assert((args->va_addr % TheISA::PageBytes) == 0);
726  GEM5_VAR_USED Addr mmap_offset = 0;
727 
729  Addr pa_addr = 0;
730 
731  int npages = divCeil(args->size, (int64_t)TheISA::PageBytes);
732  bool cacheable = true;
733 
734  if (KFD_IOC_ALLOC_MEM_FLAGS_VRAM & args->flags) {
735  DPRINTF(GPUDriver, "amdkfd allocation type: VRAM\n");
736  args->mmap_offset = args->va_addr;
737  // VRAM allocations are device memory mapped into GPUVM
738  // space.
739  //
740  // We can't rely on the lazy host allocator (fixupFault) to
741  // handle this mapping since it needs to be placed in dGPU
742  // framebuffer memory. The lazy allocator will try to place
743  // this in host memory.
744  //
745  // TODO: We don't have the appropriate bifurcation of the
746  // physical address space with different memory controllers
747  // yet. This is where we will explicitly add the PT maps to
748  // dGPU memory in the future.
749  //
750  // Bind the VA space to the dGPU physical memory pool. Mark
751  // this region as Uncacheable. The Uncacheable flag is only
752  // really used by the CPU and is ignored by the GPU. We mark
753  // this as uncacheable from the CPU so that we can implement
754  // direct CPU framebuffer access similar to what we currently
755  // offer in real HW through the so-called Large BAR feature.
756  pa_addr = process->system->allocPhysPages(npages, dGPUPoolID);
757  //
758  // TODO: Uncacheable accesses need to be supported by the
759  // CPU-side protocol for this to work correctly. I believe
760  // it only works right now if the physical memory is MMIO
761  cacheable = false;
762 
763  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
764  "%d\n", args->va_addr, pa_addr, args->size);
765 
766  } else if (KFD_IOC_ALLOC_MEM_FLAGS_USERPTR & args->flags) {
767  DPRINTF(GPUDriver, "amdkfd allocation type: USERPTR\n");
768  mmap_offset = args->mmap_offset;
769  // USERPTR allocations are system memory mapped into GPUVM
770  // space. The user provides the driver with the pointer.
771  pa_addr = process->system->allocPhysPages(npages);
772 
773  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
774  "%d\n", args->va_addr, pa_addr, args->size);
775 
776  // If the HSA runtime requests system coherent memory, then we
777  // need to explicitly mark this region as uncacheable from the
778  // perspective of the GPU.
779  if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
780  mtype.clear();
781 
782  } else if (KFD_IOC_ALLOC_MEM_FLAGS_GTT & args->flags) {
783  DPRINTF(GPUDriver, "amdkfd allocation type: GTT\n");
784  args->mmap_offset = args->va_addr;
785  // GTT allocations are system memory mapped into GPUVM space.
786  // It's different than a USERPTR allocation since the driver
787  // itself allocates the physical memory on the host.
788  //
789  // We will lazily map it into host memory on first touch. The
790  // fixupFault will find the original SVM aperture mapped to the
791  // host.
792  pa_addr = process->system->allocPhysPages(npages);
793 
794  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
795  "%d\n", args->va_addr, pa_addr, args->size);
796 
797  // If the HSA runtime requests system coherent memory, then we
798  // need to explicitly mark this region as uncacheable from the
799  // perspective of the GPU.
800  if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
801  mtype.clear();
802 
803  // Note that for GTT the thunk layer needs to call mmap on the
804  // driver FD later if it wants the host to have access to this
805  // memory (which it probably does). This will be ignored.
806  } else if (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL & args->flags) {
807  DPRINTF(GPUDriver, "amdkfd allocation type: DOORBELL\n");
808  // DOORBELL allocations are the queue doorbells that are
809  // memory mapped into GPUVM space.
810  //
811  // Explicitly map this virtual address to our PIO doorbell
812  // interface in the page tables (non-cacheable)
813  pa_addr = device->hsaPacketProc().pioAddr;
814  cacheable = false;
815  }
816 
817  DPRINTF(GPUDriver, "amdkfd allocation arguments: va_addr %p "
818  "size %lu, mmap_offset %p, gpu_id %d\n",
819  args->va_addr, args->size, mmap_offset, args->gpu_id);
820 
821  // Bind selected physical memory to provided virtual address range
822  // in X86 page tables.
823  process->pTable->map(args->va_addr, pa_addr, args->size,
824  cacheable);
825 
826  // We keep track of allocated regions of GPU mapped memory,
827  // just like the driver would. This allows us to provide the
828  // user with a unique handle for a given allocation. The user
829  // will only provide us with a handle after allocation and expect
830  // us to be able to use said handle to extract all the properties
831  // of the region.
832  //
833  // This is a simplified version of regular system VMAs, but for
834  // GPUVM space (none of the clobber/remap nonsense we find in real
835  // OS managed memory).
836  allocateGpuVma(mtype, args->va_addr, args->size);
837 
838  // Used by the runtime to uniquely identify this allocation.
839  // We can just use the starting address of the VMA region.
840  args->handle= args->va_addr;
841  args.copyOut(virt_proxy);
842  }
843  break;
845  {
846  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
848  args.copyIn(virt_proxy);
849 
850  assert(isdGPU);
851  DPRINTF(GPUDriver, "amdkfd free arguments: handle %p ",
852  args->handle);
853 
854  // We don't recycle physical pages in SE mode
855  Addr size = deallocateGpuVma(args->handle);
856  process->pTable->unmap(args->handle, size);
857 
858  // TODO: IOMMU and GPUTLBs do not seem to correctly support
859  // shootdown. This is also a potential issue for APU systems
860  // that perform unmap or remap with system memory.
861  tc->getMMUPtr()->flushAll();
862 
863  args.copyOut(virt_proxy);
864  }
865  break;
874  {
875  warn("unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
876  }
877  break;
879  {
880  warn("unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
881  }
882  break;
884  {
885  warn("unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
886  }
887  break;
889  {
890  warn("unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
891  }
892  break;
894  {
895  warn("unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
896  }
897  break;
899  {
900  warn("unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
901  }
902  break;
904  {
905  warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
906  }
907  break;
909  {
910  warn("unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
911  }
912  break;
913  default:
914  fatal("%s: bad ioctl %d\n", req);
915  break;
916  }
917  return 0;
918 }
919 
920 void
921 GPUComputeDriver::sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
922 {
923  // Convert millisecs to ticks
924  Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
925  assert(TCEvents.count(tc) == 1);
926  TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
927  tc->suspend();
928  DPRINTF(GPUDriver,
929  "CPU %d is put to sleep\n", tc->cpuId());
930 }
931 
932 Addr
934 {
935  return ((Addr)gpuNum << 61) + 0x1000000000000L;
936 }
937 
938 Addr
940 {
941  return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL;
942 }
943 
944 Addr
946 {
947  return ((Addr)gpuNum << 61) + 0x100000000L;
948 }
949 
950 // Used for GFX9 devices
951 // From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
952 Addr
954 {
955  return ((Addr)0x1 << 48);
956 }
957 
958 Addr
960 {
961  return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
962 }
963 
964 Addr
966 {
967  return ((Addr)gpuNum << 61) + 0x0;
968 }
969 
970 // Used for GFX9 devices
971 // From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
972 Addr
974 {
975  return ((Addr)0x2 << 48);
976 }
977 
978 Addr
980 {
981  return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
982 }
983 
984 void
986  Addr start, Addr length)
987 {
988  AddrRange range = AddrRange(start, start + length);
989  DPRINTF(GPUDriver, "Registering [%p - %p] with MTYPE %d\n",
990  range.start(), range.end(), mtype);
991  fatal_if(gpuVmas.insert(range, mtype) == gpuVmas.end(),
992  "Attempted to double register Mtypes for [%p - %p]\n",
993  range.start(), range.end());
994 }
995 
996 Addr
998 {
999  auto vma = gpuVmas.contains(start);
1000  assert(vma != gpuVmas.end());
1001  assert((vma->first.start() == start));
1002  Addr size = vma->first.size();
1003  DPRINTF(GPUDriver, "Unregistering [%p - %p]\n", vma->first.start(),
1004  vma->first.end());
1005  gpuVmas.erase(vma);
1006  return size;
1007 }
1008 
1009 void
1011 {
1012  // If we are a dGPU then set the MTYPE from our VMAs.
1013  if (isdGPU) {
1014  AddrRange range = RangeSize(req->getVaddr(), req->getSize());
1015  auto vma = gpuVmas.contains(range);
1016  assert(vma != gpuVmas.end());
1017  DPRINTF(GPUShader, "Setting req from [%p - %p] MTYPE %d\n"
1018  "%d\n", range.start(), range.end(), vma->second);
1019  req->setCacheCoherenceFlags(vma->second);
1020  // APUs always get the default MTYPE
1021  } else {
1022  req->setCacheCoherenceFlags(defaultMtype);
1023  }
1024 }
1025 
1026 } // namespace gem5
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:189
AMDKFD_IOC_MAP_MEMORY_TO_GPU
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU
Definition: kfd_ioctl.h:593
gem5::kfd_event_data
Definition: kfd_ioctl.h:307
gem5::HSAPacketProcessor::pioAddr
Addr pioAddr
Definition: hsa_packet_processor.hh:316
gem5::GPUComputeDriver::TCEvents
std::unordered_map< ThreadContext *, EventList > TCEvents
Definition: gpu_compute_driver.hh:207
gem5::GPUComputeDriver::signalWakeupEvent
virtual void signalWakeupEvent(uint32_t event_id)
Definition: gpu_compute_driver.cc:184
gem5::GPUComputeDriver::gpuVmas
AddrRangeMap< Request::CacheCoherenceFlags, 1 > gpuVmas
VMA structures for GPUVM memory.
Definition: gpu_compute_driver.hh:164
gem5::Request::READ_WRITE
@ READ_WRITE
Definition: request.hh:318
KFD_SIGNAL_EVENT_LIMIT
#define KFD_SIGNAL_EVENT_LIMIT
Definition: kfd_ioctl.h:229
gem5::GPUComputeDriver::allocateQueue
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
Forward relevant parameters to packet processor; queueId is used to link doorbell.
Definition: gpu_compute_driver.cc:152
gem5::AddrRange::start
Addr start() const
Get the start address of the range.
Definition: addr_range.hh:317
warn
#define warn(...)
Definition: logging.hh:245
gem5::TypedBufferArg
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
Definition: syscall_emul_buf.hh:132
gem5::GPUComputeDriver::deallocateGpuVma
Addr deallocateGpuVma(Addr start)
Definition: gpu_compute_driver.cc:997
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
Definition: kfd_ioctl.h:378
shader.hh
gem5::X86ISA::L
Bitfield< 7, 0 > L
Definition: int.hh:59
gem5::GPUCommandProcessor::attachDriver
void attachDriver(GPUComputeDriver *driver)
Definition: gpu_command_processor.cc:221
AMDKFD_IOC_GET_TILE_CONFIG
#define AMDKFD_IOC_GET_TILE_CONFIG
Definition: kfd_ioctl.h:574
gem5::RangeSize
AddrRange RangeSize(Addr start, Addr size)
Definition: addr_range.hh:661
warn_once
#define warn_once(...)
Definition: logging.hh:249
AMDKFD_IOC_DBG_WAVE_CONTROL
#define AMDKFD_IOC_DBG_WAVE_CONTROL
Definition: kfd_ioctl.h:568
AMDKFD_IOC_FREE_MEMORY_OF_GPU
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU
Definition: kfd_ioctl.h:590
hsa_packet_processor.hh
AMDKFD_IOC_DESTROY_EVENT
#define AMDKFD_IOC_DESTROY_EVENT
Definition: kfd_ioctl.h:547
gem5::Flags::set
void set(Type mask)
Set all flag's bits matching the given mask.
Definition: flags.hh:116
gem5::ThreadContext::getMMUPtr
virtual BaseMMU * getMMUPtr()=0
gem5::Flags::clear
void clear()
Clear all of the flags' bits.
Definition: flags.hh:102
gem5::HSAPacketProcessor::unsetDeviceQueueDesc
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
Definition: hsa_packet_processor.cc:94
KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
Definition: kfd_ioctl.h:379
gem5::GPUComputeDriver::gpuVmApeLimit
Addr gpuVmApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:939
gem5::ThreadContext::activate
virtual void activate()=0
Set the status to Active.
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:60
KFD_IOC_ALLOC_MEM_FLAGS_VRAM
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM
Definition: kfd_ioctl.h:376
syscall_emul_buf.hh
KFD_IOCTL_MINOR_VERSION
#define KFD_IOCTL_MINOR_VERSION
Definition: kfd_ioctl.h:39
KFD_MMAP_GPU_ID
#define KFD_MMAP_GPU_ID(gpu_id)
Definition: kfd_event_defines.h:53
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::ThreadContext::cpuId
virtual int cpuId() const =0
gem5::HSAPacketProcessor::setDeviceQueueDesc
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize)
Definition: hsa_packet_processor.cc:100
AMDKFD_IOC_GET_VERSION
#define AMDKFD_IOC_GET_VERSION
Definition: kfd_ioctl.h:523
gem5::GPUComputeDriver::allocateGpuVma
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length)
Allocate/deallocate GPUVM VMAs for tracking virtual address allocations and properties on DGPUs.
Definition: gpu_compute_driver.cc:985
gem5::Process::memState
std::shared_ptr< MemState > memState
Definition: process.hh:276
gem5::GPUComputeDriver::eventSlotIndex
uint32_t eventSlotIndex
Definition: gpu_compute_driver.hh:157
gem5::GPUComputeDriver::setMtype
void setMtype(RequestPtr req)
Called by the compute units right before a request is issued to ruby.
Definition: gpu_compute_driver.cc:1010
KFD_MMAP_TYPE_MASK
#define KFD_MMAP_TYPE_MASK
Definition: kfd_event_defines.h:45
gem5::GPUComputeDriver::ETable
std::unordered_map< uint32_t, ETEntry > ETable
Definition: gpu_compute_driver.hh:159
AMDKFD_IOC_SET_EVENT
#define AMDKFD_IOC_SET_EVENT
Definition: kfd_ioctl.h:550
gem5::Flags< CacheCoherenceFlagsType >
AMDKFD_IOC_GET_PROCESS_APERTURES
#define AMDKFD_IOC_GET_PROCESS_APERTURES
Definition: kfd_ioctl.h:538
AMDKFD_IOC_SET_TRAP_HANDLER
#define AMDKFD_IOC_SET_TRAP_HANDLER
Definition: kfd_ioctl.h:577
AMDKFD_IOC_DBG_REGISTER
#define AMDKFD_IOC_DBG_REGISTER
Definition: kfd_ioctl.h:559
gem5::GPUComputeDriver::sleepCPU
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
Definition: gpu_compute_driver.cc:921
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:93
gem5::GPUComputeDriver::defaultMtype
Request::CacheCoherenceFlags defaultMtype
Definition: gpu_compute_driver.hh:176
gem5::Named::name
virtual std::string name() const
Definition: named.hh:47
gem5::ThreadContext::getVirtProxy
virtual PortProxy & getVirtProxy()=0
gem5::GPUComputeDriver::queueId
uint32_t queueId
Definition: gpu_compute_driver.hh:152
gem5::BaseBufferArg::size
const int size
buffer size
Definition: syscall_emul_buf.hh:99
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
AMDKFD_IOC_ACQUIRE_VM
#define AMDKFD_IOC_ACQUIRE_VM
Definition: kfd_ioctl.h:584
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
AMDKFD_IOC_SET_CU_MASK
#define AMDKFD_IOC_SET_CU_MASK
Definition: kfd_ioctl.h:599
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
AMDKFD_IOC_GET_QUEUE_WAVE_STATE
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE
Definition: kfd_ioctl.h:602
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::GPUComputeDriver::ldsApeBaseV9
Addr ldsApeBaseV9() const
Definition: gpu_compute_driver.cc:973
port_proxy.hh
process.hh
PAGE_SHIFT
#define PAGE_SHIFT
Definition: kfd_event_defines.h:43
kfd_event_defines.h
KFD_IOCTL_MAJOR_VERSION
#define KFD_IOCTL_MAJOR_VERSION
Definition: kfd_ioctl.h:38
gem5::PortProxy
This object is a proxy for a port or other object which implements the functional response protocol,...
Definition: port_proxy.hh:86
gem5::GPUComputeDriver::scratchApeBase
Addr scratchApeBase(int gpuNum) const
Definition: gpu_compute_driver.cc:945
AMDKFD_IOC_RESET_EVENT
#define AMDKFD_IOC_RESET_EVENT
Definition: kfd_ioctl.h:553
gem5::GPUComputeDriver::GPUComputeDriver
GPUComputeDriver(const Params &p)
Definition: gpu_compute_driver.cc:57
KFD_IOC_ALLOC_MEM_FLAGS_GTT
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT
Definition: kfd_ioctl.h:377
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::AddrRange::end
Addr end() const
Get the end address of the range.
Definition: addr_range.hh:324
compiler.hh
gpu_command_processor.hh
gem5::Request::SHARED
@ SHARED
Definition: request.hh:319
KFD_IOC_EVENT_SIGNAL
#define KFD_IOC_EVENT_SIGNAL
Definition: kfd_ioctl.h:215
gem5::Request::CACHED
@ CACHED
mtype flags
Definition: request.hh:317
AMDKFD_IOC_DBG_ADDRESS_WATCH
#define AMDKFD_IOC_DBG_ADDRESS_WATCH
Definition: kfd_ioctl.h:565
std::pair
STL pair class.
Definition: stl.hh:58
gem5::Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUComputeDriver::open
int open(ThreadContext *tc, int mode, int flags) override
Create an FD entry for the KFD inside of the owning process.
Definition: gpu_compute_driver.cc:87
gem5::GPUComputeDriver::scratchApeBaseV9
Addr scratchApeBaseV9() const
Definition: gpu_compute_driver.cc:953
gem5::GPUComputeDriver::dGPUPoolID
int dGPUPoolID
Definition: gpu_compute_driver.hh:155
gem5::GPUComputeDriver::doorbellSize
int doorbellSize()
Definition: gpu_compute_driver.hh:88
AMDKFD_IOC_IMPORT_DMABUF
#define AMDKFD_IOC_IMPORT_DMABUF
Definition: kfd_ioctl.h:608
KFD_MMAP_TYPE_EVENTS
#define KFD_MMAP_TYPE_EVENTS
Definition: kfd_event_defines.h:47
gem5::BaseBufferArg::copyIn
bool copyIn(const PortProxy &memproxy)
copy data into simulator space (read from target memory)
Definition: syscall_emul_buf.hh:81
gem5::divCeil
static constexpr T divCeil(const T &a, const U &b)
Definition: intmath.hh:110
gem5::ThreadContext::getProcessPtr
virtual Process * getProcessPtr()=0
gem5::EmulatedDriver
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
Definition: emul_driver.hh:55
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:203
gem5::GPUComputeDriver::ioctl
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
Definition: gpu_compute_driver.cc:222
gem5::GPUComputeDriver::DriverWakeupEvent::scheduleWakeup
void scheduleWakeup(Tick wakeup_delay)
Definition: gpu_compute_driver.cc:177
gem5::ThreadContext::suspend
virtual void suspend()=0
Set the status to Suspended.
KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
Definition: kfd_ioctl.h:387
gem5::GPUComputeDriver::device
GPUCommandProcessor * device
GPU that is controlled by this driver.
Definition: gpu_compute_driver.hh:151
AMDKFD_IOC_CREATE_EVENT
#define AMDKFD_IOC_CREATE_EVENT
Definition: kfd_ioctl.h:544
AMDKFD_IOC_DESTROY_QUEUE
#define AMDKFD_IOC_DESTROY_QUEUE
Definition: kfd_ioctl.h:529
AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
Definition: kfd_ioctl.h:596
gem5::BaseBufferArg::copyOut
bool copyOut(const PortProxy &memproxy)
copy data out of simulator space (write to target memory)
Definition: syscall_emul_buf.hh:91
SLOTS_PER_PAGE
#define SLOTS_PER_PAGE
Definition: kfd_event_defines.h:48
gem5::GPUComputeDriver::isdGPU
bool isdGPU
Definition: gpu_compute_driver.hh:153
gem5::ArmISA::PageBytes
const Addr PageBytes
Definition: page_size.hh:53
AMDKFD_IOC_SET_MEMORY_POLICY
#define AMDKFD_IOC_SET_MEMORY_POLICY
Definition: kfd_ioctl.h:532
gem5::GPUComputeDriver::mmap
Addr mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) override
Currently, mmap() will simply set up a mapping for the associated device's packet processor's doorbell...
Definition: gpu_compute_driver.cc:101
AMDKFD_IOC_WAIT_EVENTS
#define AMDKFD_IOC_WAIT_EVENTS
Definition: kfd_ioctl.h:556
gem5::BaseMMU::flushAll
virtual void flushAll()
Definition: mmu.cc:51
logging.hh
AMDKFD_IOC_SET_SCRATCH_BACKING_VA
#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA
Definition: kfd_ioctl.h:571
AMDKFD_IOC_DBG_UNREGISTER
#define AMDKFD_IOC_DBG_UNREGISTER
Definition: kfd_ioctl.h:562
AMDKFD_IOC_SMI_EVENTS
#define AMDKFD_IOC_SMI_EVENTS
Definition: kfd_ioctl.h:614
gem5::GPUComputeDriver::Params
GPUComputeDriverParams Params
Definition: gpu_compute_driver.hh:67
gem5::GPUComputeDriver::DriverWakeupEvent::process
void process() override
Definition: gpu_compute_driver.cc:211
trace.hh
AMDKFD_IOC_UPDATE_QUEUE
#define AMDKFD_IOC_UPDATE_QUEUE
Definition: kfd_ioctl.h:541
gem5::GPUComputeDriver::ldsApeLimit
Addr ldsApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:979
gpu_compute_driver.hh
gem5::AddrRange
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:71
gem5::GPUComputeDriver::DriverWakeupEvent::description
const char * description() const override
Return a C string describing the event.
Definition: gpu_compute_driver.cc:78
AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
Definition: kfd_ioctl.h:580
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:225
AMDKFD_IOC_ALLOC_QUEUE_GWS
#define AMDKFD_IOC_ALLOC_QUEUE_GWS
Definition: kfd_ioctl.h:611
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::GPUComputeDriver::gpuVmApeBase
Addr gpuVmApeBase(int gpuNum) const
The aperture (APE) base/limit pairs are set statically at startup by the real KFD.
Definition: gpu_compute_driver.cc:933
gem5::GPUComputeDriver::scratchApeLimit
Addr scratchApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:959
gem5::GPUComputeDriver::ldsApeBase
Addr ldsApeBase(int gpuNum) const
Definition: gpu_compute_driver.cc:965
gem5::EmulatedDriver::filename
const std::string & filename
filename for opening this driver (under /dev)
Definition: emul_driver.hh:61
kfd_ioctl.h
thread_context.hh
gem5::GPUComputeDriver::gfxVersion
GfxVersion gfxVersion
Definition: gpu_compute_driver.hh:154
AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
Definition: kfd_ioctl.h:587
gem5::X86ISA::prot
Bitfield< 7 > prot
Definition: misc.hh:588
AMDKFD_IOC_CREATE_QUEUE
#define AMDKFD_IOC_CREATE_QUEUE
Definition: kfd_ioctl.h:526
AMDKFD_IOC_GET_CLOCK_COUNTERS
#define AMDKFD_IOC_GET_CLOCK_COUNTERS
Definition: kfd_ioctl.h:535
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:73
KFD_MMAP_TYPE_DOORBELL
#define KFD_MMAP_TYPE_DOORBELL
Definition: kfd_event_defines.h:46
AMDKFD_IOC_GET_DMABUF_INFO
#define AMDKFD_IOC_GET_DMABUF_INFO
Definition: kfd_ioctl.h:605
gem5::GPUComputeDriver::eventPage
Addr eventPage
Definition: gpu_compute_driver.hh:156
gem5::sim_clock::as_int::ns
Tick ns
nanosecond
Definition: core.cc:71

Generated on Tue Sep 21 2021 12:25:23 for gem5 by doxygen 1.8.17