gem5  v21.2.1.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gpu_compute_driver.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include <memory>
35 
36 #include "arch/x86/page_size.hh"
37 #include "base/compiler.hh"
38 #include "base/logging.hh"
39 #include "base/trace.hh"
40 #include "cpu/thread_context.hh"
41 #include "debug/GPUDriver.hh"
42 #include "debug/GPUShader.hh"
45 #include "dev/hsa/kfd_ioctl.h"
47 #include "gpu-compute/shader.hh"
48 #include "mem/port_proxy.hh"
51 #include "params/GPUComputeDriver.hh"
52 #include "sim/full_system.hh"
53 #include "sim/process.hh"
54 #include "sim/se_workload.hh"
55 #include "sim/syscall_emul_buf.hh"
56 
57 namespace gem5
58 {
59 
61  : EmulatedDriver(p), device(p.device), queueId(0),
62  isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID),
63  eventPage(0), eventSlotIndex(0)
64 {
65  device->attachDriver(this);
66  DPRINTF(GPUDriver, "Constructing KFD: device\n");
67 
68  // Convert the 3 bit mtype specified in Shader.py to the proper type
69  // used for requests.
70  if (MtypeFlags::SHARED & p.m_type)
72 
73  if (MtypeFlags::READ_WRITE & p.m_type)
75 
76  if (MtypeFlags::CACHED & p.m_type)
78 }
79 
80 const char*
82 {
83  return "DriverWakeupEvent";
84 }
85 
89 int
91 {
92  DPRINTF(GPUDriver, "Opened %s\n", filename);
93  auto process = tc->getProcessPtr();
94  auto device_fd_entry = std::make_shared<DeviceFDEntry>(this, filename);
95  int tgt_fd = process->fds->allocFD(device_fd_entry);
96  return tgt_fd;
97 }
98 
103 Addr
104 GPUComputeDriver::mmap(ThreadContext *tc, Addr start, uint64_t length,
105  int prot, int tgt_flags, int tgt_fd, off_t offset)
106 {
107  auto process = tc->getProcessPtr();
108  auto mem_state = process->memState;
109 
110  Addr pg_off = offset >> PAGE_SHIFT;
111  Addr mmap_type = pg_off & KFD_MMAP_TYPE_MASK;
112  DPRINTF(GPUDriver, "amdkfd mmap (start: %p, length: 0x%x,"
113  "offset: 0x%x)\n", start, length, offset);
114 
115  switch(mmap_type) {
117  DPRINTF(GPUDriver, "amdkfd mmap type DOORBELL offset\n");
118  start = mem_state->extendMmap(length);
119  process->pTable->map(start, device->hsaPacketProc().pioAddr,
120  length, false);
121  break;
123  DPRINTF(GPUDriver, "amdkfd mmap type EVENTS offset\n");
124  panic_if(start != 0,
125  "Start address should be provided by KFD\n");
126  panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
127  "Requested length %d, expected length %d; length "
128  "mismatch\n", length, 8* KFD_SIGNAL_EVENT_LIMIT);
134  if (!eventPage) {
135  eventPage = mem_state->extendMmap(length);
136  start = eventPage;
137  }
138  break;
139  default:
140  warn_once("Unrecognized kfd mmap type %llx\n", mmap_type);
141  break;
142  }
143 
144  return start;
145 }
146 
154 void
156 {
158  args.copyIn(mem_proxy);
159 
160  if ((doorbellSize() * queueId) > 4096) {
161  fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
162  }
163 
164  args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
165  KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;
166 
167  // for vega offset needs to include exact value of doorbell
168  if (doorbellSize())
169  args->doorbell_offset += queueId * doorbellSize();
170 
171  args->queue_id = queueId++;
172  auto &hsa_pp = device->hsaPacketProc();
173  hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
174  args->ring_base_address, args->queue_id,
175  args->ring_size, doorbellSize(), gfxVersion);
176  args.copyOut(mem_proxy);
177 }
178 
179 void
181 {
182  assert(driver);
183  driver->schedule(this, curTick() + wakeup_delay);
184 }
185 
186 void
188 {
189  panic_if(event_id >= eventSlotIndex,
190  "Trying wakeup on an event that is not yet created\n");
191  if (ETable[event_id].threadWaiting) {
192  panic_if(!ETable[event_id].tc,
193  "No thread context to wake up\n");
194  ThreadContext *tc = ETable[event_id].tc;
195  DPRINTF(GPUDriver,
196  "Signal event: Waking up CPU %d\n", tc->cpuId());
197  // Remove events that can wakeup this thread
198  TCEvents[tc].clearEvents();
199  // Now wakeup this thread
200  tc->activate();
201  } else {
202  // This may be a race condition between an ioctl call asking to wait on
203  // this event and this signalWakeupEvent. Taking care of this race
204  // condition here by setting the event here. The ioctl call should take
205  // the necessary action when waiting on an already set event. However,
206  // this may be a genuine instance in which the runtime has decided not
207  // to wait on this event. But since we cannot distinguish this case with
208  // the race condition, we are any way setting the event.
209  ETable[event_id].setEvent = true;
210  }
211 }
212 
213 void
215 {
216  DPRINTF(GPUDriver,
217  "Timer event: Waking up CPU %d\n", tc->cpuId());
218  // Remove events that can wakeup this thread
219  driver->TCEvents[tc].clearEvents();
220  // Now wakeup this thread
221  tc->activate();
222 }
223 
224 int
225 GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
226 {
227  TranslatingPortProxy fs_proxy(tc);
228  SETranslatingPortProxy se_proxy(tc);
229  PortProxy &virt_proxy = FullSystem ? fs_proxy : se_proxy;
230  auto process = tc->getProcessPtr();
231  auto mem_state = process->memState;
232 
233  switch (req) {
235  {
236  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_VERSION\n");
237 
239  args->major_version = KFD_IOCTL_MAJOR_VERSION;
240  args->minor_version = KFD_IOCTL_MINOR_VERSION;
241 
242  args.copyOut(virt_proxy);
243  }
244  break;
246  {
247  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
248 
249  allocateQueue(virt_proxy, ioc_buf);
250 
251  DPRINTF(GPUDriver, "Creating queue %d\n", queueId);
252  }
253  break;
255  {
257  args.copyIn(virt_proxy);
258  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
259  "queue offset %d\n", args->queue_id);
260  device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id,
261  doorbellSize());
262  }
263  break;
265  {
279  warn("unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
280  }
281  break;
283  {
284  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
285 
287  args.copyIn(virt_proxy);
288 
289  // Set nanosecond resolution
290  args->system_clock_freq = 1000000000;
291 
296  uint64_t elapsed_nsec = curTick() / sim_clock::as_int::ns;
297  args->gpu_clock_counter = elapsed_nsec;
298  args->cpu_clock_counter = elapsed_nsec;
299  args->system_clock_counter = elapsed_nsec;
300 
301  args.copyOut(virt_proxy);
302  }
303  break;
305  {
306  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
307 
309  args->num_of_nodes = 1;
310 
317  for (int i = 0; i < args->num_of_nodes; ++i) {
325  switch (gfxVersion) {
326  case GfxVersion::gfx801:
327  case GfxVersion::gfx803:
328  args->process_apertures[i].scratch_base =
329  scratchApeBase(i + 1);
330  args->process_apertures[i].lds_base =
331  ldsApeBase(i + 1);
332  break;
333  case GfxVersion::gfx900:
334  args->process_apertures[i].scratch_base =
336  args->process_apertures[i].lds_base =
337  ldsApeBaseV9();
338  break;
339  default:
340  fatal("Invalid gfx version\n");
341  }
342 
343  // GFX8 and GFX9 set lds and scratch limits the same way
344  args->process_apertures[i].scratch_limit =
345  scratchApeLimit(args->process_apertures[i].scratch_base);
346 
347  args->process_apertures[i].lds_limit =
348  ldsApeLimit(args->process_apertures[i].lds_base);
349 
350  switch (gfxVersion) {
351  case GfxVersion::gfx801:
352  args->process_apertures[i].gpuvm_base =
353  gpuVmApeBase(i + 1);
354  args->process_apertures[i].gpuvm_limit =
355  gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
356  break;
357  case GfxVersion::gfx803:
358  case GfxVersion::gfx900:
359  case GfxVersion::gfx902:
360  // Taken from SVM_USE_BASE in Linux kernel
361  args->process_apertures[i].gpuvm_base = 0x1000000ull;
362  // Taken from AMDGPU_GMC_HOLE_START in Linux kernel
363  args->process_apertures[i].gpuvm_limit =
364  0x0000800000000000ULL - 1;
365  break;
366  default:
367  fatal("Invalid gfx version");
368  }
369 
370  // NOTE: Must match ID populated by hsaTopology.py
371  //
372  // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/
373  // blob/6a986c0943e9acd8c4c0cf2a9d510ff42167b43f/include/uapi/
374  // linux/kfd_ioctl.h#L564
375  //
376  // The gpu_id is a device identifier used by the driver for
377  // ioctls that allocate arguments. Each device has an unique
378  // id composed out of a non-zero base and an offset.
379  if (isdGPU) {
380  switch (gfxVersion) {
381  case GfxVersion::gfx803:
382  args->process_apertures[i].gpu_id = 50156;
383  break;
384  case GfxVersion::gfx900:
385  args->process_apertures[i].gpu_id = 22124;
386  break;
387  default:
388  fatal("Invalid gfx version for dGPU\n");
389  }
390  } else {
391  switch (gfxVersion) {
392  case GfxVersion::gfx801:
393  case GfxVersion::gfx902:
394  args->process_apertures[i].gpu_id = 2765;
395  break;
396  default:
397  fatal("Invalid gfx version for APU\n");
398  }
399  }
400 
401  DPRINTF(GPUDriver, "GPUVM base for node[%i] = %#x\n", i,
402  args->process_apertures[i].gpuvm_base);
403  DPRINTF(GPUDriver, "GPUVM limit for node[%i] = %#x\n", i,
404  args->process_apertures[i].gpuvm_limit);
405 
406  DPRINTF(GPUDriver, "LDS base for node[%i] = %#x\n", i,
407  args->process_apertures[i].lds_base);
408  DPRINTF(GPUDriver, "LDS limit for node[%i] = %#x\n", i,
409  args->process_apertures[i].lds_limit);
410 
411  DPRINTF(GPUDriver, "Scratch base for node[%i] = %#x\n", i,
412  args->process_apertures[i].scratch_base);
413  DPRINTF(GPUDriver, "Scratch limit for node[%i] = %#x\n", i,
414  args->process_apertures[i].scratch_limit);
415 
422  assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
423  47) != 0x1ffff);
424  assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
425  47) != 0);
426  assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
427  47) != 0x1ffff);
428  assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
429  47) != 0);
430  assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
431  47) != 0x1ffff);
432  assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
433  47) != 0);
434  assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
435  47) != 0x1ffff);
436  assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
437  47) != 0);
438  }
439 
440  args.copyOut(virt_proxy);
441  }
442  break;
444  {
445  warn("unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
446  }
447  break;
449  {
450  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_EVENT\n");
451 
453  args.copyIn(virt_proxy);
454  if (args->event_type != KFD_IOC_EVENT_SIGNAL) {
455  warn("Signal events are only supported currently\n");
456  } else if (eventSlotIndex == SLOTS_PER_PAGE) {
457  fatal("Signal event wasn't created; signal limit reached\n");
458  }
459  // Currently, we allocate only one signal_page for events.
460  // Note that this signal page is of size 8 * KFD_SIGNAL_EVENT_LIMIT
461  uint64_t page_index = 0;
462  args->event_page_offset = (page_index | KFD_MMAP_TYPE_EVENTS);
463  args->event_page_offset <<= PAGE_SHIFT;
464  // TODO: Currently we support only signal events, hence using
465  // the same ID for both signal slot and event slot
466  args->event_slot_index = eventSlotIndex;
467  args->event_id = eventSlotIndex++;
468  args->event_trigger_data = args->event_id;
469  DPRINTF(GPUDriver, "amdkfd create events"
470  "(event_id: 0x%x, offset: 0x%x)\n",
471  args->event_id, args->event_page_offset);
472  // Since eventSlotIndex is increased everytime a new event is
473  // created ETable at eventSlotIndex(event_id) is guaranteed to be
474  // empty. In a future implementation that reuses deleted event_ids,
475  // we should check if event table at this
476  // eventSlotIndex(event_id) is empty before inserting a new event
477  // table entry
478  ETable.emplace(std::pair<uint32_t, ETEntry>(args->event_id, {}));
479  args.copyOut(virt_proxy);
480  }
481  break;
483  {
484  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
486  args.copyIn(virt_proxy);
487  DPRINTF(GPUDriver, "amdkfd destroying event %d\n", args->event_id);
488  fatal_if(ETable.count(args->event_id) == 0,
489  "Event ID invalid, cannot destroy this event\n");
490  ETable.erase(args->event_id);
491  }
492  break;
494  {
495  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_SET_EVENTS\n");
497  args.copyIn(virt_proxy);
498  DPRINTF(GPUDriver, "amdkfd set event %d\n", args->event_id);
499  fatal_if(ETable.count(args->event_id) == 0,
500  "Event ID invlaid, cannot set this event\n");
501  ETable[args->event_id].setEvent = true;
502  signalWakeupEvent(args->event_id);
503  }
504  break;
506  {
507  warn("unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
508  }
509  break;
511  {
512  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
514  args.copyIn(virt_proxy);
515  kfd_event_data *events =
516  (kfd_event_data *)args->events_ptr;
517  DPRINTF(GPUDriver, "amdkfd wait for events"
518  "(wait on all: %d, timeout : %d, num_events: %s)\n",
519  args->wait_for_all, args->timeout, args->num_events);
520  panic_if(args->wait_for_all != 0 && args->num_events > 1,
521  "Wait for all events not supported\n");
522  bool should_sleep = true;
523  if (TCEvents.count(tc) == 0) {
524  // This thread context trying to wait on an event for the first
525  // time, initialize it.
526  TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
527  std::make_tuple(this, tc));
528  DPRINTF(GPUDriver, "\tamdkfd creating event list"
529  " for thread %d\n", tc->cpuId());
530  }
531  panic_if(TCEvents[tc].signalEvents.size() != 0,
532  "There are %d events that put this thread to sleep,"
533  " this thread should not be running\n",
534  TCEvents[tc].signalEvents.size());
535  for (int i = 0; i < args->num_events; i++) {
536  panic_if(!events,
537  "Event pointer invalid\n");
538  Addr eventDataAddr = (Addr)(events + i);
540  eventDataAddr, sizeof(kfd_event_data));
541  EventData.copyIn(virt_proxy);
542  DPRINTF(GPUDriver,
543  "\tamdkfd wait for event %d\n", EventData->event_id);
544  panic_if(ETable.count(EventData->event_id) == 0,
545  "Event ID invalid, cannot set this event\n");
546  if (ETable[EventData->event_id].threadWaiting)
547  warn("Multiple threads waiting on the same event\n");
548  if (ETable[EventData->event_id].setEvent) {
549  // If event is already set, the event has already happened.
550  // Just unset the event and dont put this thread to sleep.
551  ETable[EventData->event_id].setEvent = false;
552  should_sleep = false;
553  }
554  if (should_sleep) {
555  // Put this thread to sleep
556  ETable[EventData->event_id].threadWaiting = true;
557  ETable[EventData->event_id].tc = tc;
558  TCEvents[tc].signalEvents.insert(EventData->event_id);
559  }
560  }
561 
562  // TODO: Return the correct wait_result back. Currently, returning
563  // success for both KFD_WAIT_TIMEOUT and KFD_WAIT_COMPLETE.
564  // Ideally, this needs to be done after the event is triggered and
565  // after the thread is woken up.
566  args->wait_result = 0;
567  args.copyOut(virt_proxy);
568  if (should_sleep) {
569  // Put this thread to sleep
570  sleepCPU(tc, args->timeout);
571  } else {
572  // Remove events that tried to put this thread to sleep
573  TCEvents[tc].clearEvents();
574  }
575  }
576  break;
578  {
579  warn("unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
580  }
581  break;
583  {
584  warn("unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
585  }
586  break;
588  {
589  warn("unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
590  }
591  break;
593  {
594  warn("unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
595  }
596  break;
598  {
599  warn("unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
600  }
601  break;
603  {
604  warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
605  }
606  break;
608  {
609  warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
610  }
611  break;
613  {
614  DPRINTF(GPUDriver,
615  "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
616 
618  ioc_args(ioc_buf);
619 
620  ioc_args.copyIn(virt_proxy);
621  ioc_args->num_of_nodes = 1;
622 
623  for (int i = 0; i < ioc_args->num_of_nodes; ++i) {
625  (ioc_args->kfd_process_device_apertures_ptr);
626 
627  switch (gfxVersion) {
628  case GfxVersion::gfx801:
629  case GfxVersion::gfx803:
630  ape_args->scratch_base = scratchApeBase(i + 1);
631  ape_args->lds_base = ldsApeBase(i + 1);
632  break;
633  case GfxVersion::gfx900:
634  ape_args->scratch_base = scratchApeBaseV9();
635  ape_args->lds_base = ldsApeBaseV9();
636  break;
637  default:
638  fatal("Invalid gfx version\n");
639  }
640 
641  // GFX8 and GFX9 set lds and scratch limits the same way
642  ape_args->scratch_limit =
643  scratchApeLimit(ape_args->scratch_base);
644  ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
645 
646  switch (gfxVersion) {
647  case GfxVersion::gfx801:
648  ape_args->gpuvm_base = gpuVmApeBase(i + 1);
649  ape_args->gpuvm_limit =
650  gpuVmApeLimit(ape_args->gpuvm_base);
651  break;
652  case GfxVersion::gfx803:
653  case GfxVersion::gfx900:
654  case GfxVersion::gfx902:
655  // Taken from SVM_USE_BASE in Linux kernel
656  ape_args->gpuvm_base = 0x1000000ull;
657  // Taken from AMDGPU_GMC_HOLE_START in Linux kernel
658  ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
659  break;
660  default:
661  fatal("Invalid gfx version\n");
662  }
663 
664  // NOTE: Must match ID populated by hsaTopology.py
665  if (isdGPU) {
666  switch (gfxVersion) {
667  case GfxVersion::gfx803:
668  ape_args->gpu_id = 50156;
669  break;
670  case GfxVersion::gfx900:
671  ape_args->gpu_id = 22124;
672  break;
673  default:
674  fatal("Invalid gfx version for dGPU\n");
675  }
676  } else {
677  switch (gfxVersion) {
678  case GfxVersion::gfx801:
679  case GfxVersion::gfx902:
680  ape_args->gpu_id = 2765;
681  break;
682  default:
683  fatal("Invalid gfx version for APU\n");
684  }
685  }
686 
687  assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
688  assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
689  assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
690  assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
691  assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
692  assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
693  assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
694  assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
695 
696  ape_args.copyOut(virt_proxy);
697  }
698 
699  ioc_args.copyOut(virt_proxy);
700  }
701  break;
703  {
704  warn("unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
705  }
706  break;
724  {
725  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
727  args.copyIn(virt_proxy);
728 
729  assert(isdGPU || gfxVersion == GfxVersion::gfx902);
730  assert((args->va_addr % TheISA::PageBytes) == 0);
731  [[maybe_unused]] Addr mmap_offset = 0;
732 
734  Addr pa_addr = 0;
735 
736  int npages = divCeil(args->size, (int64_t)TheISA::PageBytes);
737  bool cacheable = true;
738 
739  if (KFD_IOC_ALLOC_MEM_FLAGS_VRAM & args->flags) {
740  DPRINTF(GPUDriver, "amdkfd allocation type: VRAM\n");
741  args->mmap_offset = args->va_addr;
742  // VRAM allocations are device memory mapped into GPUVM
743  // space.
744  //
745  // We can't rely on the lazy host allocator (fixupFault) to
746  // handle this mapping since it needs to be placed in dGPU
747  // framebuffer memory. The lazy allocator will try to place
748  // this in host memory.
749  //
750  // TODO: We don't have the appropriate bifurcation of the
751  // physical address space with different memory controllers
752  // yet. This is where we will explicitly add the PT maps to
753  // dGPU memory in the future.
754  //
755  // Bind the VA space to the dGPU physical memory pool. Mark
756  // this region as Uncacheable. The Uncacheable flag is only
757  // really used by the CPU and is ignored by the GPU. We mark
758  // this as uncacheable from the CPU so that we can implement
759  // direct CPU framebuffer access similar to what we currently
760  // offer in real HW through the so-called Large BAR feature.
761  pa_addr = process->seWorkload->allocPhysPages(
762  npages, dGPUPoolID);
763  //
764  // TODO: Uncacheable accesses need to be supported by the
765  // CPU-side protocol for this to work correctly. I believe
766  // it only works right now if the physical memory is MMIO
767  cacheable = false;
768 
769  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
770  "%d\n", args->va_addr, pa_addr, args->size);
771 
772  } else if (KFD_IOC_ALLOC_MEM_FLAGS_USERPTR & args->flags) {
773  DPRINTF(GPUDriver, "amdkfd allocation type: USERPTR\n");
774  mmap_offset = args->mmap_offset;
775  // USERPTR allocations are system memory mapped into GPUVM
776  // space. The user provides the driver with the pointer.
777  pa_addr = process->seWorkload->allocPhysPages(npages);
778 
779  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
780  "%d\n", args->va_addr, pa_addr, args->size);
781 
782  // If the HSA runtime requests system coherent memory, then we
783  // need to explicitly mark this region as uncacheable from the
784  // perspective of the GPU.
785  if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
786  mtype.clear();
787 
788  } else if (KFD_IOC_ALLOC_MEM_FLAGS_GTT & args->flags) {
789  DPRINTF(GPUDriver, "amdkfd allocation type: GTT\n");
790  args->mmap_offset = args->va_addr;
791  // GTT allocations are system memory mapped into GPUVM space.
792  // It's different than a USERPTR allocation since the driver
793  // itself allocates the physical memory on the host.
794  //
795  // We will lazily map it into host memory on first touch. The
796  // fixupFault will find the original SVM aperture mapped to the
797  // host.
798  pa_addr = process->seWorkload->allocPhysPages(npages);
799 
800  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
801  "%d\n", args->va_addr, pa_addr, args->size);
802 
803  // If the HSA runtime requests system coherent memory, then we
804  // need to explicitly mark this region as uncacheable from the
805  // perspective of the GPU.
806  if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
807  mtype.clear();
808 
809  // Note that for GTT the thunk layer needs to call mmap on the
810  // driver FD later if it wants the host to have access to this
811  // memory (which it probably does). This will be ignored.
812  } else if (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL & args->flags) {
813  DPRINTF(GPUDriver, "amdkfd allocation type: DOORBELL\n");
814  // DOORBELL allocations are the queue doorbells that are
815  // memory mapped into GPUVM space.
816  //
817  // Explicitly map this virtual address to our PIO doorbell
818  // interface in the page tables (non-cacheable)
819  pa_addr = device->hsaPacketProc().pioAddr;
820  cacheable = false;
821  }
822 
823  DPRINTF(GPUDriver, "amdkfd allocation arguments: va_addr %p "
824  "size %lu, mmap_offset %p, gpu_id %d\n",
825  args->va_addr, args->size, mmap_offset, args->gpu_id);
826 
827  // Bind selected physical memory to provided virtual address range
828  // in X86 page tables.
829  process->pTable->map(args->va_addr, pa_addr, args->size,
830  cacheable);
831 
832  // We keep track of allocated regions of GPU mapped memory,
833  // just like the driver would. This allows us to provide the
834  // user with a unique handle for a given allocation. The user
835  // will only provide us with a handle after allocation and expect
836  // us to be able to use said handle to extract all the properties
837  // of the region.
838  //
839  // This is a simplified version of regular system VMAs, but for
840  // GPUVM space (none of the clobber/remap nonsense we find in real
841  // OS managed memory).
842  allocateGpuVma(mtype, args->va_addr, args->size);
843 
844  // Used by the runtime to uniquely identify this allocation.
845  // We can just use the starting address of the VMA region.
846  args->handle= args->va_addr;
847  args.copyOut(virt_proxy);
848  }
849  break;
851  {
852  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
854  args.copyIn(virt_proxy);
855 
856  assert(isdGPU);
857  DPRINTF(GPUDriver, "amdkfd free arguments: handle %p ",
858  args->handle);
859 
860  // We don't recycle physical pages in SE mode
861  Addr size = deallocateGpuVma(args->handle);
862  process->pTable->unmap(args->handle, size);
863 
864  // TODO: IOMMU and GPUTLBs do not seem to correctly support
865  // shootdown. This is also a potential issue for APU systems
866  // that perform unmap or remap with system memory.
867  tc->getMMUPtr()->flushAll();
868 
869  args.copyOut(virt_proxy);
870  }
871  break;
880  {
881  warn("unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
882  }
883  break;
885  {
886  warn("unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
887  }
888  break;
890  {
891  warn("unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
892  }
893  break;
895  {
896  warn("unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
897  }
898  break;
900  {
901  warn("unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
902  }
903  break;
905  {
906  warn("unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
907  }
908  break;
910  {
911  warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
912  }
913  break;
915  {
916  warn("unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
917  }
918  break;
919  default:
920  fatal("%s: bad ioctl %d\n", req);
921  break;
922  }
923  return 0;
924 }
925 
926 void
927 GPUComputeDriver::sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
928 {
929  // Convert millisecs to ticks
930  Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
931  assert(TCEvents.count(tc) == 1);
932  TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
933  tc->suspend();
934  DPRINTF(GPUDriver,
935  "CPU %d is put to sleep\n", tc->cpuId());
936 }
937 
938 Addr
940 {
941  return ((Addr)gpuNum << 61) + 0x1000000000000L;
942 }
943 
944 Addr
946 {
947  return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL;
948 }
949 
950 Addr
952 {
953  return ((Addr)gpuNum << 61) + 0x100000000L;
954 }
955 
956 // Used for GFX9 devices
957 // From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
958 Addr
960 {
961  return ((Addr)0x1 << 48);
962 }
963 
964 Addr
966 {
967  return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
968 }
969 
970 Addr
972 {
973  return ((Addr)gpuNum << 61) + 0x0;
974 }
975 
976 //Used for GFX9 devices
977 // From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
978 Addr
980 {
981  return ((Addr)0x2 << 48);
982 }
983 
984 Addr
986 {
987  return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
988 }
989 
990 void
992  Addr start, Addr length)
993 {
994  AddrRange range = AddrRange(start, start + length);
995  DPRINTF(GPUDriver, "Registering [%p - %p] with MTYPE %d\n",
996  range.start(), range.end(), mtype);
997  fatal_if(gpuVmas.insert(range, mtype) == gpuVmas.end(),
998  "Attempted to double register Mtypes for [%p - %p]\n",
999  range.start(), range.end());
1000 }
1001 
1002 Addr
1004 {
1005  auto vma = gpuVmas.contains(start);
1006  assert(vma != gpuVmas.end());
1007  assert((vma->first.start() == start));
1008  Addr size = vma->first.size();
1009  DPRINTF(GPUDriver, "Unregistering [%p - %p]\n", vma->first.start(),
1010  vma->first.end());
1011  gpuVmas.erase(vma);
1012  return size;
1013 }
1014 
1015 void
1017 {
1018  // If we are a dGPU then set the MTYPE from our VMAs.
1019  if (isdGPU) {
1020  AddrRange range = RangeSize(req->getVaddr(), req->getSize());
1021  auto vma = gpuVmas.contains(range);
1022  assert(vma != gpuVmas.end());
1023  DPRINTF(GPUShader, "Setting req from [%p - %p] MTYPE %d\n"
1024  "%d\n", range.start(), range.end(), vma->second);
1025  req->setCacheCoherenceFlags(vma->second);
1026  // APUs always get the default MTYPE
1027  } else {
1028  req->setCacheCoherenceFlags(defaultMtype);
1029  }
1030 }
1031 
1032 } // namespace gem5
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
AMDKFD_IOC_MAP_MEMORY_TO_GPU
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU
Definition: kfd_ioctl.h:592
gem5::kfd_event_data
Definition: kfd_ioctl.h:306
gem5::HSAPacketProcessor::pioAddr
Addr pioAddr
Definition: hsa_packet_processor.hh:341
gem5::GPUComputeDriver::TCEvents
std::unordered_map< ThreadContext *, EventList > TCEvents
Definition: gpu_compute_driver.hh:205
gem5::GPUComputeDriver::signalWakeupEvent
virtual void signalWakeupEvent(uint32_t event_id)
Definition: gpu_compute_driver.cc:187
gem5::SETranslatingPortProxy
Definition: se_translating_port_proxy.hh:49
gem5::GPUComputeDriver::gpuVmas
AddrRangeMap< Request::CacheCoherenceFlags, 1 > gpuVmas
VMA structures for GPUVM memory.
Definition: gpu_compute_driver.hh:162
KFD_SIGNAL_EVENT_LIMIT
#define KFD_SIGNAL_EVENT_LIMIT
Definition: kfd_ioctl.h:228
gem5::GPUComputeDriver::allocateQueue
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
Forward relevant parameters to packet processor; queueId is used to link doorbell.
Definition: gpu_compute_driver.cc:155
gem5::AddrRange::start
Addr start() const
Get the start address of the range.
Definition: addr_range.hh:343
warn
#define warn(...)
Definition: logging.hh:246
gem5::TypedBufferArg
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
Definition: syscall_emul_buf.hh:132
gem5::GPUComputeDriver::deallocateGpuVma
Addr deallocateGpuVma(Addr start)
Definition: gpu_compute_driver.cc:1003
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
Definition: kfd_ioctl.h:377
shader.hh
gem5::X86ISA::L
Bitfield< 7, 0 > L
Definition: int.hh:59
gem5::GPUCommandProcessor::attachDriver
void attachDriver(GPUComputeDriver *driver)
Definition: gpu_command_processor.cc:221
AMDKFD_IOC_GET_TILE_CONFIG
#define AMDKFD_IOC_GET_TILE_CONFIG
Definition: kfd_ioctl.h:573
gem5::RangeSize
AddrRange RangeSize(Addr start, Addr size)
Definition: addr_range.hh:815
warn_once
#define warn_once(...)
Definition: logging.hh:250
AMDKFD_IOC_DBG_WAVE_CONTROL
#define AMDKFD_IOC_DBG_WAVE_CONTROL
Definition: kfd_ioctl.h:567
AMDKFD_IOC_FREE_MEMORY_OF_GPU
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU
Definition: kfd_ioctl.h:589
hsa_packet_processor.hh
AMDKFD_IOC_DESTROY_EVENT
#define AMDKFD_IOC_DESTROY_EVENT
Definition: kfd_ioctl.h:546
gem5::Flags::set
void set(Type mask)
Set all flag's bits matching the given mask.
Definition: flags.hh:116
gem5::ThreadContext::getMMUPtr
virtual BaseMMU * getMMUPtr()=0
gem5::Flags::clear
void clear()
Clear all flag's bits.
Definition: flags.hh:102
gem5::HSAPacketProcessor::unsetDeviceQueueDesc
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
Definition: hsa_packet_processor.cc:93
translating_port_proxy.hh
KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
Definition: kfd_ioctl.h:378
gem5::Request::READ_WRITE
@ READ_WRITE
Definition: request.hh:320
gem5::GPUComputeDriver::gpuVmApeLimit
Addr gpuVmApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:945
gem5::ThreadContext::activate
virtual void activate()=0
Set the status to Active.
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:61
KFD_IOC_ALLOC_MEM_FLAGS_VRAM
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM
Definition: kfd_ioctl.h:375
syscall_emul_buf.hh
KFD_IOCTL_MINOR_VERSION
#define KFD_IOCTL_MINOR_VERSION
Definition: kfd_ioctl.h:38
KFD_MMAP_GPU_ID
#define KFD_MMAP_GPU_ID(gpu_id)
Definition: kfd_event_defines.h:51
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::HSAPacketProcessor::setDeviceQueueDesc
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion)
Definition: hsa_packet_processor.cc:99
gem5::ThreadContext::cpuId
virtual int cpuId() const =0
AMDKFD_IOC_GET_VERSION
#define AMDKFD_IOC_GET_VERSION
Definition: kfd_ioctl.h:522
gem5::GPUComputeDriver::allocateGpuVma
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length)
Allocate/deallocate GPUVM VMAs for tracking virtual address allocations and properties on DGPUs.
Definition: gpu_compute_driver.cc:991
gem5::Process::memState
std::shared_ptr< MemState > memState
Definition: process.hh:290
gem5::GPUComputeDriver::eventSlotIndex
uint32_t eventSlotIndex
Definition: gpu_compute_driver.hh:155
gem5::GPUComputeDriver::setMtype
void setMtype(RequestPtr req)
Called by the compute units right before a request is issued to ruby.
Definition: gpu_compute_driver.cc:1016
gem5::TranslatingPortProxy
This proxy attempts to translate virtual addresses using the TLBs.
Definition: translating_port_proxy.hh:60
KFD_MMAP_TYPE_MASK
#define KFD_MMAP_TYPE_MASK
Definition: kfd_event_defines.h:43
gem5::GPUComputeDriver::ETable
std::unordered_map< uint32_t, ETEntry > ETable
Definition: gpu_compute_driver.hh:157
AMDKFD_IOC_SET_EVENT
#define AMDKFD_IOC_SET_EVENT
Definition: kfd_ioctl.h:549
gem5::Flags< CacheCoherenceFlagsType >
AMDKFD_IOC_GET_PROCESS_APERTURES
#define AMDKFD_IOC_GET_PROCESS_APERTURES
Definition: kfd_ioctl.h:537
AMDKFD_IOC_SET_TRAP_HANDLER
#define AMDKFD_IOC_SET_TRAP_HANDLER
Definition: kfd_ioctl.h:576
AMDKFD_IOC_DBG_REGISTER
#define AMDKFD_IOC_DBG_REGISTER
Definition: kfd_ioctl.h:558
gem5::GPUComputeDriver::sleepCPU
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
Definition: gpu_compute_driver.cc:927
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:94
gem5::GPUComputeDriver::defaultMtype
Request::CacheCoherenceFlags defaultMtype
Definition: gpu_compute_driver.hh:174
gem5::Named::name
virtual std::string name() const
Definition: named.hh:47
gem5::Request::CACHED
@ CACHED
mtype flags
Definition: request.hh:319
gem5::GPUComputeDriver::queueId
uint32_t queueId
Definition: gpu_compute_driver.hh:150
gem5::BaseBufferArg::size
const int size
buffer size
Definition: syscall_emul_buf.hh:99
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
AMDKFD_IOC_ACQUIRE_VM
#define AMDKFD_IOC_ACQUIRE_VM
Definition: kfd_ioctl.h:583
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
AMDKFD_IOC_SET_CU_MASK
#define AMDKFD_IOC_SET_CU_MASK
Definition: kfd_ioctl.h:598
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
AMDKFD_IOC_GET_QUEUE_WAVE_STATE
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE
Definition: kfd_ioctl.h:601
gem5::Request::SHARED
@ SHARED
Definition: request.hh:321
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::GPUComputeDriver::ldsApeBaseV9
Addr ldsApeBaseV9() const
Definition: gpu_compute_driver.cc:979
port_proxy.hh
process.hh
PAGE_SHIFT
#define PAGE_SHIFT
Definition: kfd_event_defines.h:41
kfd_event_defines.h
KFD_IOCTL_MAJOR_VERSION
#define KFD_IOCTL_MAJOR_VERSION
Definition: kfd_ioctl.h:37
gem5::PortProxy
This object is a proxy for a port or other object which implements the functional response protocol,...
Definition: port_proxy.hh:86
gem5::GPUComputeDriver::scratchApeBase
Addr scratchApeBase(int gpuNum) const
Definition: gpu_compute_driver.cc:951
AMDKFD_IOC_RESET_EVENT
#define AMDKFD_IOC_RESET_EVENT
Definition: kfd_ioctl.h:552
gem5::GPUComputeDriver::GPUComputeDriver
GPUComputeDriver(const Params &p)
Definition: gpu_compute_driver.cc:60
page_size.hh
KFD_IOC_ALLOC_MEM_FLAGS_GTT
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT
Definition: kfd_ioctl.h:376
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::AddrRange::end
Addr end() const
Get the end address of the range.
Definition: addr_range.hh:350
compiler.hh
gpu_command_processor.hh
KFD_IOC_EVENT_SIGNAL
#define KFD_IOC_EVENT_SIGNAL
Definition: kfd_ioctl.h:214
AMDKFD_IOC_DBG_ADDRESS_WATCH
#define AMDKFD_IOC_DBG_ADDRESS_WATCH
Definition: kfd_ioctl.h:564
std::pair
STL pair class.
Definition: stl.hh:58
gem5::Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUComputeDriver::open
int open(ThreadContext *tc, int mode, int flags) override
Create an FD entry for the KFD inside of the owning process.
Definition: gpu_compute_driver.cc:90
gem5::GPUComputeDriver::scratchApeBaseV9
Addr scratchApeBaseV9() const
Definition: gpu_compute_driver.cc:959
gem5::GPUComputeDriver::dGPUPoolID
int dGPUPoolID
Definition: gpu_compute_driver.hh:153
gem5::GPUComputeDriver::doorbellSize
int doorbellSize()
Definition: gpu_compute_driver.hh:86
full_system.hh
AMDKFD_IOC_IMPORT_DMABUF
#define AMDKFD_IOC_IMPORT_DMABUF
Definition: kfd_ioctl.h:607
KFD_MMAP_TYPE_EVENTS
#define KFD_MMAP_TYPE_EVENTS
Definition: kfd_event_defines.h:45
gem5::BaseBufferArg::copyIn
bool copyIn(const PortProxy &memproxy)
copy data into simulator space (read from target memory)
Definition: syscall_emul_buf.hh:81
gem5::divCeil
static constexpr T divCeil(const T &a, const U &b)
Definition: intmath.hh:110
gem5::ThreadContext::getProcessPtr
virtual Process * getProcessPtr()=0
gem5::FullSystem
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:220
gem5::EmulatedDriver
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
Definition: emul_driver.hh:55
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
gem5::GPUComputeDriver::ioctl
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
Definition: gpu_compute_driver.cc:225
gem5::GPUComputeDriver::DriverWakeupEvent::scheduleWakeup
void scheduleWakeup(Tick wakeup_delay)
Definition: gpu_compute_driver.cc:180
gem5::ThreadContext::suspend
virtual void suspend()=0
Set the status to Suspended.
KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
Definition: kfd_ioctl.h:386
gem5::GPUComputeDriver::device
GPUCommandProcessor * device
GPU that is controlled by this driver.
Definition: gpu_compute_driver.hh:149
AMDKFD_IOC_CREATE_EVENT
#define AMDKFD_IOC_CREATE_EVENT
Definition: kfd_ioctl.h:543
AMDKFD_IOC_DESTROY_QUEUE
#define AMDKFD_IOC_DESTROY_QUEUE
Definition: kfd_ioctl.h:528
AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
Definition: kfd_ioctl.h:595
gem5::BaseBufferArg::copyOut
bool copyOut(const PortProxy &memproxy)
copy data out of simulator space (write to target memory)
Definition: syscall_emul_buf.hh:91
SLOTS_PER_PAGE
#define SLOTS_PER_PAGE
Definition: kfd_event_defines.h:46
gem5::GPUComputeDriver::isdGPU
bool isdGPU
Definition: gpu_compute_driver.hh:151
gem5::ArmISA::PageBytes
const Addr PageBytes
Definition: page_size.hh:53
AMDKFD_IOC_SET_MEMORY_POLICY
#define AMDKFD_IOC_SET_MEMORY_POLICY
Definition: kfd_ioctl.h:531
gem5::GPUComputeDriver::mmap
Addr mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) override
Currently, mmap() will simply set up a mapping for the associated device's packet processor's doorbell...
Definition: gpu_compute_driver.cc:104
AMDKFD_IOC_WAIT_EVENTS
#define AMDKFD_IOC_WAIT_EVENTS
Definition: kfd_ioctl.h:555
gem5::BaseMMU::flushAll
virtual void flushAll()
Definition: mmu.cc:81
logging.hh
AMDKFD_IOC_SET_SCRATCH_BACKING_VA
#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA
Definition: kfd_ioctl.h:570
AMDKFD_IOC_DBG_UNREGISTER
#define AMDKFD_IOC_DBG_UNREGISTER
Definition: kfd_ioctl.h:561
AMDKFD_IOC_SMI_EVENTS
#define AMDKFD_IOC_SMI_EVENTS
Definition: kfd_ioctl.h:613
se_translating_port_proxy.hh
gem5::GPUComputeDriver::Params
GPUComputeDriverParams Params
Definition: gpu_compute_driver.hh:65
gem5::GPUComputeDriver::DriverWakeupEvent::process
void process() override
Definition: gpu_compute_driver.cc:214
trace.hh
AMDKFD_IOC_UPDATE_QUEUE
#define AMDKFD_IOC_UPDATE_QUEUE
Definition: kfd_ioctl.h:540
gem5::GPUComputeDriver::ldsApeLimit
Addr ldsApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:985
gpu_compute_driver.hh
gem5::AddrRange
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:81
gem5::GPUComputeDriver::DriverWakeupEvent::description
const char * description() const override
Return a C string describing the event.
Definition: gpu_compute_driver.cc:81
AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
Definition: kfd_ioctl.h:579
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
AMDKFD_IOC_ALLOC_QUEUE_GWS
#define AMDKFD_IOC_ALLOC_QUEUE_GWS
Definition: kfd_ioctl.h:610
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: tlb.cc:60
gem5::GPUComputeDriver::gpuVmApeBase
Addr gpuVmApeBase(int gpuNum) const
The aperture (APE) base/limit pairs are set statically at startup by the real KFD.
Definition: gpu_compute_driver.cc:939
gem5::GPUComputeDriver::scratchApeLimit
Addr scratchApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:965
gem5::GPUComputeDriver::ldsApeBase
Addr ldsApeBase(int gpuNum) const
Definition: gpu_compute_driver.cc:971
gem5::EmulatedDriver::filename
const std::string & filename
filename for opening this driver (under /dev)
Definition: emul_driver.hh:61
kfd_ioctl.h
thread_context.hh
gem5::GPUComputeDriver::gfxVersion
GfxVersion gfxVersion
Definition: gpu_compute_driver.hh:152
AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
Definition: kfd_ioctl.h:586
gem5::X86ISA::prot
Bitfield< 7 > prot
Definition: misc.hh:588
AMDKFD_IOC_CREATE_QUEUE
#define AMDKFD_IOC_CREATE_QUEUE
Definition: kfd_ioctl.h:525
se_workload.hh
AMDKFD_IOC_GET_CLOCK_COUNTERS
#define AMDKFD_IOC_GET_CLOCK_COUNTERS
Definition: kfd_ioctl.h:534
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:74
KFD_MMAP_TYPE_DOORBELL
#define KFD_MMAP_TYPE_DOORBELL
Definition: kfd_event_defines.h:44
AMDKFD_IOC_GET_DMABUF_INFO
#define AMDKFD_IOC_GET_DMABUF_INFO
Definition: kfd_ioctl.h:604
gem5::GPUComputeDriver::eventPage
Addr eventPage
Definition: gpu_compute_driver.hh:154
gem5::sim_clock::as_int::ns
Tick ns
nanosecond
Definition: core.cc:71

Generated on Tue Feb 8 2022 11:47:09 for gem5 by doxygen 1.8.17