gem5  v22.0.0.2
gpu_compute_driver.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include <memory>
35 
36 #include "arch/x86/page_size.hh"
37 #include "base/compiler.hh"
38 #include "base/logging.hh"
39 #include "base/trace.hh"
40 #include "cpu/thread_context.hh"
41 #include "debug/GPUDriver.hh"
42 #include "debug/GPUShader.hh"
45 #include "dev/hsa/kfd_ioctl.h"
47 #include "gpu-compute/shader.hh"
48 #include "mem/port_proxy.hh"
51 #include "params/GPUComputeDriver.hh"
52 #include "sim/full_system.hh"
53 #include "sim/process.hh"
54 #include "sim/se_workload.hh"
55 #include "sim/syscall_emul_buf.hh"
56 
57 namespace gem5
58 {
59 
/**
 * Constructor (the signature line is missing from this listing).
 *
 * Initialises the driver members from the params object `p`, attaches this
 * driver to the GPU device, and decodes the mtype bits configured in `p`.
 */
61  : EmulatedDriver(p), device(p.device), queueId(0),
62  isdGPU(p.isdGPU), gfxVersion(p.gfxVersion), dGPUPoolID(p.dGPUPoolID),
63  eventPage(0), eventSlotIndex(0)
64 {
// Let the device know which driver instance serves it.
65  device->attachDriver(this);
66  DPRINTF(GPUDriver, "Constructing KFD: device\n");
67 
68  // Convert the 3 bit mtype specified in Shader.py to the proper type
69  // used for requests.
70  std::bitset<MtypeFlags::NUM_MTYPE_BITS> mtype(p.m_type);
// NOTE(review): the statement inside each of the three `if` bodies below is
// not visible in this listing; presumably each one sets the matching flag
// on defaultMtype -- confirm against the real source.
71  if (mtype.test(MtypeFlags::SHARED)) {
73  }
74 
75  if (mtype.test(MtypeFlags::READ_WRITE)) {
77  }
78 
79  if (mtype.test(MtypeFlags::CACHED)) {
81  }
82 }
83 
/**
 * Return the fixed, human-readable name of this event type.
 * (The function-name line is missing from this listing.)
 *
 * @return the string literal "DriverWakeupEvent"
 */
84 const char*
86 {
87  return "DriverWakeupEvent";
88 }
89 
/**
 * Open the device file on behalf of the calling process.
 * (The signature line is missing from this listing.)
 *
 * Wraps this driver and `filename` in a DeviceFDEntry, registers that entry
 * in the process's file descriptor table, and returns the allocated target
 * file descriptor.
 */
93 int
95 {
96  DPRINTF(GPUDriver, "Opened %s\n", filename);
97  auto process = tc->getProcessPtr();
// The fd entry ties the new descriptor back to this driver instance.
98  auto device_fd_entry = std::make_shared<DeviceFDEntry>(this, filename);
99  int tgt_fd = process->fds->allocFD(device_fd_entry);
100  return tgt_fd;
101 }
102 
107 Addr
108 GPUComputeDriver::mmap(ThreadContext *tc, Addr start, uint64_t length,
109  int prot, int tgt_flags, int tgt_fd, off_t offset)
110 {
111  auto process = tc->getProcessPtr();
112  auto mem_state = process->memState;
113 
114  Addr pg_off = offset >> PAGE_SHIFT;
115  Addr mmap_type = pg_off & KFD_MMAP_TYPE_MASK;
116  DPRINTF(GPUDriver, "amdkfd mmap (start: %p, length: 0x%x,"
117  "offset: 0x%x)\n", start, length, offset);
118 
119  switch(mmap_type) {
121  DPRINTF(GPUDriver, "amdkfd mmap type DOORBELL offset\n");
122  start = mem_state->extendMmap(length);
123  process->pTable->map(start, device->hsaPacketProc().pioAddr,
124  length, false);
125  break;
127  DPRINTF(GPUDriver, "amdkfd mmap type EVENTS offset\n");
128  panic_if(start != 0,
129  "Start address should be provided by KFD\n");
130  panic_if(length != 8 * KFD_SIGNAL_EVENT_LIMIT,
131  "Requested length %d, expected length %d; length "
132  "mismatch\n", length, 8* KFD_SIGNAL_EVENT_LIMIT);
138  if (!eventPage) {
139  eventPage = mem_state->extendMmap(length);
140  start = eventPage;
141  }
142  break;
143  default:
144  warn_once("Unrecognized kfd mmap type %llx\n", mmap_type);
145  break;
146  }
147 
148  return start;
149 }
150 
/**
 * Create a new HSA queue from the ioctl argument buffer.
 * (The signature line and the declaration of `args` are missing from this
 * listing.)
 *
 * Reads the arguments through `mem_proxy`, fills in the doorbell offset and
 * the newly assigned queue id, registers the queue descriptor with the HSA
 * packet processor, and writes the arguments back.
 */
158 void
160 {
162  args.copyIn(mem_proxy);
163 
// NOTE(review): the bound is `> 4096`, so a doorbell byte offset of exactly
// 4096 is accepted; confirm whether `>=` is intended for the doorbell region
// size.
164  if ((doorbellSize() * queueId) > 4096) {
165  fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
166  }
167 
// The mmap offset encodes the DOORBELL type and the gpu id, shifted into the
// page-number position.
168  args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
169  KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;
170 
171  // for vega offset needs to include exact value of doorbell
172  if (doorbellSize())
173  args->doorbell_offset += queueId * doorbellSize();
174 
// Hand out the current id and advance the counter for the next queue.
175  args->queue_id = queueId++;
176  auto &hsa_pp = device->hsaPacketProc();
177  hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
178  args->ring_base_address, args->queue_id,
179  args->ring_size, doorbellSize(), gfxVersion);
180  args.copyOut(mem_proxy);
181 }
182 
/**
 * Schedule this event on the owning driver `wakeup_delay` ticks after the
 * current tick. (The signature line is missing from this listing.)
 */
183 void
185 {
186  assert(driver);
187  driver->schedule(this, curTick() + wakeup_delay);
188 }
189 
/**
 * Signal the event identified by `event_id`.
 * (The signature line is missing from this listing.)
 *
 * If a thread is recorded as waiting on the event, its pending events are
 * cleared and the thread is activated. Otherwise the event is marked as
 * set so that a later wait can observe it.
 *
 * Panics if `event_id` has not been created yet, or if a waiting entry has
 * no thread context.
 */
190 void
192 {
193  panic_if(event_id >= eventSlotIndex,
194  "Trying wakeup on an event that is not yet created\n");
195  if (ETable[event_id].threadWaiting) {
196  panic_if(!ETable[event_id].tc,
197  "No thread context to wake up\n");
198  ThreadContext *tc = ETable[event_id].tc;
199  DPRINTF(GPUDriver,
200  "Signal event: Waking up CPU %d\n", tc->cpuId());
201  // Remove events that can wakeup this thread
202  TCEvents[tc].clearEvents();
203  // Now wakeup this thread
204  tc->activate();
205  } else {
206  // This may be a race condition between an ioctl call asking to wait on
207  // this event and this signalWakeupEvent. Taking care of this race
208  // condition here by setting the event here. The ioctl call should take
209  // the necessary action when waiting on an already set event. However,
210  // this may be a genuine instance in which the runtime has decided not
211  // to wait on this event. But since we cannot distinguish this case from
212  // the race condition, we set the event anyway.
213  ETable[event_id].setEvent = true;
214  }
215 }
216 
/**
 * Timer wakeup handler (the signature line is missing from this listing).
 *
 * Clears every event registered for `tc` in the driver's TCEvents table and
 * then activates the thread context.
 */
217 void
219 {
220  DPRINTF(GPUDriver,
221  "Timer event: Waking up CPU %d\n", tc->cpuId());
222  // Remove events that can wakeup this thread
223  driver->TCEvents[tc].clearEvents();
224  // Now wakeup this thread
225  tc->activate();
226 }
227 
/**
 * Emulate the KFD ioctl interface.
 *
 * Dispatches on `req`. Implemented requests read their argument structure
 * from the guest buffer at `ioc_buf` through a port proxy, act on it, and
 * write results back; unimplemented requests only emit a warning. An
 * unrecognised request is fatal.
 *
 * @param tc thread context of the calling thread
 * @param req ioctl request code
 * @param ioc_buf guest address of the ioctl argument buffer
 * @return always 0
 */
228 int
229 GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
230 {
// Pick the proxy that matches the simulation mode (full-system vs. SE).
231  TranslatingPortProxy fs_proxy(tc);
232  SETranslatingPortProxy se_proxy(tc);
233  PortProxy &virt_proxy = FullSystem ? fs_proxy : se_proxy;
234  auto process = tc->getProcessPtr();
235  auto mem_state = process->memState;
236 
// NOTE(review): every `case` label and most `args` declarations are absent
// from this listing (see the gaps in the embedded line numbers). The
// DPRINTF/warn text inside each branch names the request it handles.
237  switch (req) {
239  {
240  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_VERSION\n");
241 
243  args->major_version = KFD_IOCTL_MAJOR_VERSION;
244  args->minor_version = KFD_IOCTL_MINOR_VERSION;
245 
246  args.copyOut(virt_proxy);
247  }
248  break;
250  {
251  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_QUEUE\n");
252 
253  allocateQueue(virt_proxy, ioc_buf);
254 
// NOTE(review): allocateQueue() post-increments queueId, so this prints the
// id of the next queue rather than the one just created -- confirm intent.
255  DPRINTF(GPUDriver, "Creating queue %d\n", queueId);
256  }
257  break;
259  {
261  args.copyIn(virt_proxy);
262  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
263  "queue offset %d\n", args->queue_id);
264  device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id,
265  doorbellSize());
266  }
267  break;
269  {
283  warn("unimplemented ioctl: AMDKFD_IOC_SET_MEMORY_POLICY\n");
284  }
285  break;
287  {
288  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_CLOCK_COUNTERS\n");
289 
291  args.copyIn(virt_proxy);
292 
293  // Set nanosecond resolution
294  args->system_clock_freq = 1000000000;
295 
// All three counters report the same value: elapsed simulated nanoseconds.
300  uint64_t elapsed_nsec = curTick() / sim_clock::as_int::ns;
301  args->gpu_clock_counter = elapsed_nsec;
302  args->cpu_clock_counter = elapsed_nsec;
303  args->system_clock_counter = elapsed_nsec;
304 
305  args.copyOut(virt_proxy);
306  }
307  break;
309  {
310  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES\n");
311 
// A single node is reported, so the loop below runs exactly once.
313  args->num_of_nodes = 1;
314 
321  for (int i = 0; i < args->num_of_nodes; ++i) {
// Scratch and LDS aperture bases depend on the GFX generation.
329  switch (gfxVersion) {
330  case GfxVersion::gfx801:
331  case GfxVersion::gfx803:
332  args->process_apertures[i].scratch_base =
333  scratchApeBase(i + 1);
334  args->process_apertures[i].lds_base =
335  ldsApeBase(i + 1);
336  break;
337  case GfxVersion::gfx900:
338  case GfxVersion::gfx902:
// NOTE(review): the right-hand side of this assignment (listing line 340)
// is not visible here.
339  args->process_apertures[i].scratch_base =
341  args->process_apertures[i].lds_base =
342  ldsApeBaseV9();
343  break;
344  default:
345  fatal("Invalid gfx version\n");
346  }
347 
348  // GFX8 and GFX9 set lds and scratch limits the same way
349  args->process_apertures[i].scratch_limit =
350  scratchApeLimit(args->process_apertures[i].scratch_base);
351 
352  args->process_apertures[i].lds_limit =
353  ldsApeLimit(args->process_apertures[i].lds_base);
354 
355  switch (gfxVersion) {
356  case GfxVersion::gfx801:
357  args->process_apertures[i].gpuvm_base =
358  gpuVmApeBase(i + 1);
359  args->process_apertures[i].gpuvm_limit =
360  gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
361  break;
362  case GfxVersion::gfx803:
363  case GfxVersion::gfx900:
364  case GfxVersion::gfx902:
365  // Taken from SVM_USE_BASE in Linux kernel
366  args->process_apertures[i].gpuvm_base = 0x1000000ull;
367  // Taken from AMDGPU_GMC_HOLE_START in Linux kernel
368  args->process_apertures[i].gpuvm_limit =
369  0x0000800000000000ULL - 1;
370  break;
371  default:
372  fatal("Invalid gfx version");
373  }
374 
375  // NOTE: Must match ID populated by hsaTopology.py
376  //
377  // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/
378  // blob/6a986c0943e9acd8c4c0cf2a9d510ff42167b43f/include/uapi/
379  // linux/kfd_ioctl.h#L564
380  //
381  // The gpu_id is a device identifier used by the driver for
382  // ioctls that allocate arguments. Each device has a unique
383  // id composed of a non-zero base and an offset.
384  if (isdGPU) {
385  switch (gfxVersion) {
386  case GfxVersion::gfx803:
387  args->process_apertures[i].gpu_id = 50156;
388  break;
389  case GfxVersion::gfx900:
390  args->process_apertures[i].gpu_id = 22124;
391  break;
392  default:
393  fatal("Invalid gfx version for dGPU\n");
394  }
395  } else {
396  switch (gfxVersion) {
397  case GfxVersion::gfx801:
398  case GfxVersion::gfx902:
399  args->process_apertures[i].gpu_id = 2765;
400  break;
401  default:
402  fatal("Invalid gfx version for APU\n");
403  }
404  }
405 
406  DPRINTF(GPUDriver, "GPUVM base for node[%i] = %#x\n", i,
407  args->process_apertures[i].gpuvm_base);
408  DPRINTF(GPUDriver, "GPUVM limit for node[%i] = %#x\n", i,
409  args->process_apertures[i].gpuvm_limit);
410 
411  DPRINTF(GPUDriver, "LDS base for node[%i] = %#x\n", i,
412  args->process_apertures[i].lds_base);
413  DPRINTF(GPUDriver, "LDS limit for node[%i] = %#x\n", i,
414  args->process_apertures[i].lds_limit);
415 
416  DPRINTF(GPUDriver, "Scratch base for node[%i] = %#x\n", i,
417  args->process_apertures[i].scratch_base);
418  DPRINTF(GPUDriver, "Scratch limit for node[%i] = %#x\n", i,
419  args->process_apertures[i].scratch_limit);
420 
// Bits 63:47 of each scratch/LDS base and limit must be neither all ones
// nor all zeros.
427  assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
428  47) != 0x1ffff);
429  assert(bits<Addr>(args->process_apertures[i].scratch_base, 63,
430  47) != 0);
431  assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
432  47) != 0x1ffff);
433  assert(bits<Addr>(args->process_apertures[i].scratch_limit, 63,
434  47) != 0);
435  assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
436  47) != 0x1ffff);
437  assert(bits<Addr>(args->process_apertures[i].lds_base, 63,
438  47) != 0);
439  assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
440  47) != 0x1ffff);
441  assert(bits<Addr>(args->process_apertures[i].lds_limit, 63,
442  47) != 0);
443  }
444 
445  args.copyOut(virt_proxy);
446  }
447  break;
449  {
450  warn("unimplemented ioctl: AMDKFD_IOC_UPDATE_QUEUE\n");
451  }
452  break;
454  {
455  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_CREATE_EVENT\n");
456 
458  args.copyIn(virt_proxy);
// NOTE(review): a non-signal event type only produces a warning; the code
// below still creates the event, and in that case the slot-limit check in
// the `else if` is skipped. Confirm this is intended.
459  if (args->event_type != KFD_IOC_EVENT_SIGNAL) {
460  warn("Signal events are only supported currently\n");
461  } else if (eventSlotIndex == SLOTS_PER_PAGE) {
462  fatal("Signal event wasn't created; signal limit reached\n");
463  }
464  // Currently, we allocate only one signal_page for events.
465  // Note that this signal page is of size 8 * KFD_SIGNAL_EVENT_LIMIT
466  uint64_t page_index = 0;
467  args->event_page_offset = (page_index | KFD_MMAP_TYPE_EVENTS);
468  args->event_page_offset <<= PAGE_SHIFT;
469  // TODO: Currently we support only signal events, hence using
470  // the same ID for both signal slot and event slot
471  args->event_slot_index = eventSlotIndex;
472  args->event_id = eventSlotIndex++;
473  args->event_trigger_data = args->event_id;
474  DPRINTF(GPUDriver, "amdkfd create events"
475  "(event_id: 0x%x, offset: 0x%x)\n",
476  args->event_id, args->event_page_offset);
477  // Since eventSlotIndex is increased every time a new event is
478  // created ETable at eventSlotIndex(event_id) is guaranteed to be
479  // empty. In a future implementation that reuses deleted event_ids,
480  // we should check if event table at this
481  // eventSlotIndex(event_id) is empty before inserting a new event
482  // table entry
483  ETable.emplace(std::pair<uint32_t, ETEntry>(args->event_id, {}));
484  args.copyOut(virt_proxy);
485  }
486  break;
488  {
489  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_EVENT\n");
491  args.copyIn(virt_proxy);
492  DPRINTF(GPUDriver, "amdkfd destroying event %d\n", args->event_id);
493  fatal_if(ETable.count(args->event_id) == 0,
494  "Event ID invalid, cannot destroy this event\n");
495  ETable.erase(args->event_id);
496  }
497  break;
499  {
500  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_SET_EVENTS\n");
502  args.copyIn(virt_proxy);
503  DPRINTF(GPUDriver, "amdkfd set event %d\n", args->event_id);
// NOTE(review): "invlaid" in the message below is a typo for "invalid".
504  fatal_if(ETable.count(args->event_id) == 0,
505  "Event ID invlaid, cannot set this event\n");
506  ETable[args->event_id].setEvent = true;
507  signalWakeupEvent(args->event_id);
508  }
509  break;
511  {
512  warn("unimplemented ioctl: AMDKFD_IOC_RESET_EVENT\n");
513  }
514  break;
516  {
517  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_WAIT_EVENTS\n");
519  args.copyIn(virt_proxy);
// `events` holds a guest address taken from args->events_ptr. It is only
// used for null checking and address arithmetic below, never dereferenced
// in the visible code.
520  kfd_event_data *events =
521  (kfd_event_data *)args->events_ptr;
522  DPRINTF(GPUDriver, "amdkfd wait for events"
523  "(wait on all: %d, timeout : %d, num_events: %s)\n",
524  args->wait_for_all, args->timeout, args->num_events);
525  panic_if(args->wait_for_all != 0 && args->num_events > 1,
526  "Wait for all events not supported\n");
527  bool should_sleep = true;
528  if (TCEvents.count(tc) == 0) {
529  // This thread context trying to wait on an event for the first
530  // time, initialize it.
531  TCEvents.emplace(std::piecewise_construct, std::make_tuple(tc),
532  std::make_tuple(this, tc));
533  DPRINTF(GPUDriver, "\tamdkfd creating event list"
534  " for thread %d\n", tc->cpuId());
535  }
536  panic_if(TCEvents[tc].signalEvents.size() != 0,
537  "There are %d events that put this thread to sleep,"
538  " this thread should not be running\n",
539  TCEvents[tc].signalEvents.size());
540  for (int i = 0; i < args->num_events; i++) {
541  panic_if(!events,
542  "Event pointer invalid\n");
// Guest address of the i-th kfd_event_data element.
543  Addr eventDataAddr = (Addr)(events + i);
545  eventDataAddr, sizeof(kfd_event_data));
546  EventData.copyIn(virt_proxy);
547  DPRINTF(GPUDriver,
548  "\tamdkfd wait for event %d\n", EventData->event_id);
// NOTE(review): the message says "cannot set this event" although this is
// the wait path; it looks copied from the set-event branch.
549  panic_if(ETable.count(EventData->event_id) == 0,
550  "Event ID invalid, cannot set this event\n");
551  if (ETable[EventData->event_id].threadWaiting)
552  warn("Multiple threads waiting on the same event\n");
553  if (ETable[EventData->event_id].setEvent) {
554  // If event is already set, the event has already happened.
555  // Just unset the event and don't put this thread to sleep.
556  ETable[EventData->event_id].setEvent = false;
557  should_sleep = false;
558  }
559  if (should_sleep) {
560  // Put this thread to sleep
561  ETable[EventData->event_id].threadWaiting = true;
562  ETable[EventData->event_id].tc = tc;
563  TCEvents[tc].signalEvents.insert(EventData->event_id);
564  }
565  }
566 
567  // TODO: Return the correct wait_result back. Currently, returning
568  // success for both KFD_WAIT_TIMEOUT and KFD_WAIT_COMPLETE.
569  // Ideally, this needs to be done after the event is triggered and
570  // after the thread is woken up.
571  args->wait_result = 0;
572  args.copyOut(virt_proxy);
573  if (should_sleep) {
574  // Put this thread to sleep
575  sleepCPU(tc, args->timeout);
576  } else {
577  // Remove events that tried to put this thread to sleep
578  TCEvents[tc].clearEvents();
579  }
580  }
581  break;
583  {
584  warn("unimplemented ioctl: AMDKFD_IOC_DBG_REGISTER\n");
585  }
586  break;
588  {
589  warn("unimplemented ioctl: AMDKFD_IOC_DBG_UNREGISTER\n");
590  }
591  break;
593  {
594  warn("unimplemented ioctl: AMDKFD_IOC_DBG_ADDRESS_WATCH\n");
595  }
596  break;
598  {
599  warn("unimplemented ioctl: AMDKFD_IOC_DBG_WAVE_CONTROL\n");
600  }
601  break;
603  {
604  warn("unimplemented ioctl: AMDKFD_IOC_SET_SCRATCH_BACKING_VA\n");
605  }
606  break;
608  {
609  warn("unimplemented ioctl: AMDKFD_IOC_GET_TILE_CONFIG\n");
610  }
611  break;
613  {
614  warn("unimplemented ioctl: AMDKFD_IOC_SET_TRAP_HANDLER\n");
615  }
616  break;
618  {
619  DPRINTF(GPUDriver,
620  "ioctl: AMDKFD_IOC_GET_PROCESS_APERTURES_NEW\n");
621 
623  ioc_args(ioc_buf);
624 
625  ioc_args.copyIn(virt_proxy);
626  ioc_args->num_of_nodes = 1;
627 
// Same aperture computation as AMDKFD_IOC_GET_PROCESS_APERTURES, but the
// per-node record lives in a separate guest buffer that the ioctl
// arguments point to.
628  for (int i = 0; i < ioc_args->num_of_nodes; ++i) {
630  (ioc_args->kfd_process_device_apertures_ptr);
631 
632  switch (gfxVersion) {
633  case GfxVersion::gfx801:
634  case GfxVersion::gfx803:
635  ape_args->scratch_base = scratchApeBase(i + 1);
636  ape_args->lds_base = ldsApeBase(i + 1);
637  break;
638  case GfxVersion::gfx900:
639  case GfxVersion::gfx902:
640  ape_args->scratch_base = scratchApeBaseV9();
641  ape_args->lds_base = ldsApeBaseV9();
642  break;
643  default:
644  fatal("Invalid gfx version\n");
645  }
646 
647  // GFX8 and GFX9 set lds and scratch limits the same way
648  ape_args->scratch_limit =
649  scratchApeLimit(ape_args->scratch_base);
650  ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
651 
652  switch (gfxVersion) {
653  case GfxVersion::gfx801:
654  ape_args->gpuvm_base = gpuVmApeBase(i + 1);
655  ape_args->gpuvm_limit =
656  gpuVmApeLimit(ape_args->gpuvm_base);
657  break;
658  case GfxVersion::gfx803:
659  case GfxVersion::gfx900:
660  case GfxVersion::gfx902:
661  // Taken from SVM_USE_BASE in Linux kernel
662  ape_args->gpuvm_base = 0x1000000ull;
663  // Taken from AMDGPU_GMC_HOLE_START in Linux kernel
664  ape_args->gpuvm_limit = 0x0000800000000000ULL - 1;
665  break;
666  default:
667  fatal("Invalid gfx version\n");
668  }
669 
670  // NOTE: Must match ID populated by hsaTopology.py
671  if (isdGPU) {
672  switch (gfxVersion) {
673  case GfxVersion::gfx803:
674  ape_args->gpu_id = 50156;
675  break;
676  case GfxVersion::gfx900:
677  ape_args->gpu_id = 22124;
678  break;
679  default:
680  fatal("Invalid gfx version for dGPU\n");
681  }
682  } else {
683  switch (gfxVersion) {
684  case GfxVersion::gfx801:
685  case GfxVersion::gfx902:
686  ape_args->gpu_id = 2765;
687  break;
688  default:
689  fatal("Invalid gfx version for APU\n");
690  }
691  }
692 
693  assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0x1ffff);
694  assert(bits<Addr>(ape_args->scratch_base, 63, 47) != 0);
695  assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0x1ffff);
696  assert(bits<Addr>(ape_args->scratch_limit, 63, 47) != 0);
697  assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0x1ffff);
698  assert(bits<Addr>(ape_args->lds_base, 63, 47) != 0);
699  assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0x1ffff);
700  assert(bits<Addr>(ape_args->lds_limit, 63, 47) != 0);
701 
702  ape_args.copyOut(virt_proxy);
703  }
704 
705  ioc_args.copyOut(virt_proxy);
706  }
707  break;
709  {
710  warn("unimplemented ioctl: AMDKFD_IOC_ACQUIRE_VM\n");
711  }
712  break;
730  {
731  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_ALLOC_MEMORY_OF_GPU\n");
733  args.copyIn(virt_proxy);
734 
735  assert(isdGPU || gfxVersion == GfxVersion::gfx902);
736  assert((args->va_addr % TheISA::PageBytes) == 0);
737  [[maybe_unused]] Addr mmap_offset = 0;
738 
// NOTE(review): `mtype`, used further down, is declared on a line (739)
// that is not visible in this listing.
740  Addr pa_addr = 0;
741 
742  int npages = divCeil(args->size, (int64_t)TheISA::PageBytes);
743  bool cacheable = true;
744 
745  if (KFD_IOC_ALLOC_MEM_FLAGS_VRAM & args->flags) {
746  DPRINTF(GPUDriver, "amdkfd allocation type: VRAM\n");
747  args->mmap_offset = args->va_addr;
748  // VRAM allocations are device memory mapped into GPUVM
749  // space.
750  //
751  // We can't rely on the lazy host allocator (fixupFault) to
752  // handle this mapping since it needs to be placed in dGPU
753  // framebuffer memory. The lazy allocator will try to place
754  // this in host memory.
755  //
756  // TODO: We don't have the appropriate bifurcation of the
757  // physical address space with different memory controllers
758  // yet. This is where we will explicitly add the PT maps to
759  // dGPU memory in the future.
760  //
761  // Bind the VA space to the dGPU physical memory pool. Mark
762  // this region as Uncacheable. The Uncacheable flag is only
763  // really used by the CPU and is ignored by the GPU. We mark
764  // this as uncacheable from the CPU so that we can implement
765  // direct CPU framebuffer access similar to what we currently
766  // offer in real HW through the so-called Large BAR feature.
767  pa_addr = process->seWorkload->allocPhysPages(
768  npages, dGPUPoolID);
769  //
770  // TODO: Uncacheable accesses need to be supported by the
771  // CPU-side protocol for this to work correctly. I believe
772  // it only works right now if the physical memory is MMIO
773  cacheable = false;
774 
775  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
776  "%d\n", args->va_addr, pa_addr, args->size);
777 
778  } else if (KFD_IOC_ALLOC_MEM_FLAGS_USERPTR & args->flags) {
779  DPRINTF(GPUDriver, "amdkfd allocation type: USERPTR\n");
780  mmap_offset = args->mmap_offset;
781  // USERPTR allocations are system memory mapped into GPUVM
782  // space. The user provides the driver with the pointer.
783  pa_addr = process->seWorkload->allocPhysPages(npages);
784 
785  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
786  "%d\n", args->va_addr, pa_addr, args->size);
787 
788  // If the HSA runtime requests system coherent memory, then we
789  // need to explicitly mark this region as uncacheable from the
790  // perspective of the GPU.
791  if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
792  mtype.clear();
793 
794  } else if (KFD_IOC_ALLOC_MEM_FLAGS_GTT & args->flags) {
795  DPRINTF(GPUDriver, "amdkfd allocation type: GTT\n");
796  args->mmap_offset = args->va_addr;
797  // GTT allocations are system memory mapped into GPUVM space.
798  // It's different than a USERPTR allocation since the driver
799  // itself allocates the physical memory on the host.
800  //
801  // We will lazily map it into host memory on first touch. The
802  // fixupFault will find the original SVM aperture mapped to the
803  // host.
804  pa_addr = process->seWorkload->allocPhysPages(npages);
805 
806  DPRINTF(GPUDriver, "Mapping VA %p to framebuffer PA %p size "
807  "%d\n", args->va_addr, pa_addr, args->size);
808 
809  // If the HSA runtime requests system coherent memory, then we
810  // need to explicitly mark this region as uncacheable from the
811  // perspective of the GPU.
812  if (args->flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
813  mtype.clear();
814 
815  // Note that for GTT the thunk layer needs to call mmap on the
816  // driver FD later if it wants the host to have access to this
817  // memory (which it probably does). This will be ignored.
818  } else if (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL & args->flags) {
819  DPRINTF(GPUDriver, "amdkfd allocation type: DOORBELL\n");
820  // DOORBELL allocations are the queue doorbells that are
821  // memory mapped into GPUVM space.
822  //
823  // Explicitly map this virtual address to our PIO doorbell
824  // interface in the page tables (non-cacheable)
825  pa_addr = device->hsaPacketProc().pioAddr;
826  cacheable = false;
827  }
828 
829  DPRINTF(GPUDriver, "amdkfd allocation arguments: va_addr %p "
830  "size %lu, mmap_offset %p, gpu_id %d\n",
831  args->va_addr, args->size, mmap_offset, args->gpu_id);
832 
833  // Bind selected physical memory to provided virtual address range
834  // in X86 page tables.
835  process->pTable->map(args->va_addr, pa_addr, args->size,
836  cacheable);
837 
838  // We keep track of allocated regions of GPU mapped memory,
839  // just like the driver would. This allows us to provide the
840  // user with a unique handle for a given allocation. The user
841  // will only provide us with a handle after allocation and expect
842  // us to be able to use said handle to extract all the properties
843  // of the region.
844  //
845  // This is a simplified version of regular system VMAs, but for
846  // GPUVM space (none of the clobber/remap nonsense we find in real
847  // OS managed memory).
848  allocateGpuVma(mtype, args->va_addr, args->size);
849 
850  // Used by the runtime to uniquely identify this allocation.
851  // We can just use the starting address of the VMA region.
852  args->handle= args->va_addr;
853  args.copyOut(virt_proxy);
854  }
855  break;
857  {
858  DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_FREE_MEMORY_OF_GPU\n");
860  args.copyIn(virt_proxy);
861 
// NOTE(review): the allocation branch above also accepts gfx902 on a
// non-dGPU configuration, but freeing asserts isdGPU only -- confirm
// whether the two conditions should match.
862  assert(isdGPU);
863  DPRINTF(GPUDriver, "amdkfd free arguments: handle %p ",
864  args->handle);
865 
866  // We don't recycle physical pages in SE mode
867  Addr size = deallocateGpuVma(args->handle);
868  process->pTable->unmap(args->handle, size);
869 
870  // TODO: IOMMU and GPUTLBs do not seem to correctly support
871  // shootdown. This is also a potential issue for APU systems
872  // that perform unmap or remap with system memory.
873  tc->getMMUPtr()->flushAll();
874 
875  args.copyOut(virt_proxy);
876  }
877  break;
886  {
887  warn("unimplemented ioctl: AMDKFD_IOC_MAP_MEMORY_TO_GPU\n");
888  }
889  break;
891  {
892  warn("unimplemented ioctl: AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU\n");
893  }
894  break;
896  {
897  warn("unimplemented ioctl: AMDKFD_IOC_SET_CU_MASK\n");
898  }
899  break;
901  {
902  warn("unimplemented ioctl: AMDKFD_IOC_GET_QUEUE_WAVE_STATE\n");
903  }
904  break;
906  {
907  warn("unimplemented ioctl: AMDKFD_IOC_GET_DMABUF_INFO\n");
908  }
909  break;
911  {
912  warn("unimplemented ioctl: AMDKFD_IOC_IMPORT_DMABUF\n");
913  }
914  break;
916  {
917  warn("unimplemented ioctl: AMDKFD_IOC_ALLOC_QUEUE_GWS\n");
918  }
919  break;
921  {
922  warn("unimplemented ioctl: AMDKFD_IOC_SMI_EVENTS\n");
923  }
924  break;
925  default:
// NOTE(review): the format string has two conversions ("%s" and "%d") but
// only `req` is passed. The fatal() in allocateQueue passes name() for its
// "%s", so name() is presumably the missing first argument here.
926  fatal("%s: bad ioctl %d\n", req);
927  break;
928  }
929  return 0;
930 }
931 
932 void
933 GPUComputeDriver::sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
934 {
935  // Convert millisecs to ticks
936  Tick wakeup_delay((uint64_t)milliSecTimeout * 1000000000);
937  assert(TCEvents.count(tc) == 1);
938  TCEvents[tc].timerEvent.scheduleWakeup(wakeup_delay);
939  tc->suspend();
940  DPRINTF(GPUDriver,
941  "CPU %d is put to sleep\n", tc->cpuId());
942 }
943 
/**
 * Aperture base helper; presumably gpuVmApeBase -- the signature line is
 * missing from this listing.
 *
 * @return (gpuNum << 61) + 0x1000000000000 (i.e. plus 2^48)
 */
944 Addr
946 {
947  return ((Addr)gpuNum << 61) + 0x1000000000000L;
948 }
949 
/**
 * Compute the GPUVM aperture limit for a given base.
 * (The signature line is missing from this listing.)
 *
 * @return `apeBase` with its upper 24 bits kept and its low 40 bits all set
 */
950 Addr
952 {
953  return (apeBase & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL;
954 }
955 
/**
 * Aperture base helper; presumably scratchApeBase -- the signature line is
 * missing from this listing.
 *
 * @return (gpuNum << 61) + 0x100000000 (i.e. plus 2^32)
 */
956 Addr
958 {
959  return ((Addr)gpuNum << 61) + 0x100000000L;
960 }
961 
// Aperture base helper; presumably scratchApeBaseV9 -- the signature line
// is missing from this listing. Returns the constant 1 << 48.
962 // Used for GFX9 devices
963 // From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
964 Addr
966 {
967  return ((Addr)0x1 << 48);
968 }
969 
/**
 * Aperture limit helper; presumably scratchApeLimit -- the signature line
 * is missing from this listing.
 *
 * @return `apeBase` with its upper 32 bits kept and its low 32 bits all set
 */
970 Addr
972 {
973  return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
974 }
975 
/**
 * Aperture base helper; presumably ldsApeBase -- the signature line is
 * missing from this listing.
 *
 * @return gpuNum << 61 (the added offset is zero)
 */
976 Addr
978 {
979  return ((Addr)gpuNum << 61) + 0x0;
980 }
981 
// Aperture base helper; presumably ldsApeBaseV9 -- the signature line is
// missing from this listing. Returns the constant 2 << 48.
982 // Used for GFX9 devices
983 // From drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c in the Linux kernel
984 Addr
986 {
987  return ((Addr)0x2 << 48);
988 }
989 
/**
 * Aperture limit helper; presumably ldsApeLimit -- the signature line is
 * missing from this listing.
 *
 * @return `apeBase` with its upper 32 bits kept and its low 32 bits all set
 */
990 Addr
992 {
993  return (apeBase & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
994 }
995 
/**
 * Register the address range [start, start + length) in gpuVmas together
 * with its mtype. (The first signature line is missing from this listing.)
 *
 * Fatal if the insertion into gpuVmas fails, which is reported as a double
 * registration of the range.
 */
996 void
998  Addr start, Addr length)
999 {
1000  AddrRange range = AddrRange(start, start + length);
1001  DPRINTF(GPUDriver, "Registering [%p - %p] with MTYPE %d\n",
1002  range.start(), range.end(), mtype);
// insert() returning end() signals that the range could not be added.
1003  fatal_if(gpuVmas.insert(range, mtype) == gpuVmas.end(),
1004  "Attempted to double register Mtypes for [%p - %p]\n",
1005  range.start(), range.end());
1006 }
1007 
1008 Addr
1010 {
1011  auto vma = gpuVmas.contains(start);
1012  assert(vma != gpuVmas.end());
1013  assert((vma->first.start() == start));
1014  Addr size = vma->first.size();
1015  DPRINTF(GPUDriver, "Unregistering [%p - %p]\n", vma->first.start(),
1016  vma->first.end());
1017  gpuVmas.erase(vma);
1018  return size;
1019 }
1020 
/**
 * Set the cache coherence flags (MTYPE) on a memory request.
 * (The signature line is missing from this listing.)
 *
 * For a dGPU the flags come from the gpuVmas entry covering the request's
 * virtual address range; otherwise defaultMtype is used.
 */
1021 void
1023 {
1024  // If we are a dGPU then set the MTYPE from our VMAs.
1025  if (isdGPU) {
1026  assert(!FullSystem);
1027  AddrRange range = RangeSize(req->getVaddr(), req->getSize());
1028  auto vma = gpuVmas.contains(range);
1029  assert(vma != gpuVmas.end());
// NOTE(review): the format string below ends with an extra "%d\n" and so
// has four conversions for three arguments; the trailing "%d\n" looks
// unintended.
1030  DPRINTF(GPUShader, "Setting req from [%p - %p] MTYPE %d\n"
1031  "%d\n", range.start(), range.end(), vma->second);
1032  req->setCacheCoherenceFlags(vma->second);
1033  // APUs always get the default MTYPE
1034  } else {
1035  req->setCacheCoherenceFlags(defaultMtype);
1036  }
1037 }
1038 
1039 } // namespace gem5
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
AMDKFD_IOC_MAP_MEMORY_TO_GPU
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU
Definition: kfd_ioctl.h:592
gem5::kfd_event_data
Definition: kfd_ioctl.h:306
gem5::HSAPacketProcessor::pioAddr
Addr pioAddr
Definition: hsa_packet_processor.hh:346
gem5::GPUComputeDriver::TCEvents
std::unordered_map< ThreadContext *, EventList > TCEvents
Definition: gpu_compute_driver.hh:206
gem5::GPUComputeDriver::signalWakeupEvent
virtual void signalWakeupEvent(uint32_t event_id)
Definition: gpu_compute_driver.cc:191
gem5::SETranslatingPortProxy
Definition: se_translating_port_proxy.hh:49
gem5::GPUComputeDriver::gpuVmas
AddrRangeMap< Request::CacheCoherenceFlags, 1 > gpuVmas
VMA structures for GPUVM memory.
Definition: gpu_compute_driver.hh:162
KFD_SIGNAL_EVENT_LIMIT
#define KFD_SIGNAL_EVENT_LIMIT
Definition: kfd_ioctl.h:228
gem5::GPUComputeDriver::allocateQueue
void allocateQueue(PortProxy &mem_proxy, Addr ioc_buf_addr)
Forward relevant parameters to packet processor; queueId is used to link doorbell.
Definition: gpu_compute_driver.cc:159
gem5::AddrRange::start
Addr start() const
Get the start address of the range.
Definition: addr_range.hh:343
warn
#define warn(...)
Definition: logging.hh:246
gem5::TypedBufferArg
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
Definition: syscall_emul_buf.hh:132
gem5::GPUComputeDriver::deallocateGpuVma
Addr deallocateGpuVma(Addr start)
Definition: gpu_compute_driver.cc:1009
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR
Definition: kfd_ioctl.h:377
shader.hh
gem5::X86ISA::L
Bitfield< 7, 0 > L
Definition: int.hh:61
gem5::GPUCommandProcessor::attachDriver
void attachDriver(GPUComputeDriver *driver)
Definition: gpu_command_processor.cc:293
AMDKFD_IOC_GET_TILE_CONFIG
#define AMDKFD_IOC_GET_TILE_CONFIG
Definition: kfd_ioctl.h:573
gem5::RangeSize
AddrRange RangeSize(Addr start, Addr size)
Definition: addr_range.hh:815
warn_once
#define warn_once(...)
Definition: logging.hh:250
AMDKFD_IOC_DBG_WAVE_CONTROL
#define AMDKFD_IOC_DBG_WAVE_CONTROL
Definition: kfd_ioctl.h:567
AMDKFD_IOC_FREE_MEMORY_OF_GPU
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU
Definition: kfd_ioctl.h:589
hsa_packet_processor.hh
AMDKFD_IOC_DESTROY_EVENT
#define AMDKFD_IOC_DESTROY_EVENT
Definition: kfd_ioctl.h:546
gem5::Flags::set
void set(Type mask)
Set all of the flag's bits that match the given mask.
Definition: flags.hh:116
gem5::ThreadContext::getMMUPtr
virtual BaseMMU * getMMUPtr()=0
gem5::Flags::clear
void clear()
Clear all of the flag's bits.
Definition: flags.hh:102
gem5::HSAPacketProcessor::unsetDeviceQueueDesc
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
Definition: hsa_packet_processor.cc:106
translating_port_proxy.hh
KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL
Definition: kfd_ioctl.h:378
gem5::GPUComputeDriver::gpuVmApeLimit
Addr gpuVmApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:951
gem5::ThreadContext::activate
virtual void activate()=0
Set the status to Active.
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:65
KFD_IOC_ALLOC_MEM_FLAGS_VRAM
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM
Definition: kfd_ioctl.h:375
syscall_emul_buf.hh
KFD_IOCTL_MINOR_VERSION
#define KFD_IOCTL_MINOR_VERSION
Definition: kfd_ioctl.h:38
KFD_MMAP_GPU_ID
#define KFD_MMAP_GPU_ID(gpu_id)
Definition: kfd_event_defines.h:51
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::Request::READ_WRITE
@ READ_WRITE
Definition: request.hh:337
gem5::ThreadContext::cpuId
virtual int cpuId() const =0
gem5::Request::CACHED
@ CACHED
mtype flags
Definition: request.hh:336
AMDKFD_IOC_GET_VERSION
#define AMDKFD_IOC_GET_VERSION
Definition: kfd_ioctl.h:522
gem5::GPUComputeDriver::allocateGpuVma
void allocateGpuVma(Request::CacheCoherenceFlags mtype, Addr start, Addr length)
Allocate/deallocate GPUVM VMAs for tracking virtual address allocations and properties on DGPUs.
Definition: gpu_compute_driver.cc:997
gem5::HSAPacketProcessor::setDeviceQueueDesc
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
Definition: hsa_packet_processor.cc:112
gem5::Process::memState
std::shared_ptr< MemState > memState
Definition: process.hh:290
gem5::GPUComputeDriver::eventSlotIndex
uint32_t eventSlotIndex
Definition: gpu_compute_driver.hh:155
gem5::GPUComputeDriver::setMtype
void setMtype(RequestPtr req)
Called by the compute units right before a request is issued to ruby.
Definition: gpu_compute_driver.cc:1022
gem5::TranslatingPortProxy
This proxy attempts to translate virtual addresses using the TLBs.
Definition: translating_port_proxy.hh:60
KFD_MMAP_TYPE_MASK
#define KFD_MMAP_TYPE_MASK
Definition: kfd_event_defines.h:43
gem5::GPUComputeDriver::ETable
std::unordered_map< uint32_t, ETEntry > ETable
Definition: gpu_compute_driver.hh:157
AMDKFD_IOC_SET_EVENT
#define AMDKFD_IOC_SET_EVENT
Definition: kfd_ioctl.h:549
gem5::Flags< CacheCoherenceFlagsType >
AMDKFD_IOC_GET_PROCESS_APERTURES
#define AMDKFD_IOC_GET_PROCESS_APERTURES
Definition: kfd_ioctl.h:537
AMDKFD_IOC_SET_TRAP_HANDLER
#define AMDKFD_IOC_SET_TRAP_HANDLER
Definition: kfd_ioctl.h:576
AMDKFD_IOC_DBG_REGISTER
#define AMDKFD_IOC_DBG_REGISTER
Definition: kfd_ioctl.h:558
gem5::GPUComputeDriver::sleepCPU
void sleepCPU(ThreadContext *tc, uint32_t milliSecTimeout)
Definition: gpu_compute_driver.cc:933
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:94
gem5::GPUComputeDriver::defaultMtype
Request::CacheCoherenceFlags defaultMtype
Definition: gpu_compute_driver.hh:175
gem5::Named::name
virtual std::string name() const
Definition: named.hh:47
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
gem5::GPUComputeDriver::queueId
uint32_t queueId
Definition: gpu_compute_driver.hh:150
gem5::BaseBufferArg::size
const int size
buffer size
Definition: syscall_emul_buf.hh:99
gem5::VegaISA::PageBytes
const Addr PageBytes
Definition: page_size.hh:42
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
AMDKFD_IOC_ACQUIRE_VM
#define AMDKFD_IOC_ACQUIRE_VM
Definition: kfd_ioctl.h:583
AMDKFD_IOC_SET_CU_MASK
#define AMDKFD_IOC_SET_CU_MASK
Definition: kfd_ioctl.h:598
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
AMDKFD_IOC_GET_QUEUE_WAVE_STATE
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE
Definition: kfd_ioctl.h:601
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::GPUComputeDriver::ldsApeBaseV9
Addr ldsApeBaseV9() const
Definition: gpu_compute_driver.cc:985
port_proxy.hh
process.hh
PAGE_SHIFT
#define PAGE_SHIFT
Definition: kfd_event_defines.h:41
kfd_event_defines.h
KFD_IOCTL_MAJOR_VERSION
#define KFD_IOCTL_MAJOR_VERSION
Definition: kfd_ioctl.h:37
gem5::PortProxy
This object is a proxy for a port or other object which implements the functional response protocol,...
Definition: port_proxy.hh:86
gem5::GPUComputeDriver::scratchApeBase
Addr scratchApeBase(int gpuNum) const
Definition: gpu_compute_driver.cc:957
AMDKFD_IOC_RESET_EVENT
#define AMDKFD_IOC_RESET_EVENT
Definition: kfd_ioctl.h:552
gem5::GPUComputeDriver::GPUComputeDriver
GPUComputeDriver(const Params &p)
Definition: gpu_compute_driver.cc:60
page_size.hh
KFD_IOC_ALLOC_MEM_FLAGS_GTT
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT
Definition: kfd_ioctl.h:376
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::AddrRange::end
Addr end() const
Get the end address of the range.
Definition: addr_range.hh:350
compiler.hh
flags
uint8_t flags
Definition: helpers.cc:66
gpu_command_processor.hh
KFD_IOC_EVENT_SIGNAL
#define KFD_IOC_EVENT_SIGNAL
Definition: kfd_ioctl.h:214
AMDKFD_IOC_DBG_ADDRESS_WATCH
#define AMDKFD_IOC_DBG_ADDRESS_WATCH
Definition: kfd_ioctl.h:564
std::pair
STL pair class.
Definition: stl.hh:58
gem5::Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUComputeDriver::open
int open(ThreadContext *tc, int mode, int flags) override
Create an FD entry for the KFD inside of the owning process.
Definition: gpu_compute_driver.cc:94
gem5::GPUComputeDriver::scratchApeBaseV9
Addr scratchApeBaseV9() const
Definition: gpu_compute_driver.cc:965
gem5::GPUComputeDriver::dGPUPoolID
int dGPUPoolID
Definition: gpu_compute_driver.hh:153
gem5::GPUComputeDriver::doorbellSize
int doorbellSize()
Definition: gpu_compute_driver.hh:86
full_system.hh
AMDKFD_IOC_IMPORT_DMABUF
#define AMDKFD_IOC_IMPORT_DMABUF
Definition: kfd_ioctl.h:607
KFD_MMAP_TYPE_EVENTS
#define KFD_MMAP_TYPE_EVENTS
Definition: kfd_event_defines.h:45
gem5::BaseBufferArg::copyIn
bool copyIn(const PortProxy &memproxy)
copy data into simulator space (read from target memory)
Definition: syscall_emul_buf.hh:81
gem5::divCeil
static constexpr T divCeil(const T &a, const U &b)
Definition: intmath.hh:110
gem5::ThreadContext::getProcessPtr
virtual Process * getProcessPtr()=0
gem5::FullSystem
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:220
gem5::EmulatedDriver
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
Definition: emul_driver.hh:55
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
gem5::GPUComputeDriver::ioctl
int ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf) override
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
Definition: gpu_compute_driver.cc:229
gem5::GPUComputeDriver::DriverWakeupEvent::scheduleWakeup
void scheduleWakeup(Tick wakeup_delay)
Definition: gpu_compute_driver.cc:184
gem5::ThreadContext::suspend
virtual void suspend()=0
Set the status to Suspended.
KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT
Definition: kfd_ioctl.h:386
gem5::Request::SHARED
@ SHARED
Definition: request.hh:338
gem5::GPUComputeDriver::device
GPUCommandProcessor * device
GPU that is controlled by this driver.
Definition: gpu_compute_driver.hh:149
AMDKFD_IOC_CREATE_EVENT
#define AMDKFD_IOC_CREATE_EVENT
Definition: kfd_ioctl.h:543
AMDKFD_IOC_DESTROY_QUEUE
#define AMDKFD_IOC_DESTROY_QUEUE
Definition: kfd_ioctl.h:528
AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU
Definition: kfd_ioctl.h:595
gem5::BaseBufferArg::copyOut
bool copyOut(const PortProxy &memproxy)
copy data out of simulator space (write to target memory)
Definition: syscall_emul_buf.hh:91
SLOTS_PER_PAGE
#define SLOTS_PER_PAGE
Definition: kfd_event_defines.h:46
gem5::GPUComputeDriver::isdGPU
bool isdGPU
Definition: gpu_compute_driver.hh:151
AMDKFD_IOC_SET_MEMORY_POLICY
#define AMDKFD_IOC_SET_MEMORY_POLICY
Definition: kfd_ioctl.h:531
gem5::GPUComputeDriver::mmap
Addr mmap(ThreadContext *tc, Addr start, uint64_t length, int prot, int tgt_flags, int tgt_fd, off_t offset) override
Currently, mmap() will simply set up a mapping for the associated device's packet processor's doorbell...
Definition: gpu_compute_driver.cc:108
AMDKFD_IOC_WAIT_EVENTS
#define AMDKFD_IOC_WAIT_EVENTS
Definition: kfd_ioctl.h:555
gem5::BaseMMU::flushAll
virtual void flushAll()
Definition: mmu.cc:81
logging.hh
AMDKFD_IOC_SET_SCRATCH_BACKING_VA
#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA
Definition: kfd_ioctl.h:570
AMDKFD_IOC_DBG_UNREGISTER
#define AMDKFD_IOC_DBG_UNREGISTER
Definition: kfd_ioctl.h:561
AMDKFD_IOC_SMI_EVENTS
#define AMDKFD_IOC_SMI_EVENTS
Definition: kfd_ioctl.h:613
se_translating_port_proxy.hh
gem5::GPUComputeDriver::Params
GPUComputeDriverParams Params
Definition: gpu_compute_driver.hh:65
gem5::GPUComputeDriver::DriverWakeupEvent::process
void process() override
Definition: gpu_compute_driver.cc:218
trace.hh
AMDKFD_IOC_UPDATE_QUEUE
#define AMDKFD_IOC_UPDATE_QUEUE
Definition: kfd_ioctl.h:540
gem5::GPUComputeDriver::ldsApeLimit
Addr ldsApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:991
gpu_compute_driver.hh
gem5::AddrRange
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:81
gem5::GPUComputeDriver::DriverWakeupEvent::description
const char * description() const override
Return a C string describing the event.
Definition: gpu_compute_driver.cc:85
AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW
Definition: kfd_ioctl.h:579
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
AMDKFD_IOC_ALLOC_QUEUE_GWS
#define AMDKFD_IOC_ALLOC_QUEUE_GWS
Definition: kfd_ioctl.h:610
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::GPUComputeDriver::gpuVmApeBase
Addr gpuVmApeBase(int gpuNum) const
The aperture (APE) base/limit pairs are set statically at startup by the real KFD.
Definition: gpu_compute_driver.cc:945
gem5::GPUComputeDriver::scratchApeLimit
Addr scratchApeLimit(Addr apeBase) const
Definition: gpu_compute_driver.cc:971
gem5::GPUComputeDriver::ldsApeBase
Addr ldsApeBase(int gpuNum) const
Definition: gpu_compute_driver.cc:977
gem5::EmulatedDriver::filename
const std::string & filename
filename for opening this driver (under /dev)
Definition: emul_driver.hh:61
kfd_ioctl.h
thread_context.hh
gem5::GPUComputeDriver::gfxVersion
GfxVersion gfxVersion
Definition: gpu_compute_driver.hh:152
AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
Definition: kfd_ioctl.h:586
gem5::X86ISA::prot
Bitfield< 7 > prot
Definition: misc.hh:582
AMDKFD_IOC_CREATE_QUEUE
#define AMDKFD_IOC_CREATE_QUEUE
Definition: kfd_ioctl.h:525
se_workload.hh
AMDKFD_IOC_GET_CLOCK_COUNTERS
#define AMDKFD_IOC_GET_CLOCK_COUNTERS
Definition: kfd_ioctl.h:534
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:74
KFD_MMAP_TYPE_DOORBELL
#define KFD_MMAP_TYPE_DOORBELL
Definition: kfd_event_defines.h:44
AMDKFD_IOC_GET_DMABUF_INFO
#define AMDKFD_IOC_GET_DMABUF_INFO
Definition: kfd_ioctl.h:604
gem5::GPUComputeDriver::eventPage
Addr eventPage
Definition: gpu_compute_driver.hh:154
gem5::sim_clock::as_int::ns
Tick ns
nanosecond
Definition: core.cc:71

Generated on Thu Jul 28 2022 13:32:33 for gem5 by doxygen 1.8.17