gem5  v22.1.0.0
amdgpu_device.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include <fstream>
35 
36 #include "debug/AMDGPUDevice.hh"
37 #include "dev/amdgpu/amdgpu_vm.hh"
41 #include "dev/hsa/hw_scheduler.hh"
43 #include "gpu-compute/shader.hh"
44 #include "mem/abstract_mem.hh"
45 #include "mem/packet.hh"
46 #include "mem/packet_access.hh"
47 #include "params/AMDGPUDevice.hh"
48 #include "sim/byteswap.hh"
49 #include "sim/sim_exit.hh"
50 
51 namespace gem5
52 {
53 
54 AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
55  : PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
56  sdma0(p.sdma0), sdma1(p.sdma1), pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
57  checkpoint_before_mmios(p.checkpoint_before_mmios),
58  init_interrupt_count(0), _lastVMID(0),
59  deviceMem(name() + ".deviceMem", p.memories, false, "", false)
60 {
61  // Loading the rom binary dumped from hardware.
62  std::ifstream romBin;
63  romBin.open(p.rom_binary, std::ios::binary);
64  romBin.read((char *)rom.data(), ROM_SIZE);
65  romBin.close();
66 
67  // System pointer needs to be explicitly set for device memory since
68  // DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.
69  // Note this means the cache line size is system wide.
70  for (auto& m : p.memories) {
71  m->system(p.system);
72 
73  // Add to system's device memory map.
74  p.system->addDeviceMemory(gpuMemMgr->getRequestorID(), m);
75  }
76 
77  if (config.expansionROM) {
78  romRange = RangeSize(config.expansionROM, ROM_SIZE);
79  } else {
81  }
82 
83  if (p.trace_file != "") {
84  mmioReader.readMMIOTrace(p.trace_file);
85  }
86 
87  sdma0->setGPUDevice(this);
88  sdma0->setId(0);
89  sdma1->setGPUDevice(this);
90  sdma1->setId(1);
91  deviceIH->setGPUDevice(this);
92  pm4PktProc->setGPUDevice(this);
93  cp->hsaPacketProc().setGPUDevice(this);
94  cp->setGPUDevice(this);
95 }
96 
97 void
99 {
100  Addr rom_offset = pkt->getAddr() & (ROM_SIZE - 1);
101  uint64_t rom_data = 0;
102 
103  memcpy(&rom_data, rom.data() + rom_offset, pkt->getSize());
104  pkt->setUintX(rom_data, ByteOrder::little);
105 
106  DPRINTF(AMDGPUDevice, "Read from addr %#x on ROM offset %#x data: %#x\n",
107  pkt->getAddr(), rom_offset, rom_data);
108 }
109 
112 {
114  AddrRangeList ret_ranges;
115  ret_ranges.push_back(romRange);
116 
117  // If the range starts at zero assume OS hasn't assigned it yet. Do not
118  // return ranges starting with zero as they will surely overlap with
119  // another range causing the I/O crossbar to fatal.
120  for (auto & r : ranges) {
121  if (r.start() != 0) {
122  ret_ranges.push_back(r);
123  }
124  }
125 
126  return ret_ranges;
127 }
128 
129 Tick
131 {
132  [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
133  DPRINTF(AMDGPUDevice, "Read Config: from offset: %#x size: %#x "
134  "data: %#x\n", offset, pkt->getSize(), config.data[offset]);
135 
136  Tick delay = PciDevice::readConfig(pkt);
137 
138  // Before sending MMIOs the driver sends three interrupts in a row.
139  // Use this to trigger creating a checkpoint to restore in timing mode.
140  // This is only necessary until we can create a "hole" in the KVM VM
141  // around the VGA ROM region such that KVM exits and sends requests to
142  // this device rather than the KVM VM.
144  if (offset == PCI0_INTERRUPT_PIN) {
145  if (++init_interrupt_count == 3) {
146  DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n");
147  exitSimLoop("checkpoint", 0, curTick() + delay + 1);
148  }
149  } else {
151  }
152  }
153 
154  return delay;
155 }
156 
157 Tick
159 {
160  [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
161  DPRINTF(AMDGPUDevice, "Write Config: from offset: %#x size: %#x "
162  "data: %#x\n", offset, pkt->getSize(),
163  pkt->getUintX(ByteOrder::little));
164 
165  return PciDevice::writeConfig(pkt);
166 }
167 
168 void
170 {
171  DPRINTF(AMDGPUDevice, "%s from addr %#x size: %#x data: %#x\n",
172  read ? "Read" : "Write", pkt->getAddr(), pkt->getSize(),
173  pkt->getUintX(ByteOrder::little));
174 
175  pkt->makeAtomicResponse();
176 }
177 
178 void
180 {
181  DPRINTF(AMDGPUDevice, "Read framebuffer address %#lx\n", offset);
182 
183  /*
184  * Return data for frame reads in priority order: (1) Special addresses
185  * first, ignoring any writes from driver. (2) Any other address from
186  * device backing store / abstract memory class functionally.
187  */
188  if (offset == 0xa28000) {
189  /*
190  * Handle special counter addresses in framebuffer. These counter
191  * addresses expect the read to return previous value + 1.
192  */
193  if (regs.find(pkt->getAddr()) == regs.end()) {
194  regs[pkt->getAddr()] = 1;
195  } else {
196  regs[pkt->getAddr()]++;
197  }
198 
199  pkt->setUintX(regs[pkt->getAddr()], ByteOrder::little);
200  } else {
201  /*
202  * Read the value from device memory. This must be done functionally
203  * because this method is called by the PCIDevice::read method which
204  * is a non-timing read.
205  */
206  RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
207  vramRequestorId());
208  PacketPtr readPkt = Packet::createRead(req);
209  uint8_t *dataPtr = new uint8_t[pkt->getSize()];
210  readPkt->dataDynamic(dataPtr);
211 
212  auto system = cp->shader()->gpuCmdProc.system();
213  system->getDeviceMemory(readPkt)->access(readPkt);
214 
215  pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
216  }
217 }
218 
219 void
221 {
222  DPRINTF(AMDGPUDevice, "Read doorbell %#lx\n", offset);
224 }
225 
226 void
228 {
229  Addr aperture = gpuvm.getMmioAperture(offset);
230  Addr aperture_offset = offset - aperture;
231 
232  // By default read from MMIO trace. Overwrite the packet for a select
233  // few more dynamic MMIOs.
234  DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
236 
237  switch (aperture) {
238  case NBIO_BASE:
239  switch (aperture_offset) {
240  // This is a PCIe status register. At some point during driver init
241  // the driver checks that interrupts are enabled. This is only
242  // checked once, so if the MMIO trace does not exactly line up with
243  // what the driver is doing in gem5, this may still have the first
244  // bit zero causing driver to fail. Therefore, we always set this
245  // bit to one as there is no harm to do so.
246  case 0x3c: // mmPCIE_DATA2 << 2
247  uint32_t value = pkt->getLE<uint32_t>() | 0x1;
248  DPRINTF(AMDGPUDevice, "Marking interrupts enabled: %#lx\n", value);
249  pkt->setLE<uint32_t>(value);
250  break;
251  } break;
252  case GRBM_BASE:
253  gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
254  break;
255  case MMHUB_BASE:
256  gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
257  break;
258  default:
259  break;
260  }
261 }
262 
263 void
265 {
266  DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);
267 
268  Addr aperture = gpuvm.getFrameAperture(offset);
269  Addr aperture_offset = offset - aperture;
270 
271  // Record the value
272  if (aperture == gpuvm.gartBase()) {
273  gpuvm.gartTable[aperture_offset] = pkt->getUintX(ByteOrder::little);
274  DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", aperture_offset,
275  gpuvm.gartTable[aperture_offset]);
276  }
277 }
278 
279 void
281 {
282  DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset);
283 
284  if (doorbells.find(offset) != doorbells.end()) {
285  QueueType q_type = doorbells[offset];
286  DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
287  offset, q_type);
288  switch (q_type) {
289  case Compute:
291  pkt->getLE<uint64_t>());
292  break;
293  case Gfx:
295  pkt->getLE<uint64_t>());
296  break;
297  case SDMAGfx: {
298  SDMAEngine *sdmaEng = getSDMAEngine(offset);
299  sdmaEng->processGfx(pkt->getLE<uint64_t>());
300  } break;
301  case SDMAPage: {
302  SDMAEngine *sdmaEng = getSDMAEngine(offset);
303  sdmaEng->processPage(pkt->getLE<uint64_t>());
304  } break;
305  case ComputeAQL: {
307  pkt->getLE<uint64_t>() + 1);
308  pm4PktProc->updateReadIndex(offset, pkt->getLE<uint64_t>() + 1);
309  } break;
310  case InterruptHandler:
311  deviceIH->updateRptr(pkt->getLE<uint32_t>());
312  break;
313  case RLC: {
314  SDMAEngine *sdmaEng = getSDMAEngine(offset);
315  sdmaEng->processRLC(offset, pkt->getLE<uint64_t>());
316  } break;
317  default:
318  panic("Write to unkown queue type!");
319  }
320  } else {
321  warn("Unknown doorbell offset: %lx\n", offset);
322  }
323 }
324 
325 void
327 {
328  Addr aperture = gpuvm.getMmioAperture(offset);
329  Addr aperture_offset = offset - aperture;
330 
331  DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);
332 
333  switch (aperture) {
334  /* Write a register to the first System DMA. */
335  case SDMA0_BASE:
336  sdma0->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
337  break;
338  /* Write a register to the second System DMA. */
339  case SDMA1_BASE:
340  sdma1->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
341  break;
342  /* Write a general register to the graphics register bus manager. */
343  case GRBM_BASE:
344  gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
345  pm4PktProc->writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
346  break;
347  /* Write a register to the interrupt handler. */
348  case IH_BASE:
349  deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
350  break;
351  default:
352  DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset);
353  break;
354  }
355 }
356 
357 Tick
359 {
360  if (isROM(pkt->getAddr())) {
361  readROM(pkt);
362  } else {
363  int barnum = -1;
364  Addr offset = 0;
365  getBAR(pkt->getAddr(), barnum, offset);
366 
367  switch (barnum) {
368  case FRAMEBUFFER_BAR:
369  readFrame(pkt, offset);
370  break;
371  case DOORBELL_BAR:
372  readDoorbell(pkt, offset);
373  break;
374  case MMIO_BAR:
375  readMMIO(pkt, offset);
376  break;
377  default:
378  panic("Request with address out of mapped range!");
379  }
380  }
381 
382  dispatchAccess(pkt, true);
383  return pioDelay;
384 }
385 
386 Tick
388 {
389  int barnum = -1;
390  Addr offset = 0;
391  getBAR(pkt->getAddr(), barnum, offset);
392 
393  switch (barnum) {
394  case FRAMEBUFFER_BAR:
395  gpuMemMgr->writeRequest(offset, pkt->getPtr<uint8_t>(),
396  pkt->getSize(), 0, nullptr);
397  writeFrame(pkt, offset);
398  break;
399  case DOORBELL_BAR:
400  writeDoorbell(pkt, offset);
401  break;
402  case MMIO_BAR:
403  writeMMIO(pkt, offset);
404  break;
405  default:
406  panic("Request with address out of mapped range!");
407  }
408 
409  // Record only if there is non-zero value, or a value to be overwritten.
410  // Reads return 0 by default.
411  uint64_t data = pkt->getUintX(ByteOrder::little);
412 
413  DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
414  pkt->getAddr(), data);
415 
416  if (data || regs.find(pkt->getAddr()) != regs.end())
417  regs[pkt->getAddr()] = data;
418 
419  dispatchAccess(pkt, false);
420 
421  return pioDelay;
422 }
423 
424 uint32_t
426 {
427  return regs[addr];
428 }
429 void
430 AMDGPUDevice::setRegVal(uint32_t addr, uint32_t value)
431 {
432  DPRINTF(AMDGPUDevice, "Setting register 0x%lx to %x\n",
433  addr, value);
434  regs[addr] = value;
435 }
436 
437 void
439 {
440  DPRINTF(AMDGPUDevice, "Setting doorbell type for %x\n", offset);
441  doorbells[offset] = qt;
442 }
443 
444 void
446 {
447  sdmaEngs[offset] = eng;
448 }
449 
450 SDMAEngine*
452 {
457  switch (id) {
458  case 0:
459  return sdma0;
460  break;
461  case 1:
462  return sdma1;
463  break;
464  default:
465  panic("No SDMA with id %d\n", id);
466  break;
467  }
468 
469  return nullptr;
470 }
471 
472 SDMAEngine*
474 {
475  return sdmaEngs[offset];
476 }
477 
478 void
480 {
482 }
483 
484 void
486 {
487  // Serialize the PciDevice base class
489 
490  uint64_t regs_size = regs.size();
491  uint64_t doorbells_size = doorbells.size();
492  uint64_t sdma_engs_size = sdmaEngs.size();
493 
494  SERIALIZE_SCALAR(regs_size);
495  SERIALIZE_SCALAR(doorbells_size);
496  SERIALIZE_SCALAR(sdma_engs_size);
497 
498  // Make a c-style array of the regs to serialize
499  uint32_t reg_addrs[regs_size];
500  uint64_t reg_values[regs_size];
501  uint32_t doorbells_offset[doorbells_size];
502  QueueType doorbells_queues[doorbells_size];
503  uint32_t sdma_engs_offset[sdma_engs_size];
504  int sdma_engs[sdma_engs_size];
505 
506  int idx = 0;
507  for (auto & it : regs) {
508  reg_addrs[idx] = it.first;
509  reg_values[idx] = it.second;
510  ++idx;
511  }
512 
513  idx = 0;
514  for (auto & it : doorbells) {
515  doorbells_offset[idx] = it.first;
516  doorbells_queues[idx] = it.second;
517  ++idx;
518  }
519 
520  idx = 0;
521  for (auto & it : sdmaEngs) {
522  sdma_engs_offset[idx] = it.first;
523  sdma_engs[idx] = it.second == sdma0 ? 0 : 1;
524  ++idx;
525  }
526 
527  SERIALIZE_ARRAY(reg_addrs, sizeof(reg_addrs)/sizeof(reg_addrs[0]));
528  SERIALIZE_ARRAY(reg_values, sizeof(reg_values)/sizeof(reg_values[0]));
529  SERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
530  sizeof(doorbells_offset[0]));
531  SERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
532  sizeof(doorbells_queues[0]));
533  SERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
534  sizeof(sdma_engs_offset[0]));
535  SERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
536 
537  // Serialize the device memory
538  deviceMem.serializeSection(cp, "deviceMem");
539 }
540 
541 void
543 {
544  // Unserialize the PciDevice base class
546 
547  uint64_t regs_size = 0;
548  uint64_t doorbells_size = 0;
549  uint64_t sdma_engs_size = 0;
550 
551  UNSERIALIZE_SCALAR(regs_size);
552  UNSERIALIZE_SCALAR(doorbells_size);
553  UNSERIALIZE_SCALAR(sdma_engs_size);
554 
555  if (regs_size > 0) {
556  uint32_t reg_addrs[regs_size];
557  uint64_t reg_values[regs_size];
558 
559  UNSERIALIZE_ARRAY(reg_addrs, sizeof(reg_addrs)/sizeof(reg_addrs[0]));
560  UNSERIALIZE_ARRAY(reg_values,
561  sizeof(reg_values)/sizeof(reg_values[0]));
562 
563  for (int idx = 0; idx < regs_size; ++idx) {
564  regs.insert(std::make_pair(reg_addrs[idx], reg_values[idx]));
565  }
566  }
567 
568  if (doorbells_size > 0) {
569  uint32_t doorbells_offset[doorbells_size];
570  QueueType doorbells_queues[doorbells_size];
571 
572  UNSERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
573  sizeof(doorbells_offset[0]));
574  UNSERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
575  sizeof(doorbells_queues[0]));
576 
577  for (int idx = 0; idx < doorbells_size; ++idx) {
578  regs.insert(std::make_pair(doorbells_offset[idx],
579  doorbells_queues[idx]));
580  doorbells[doorbells_offset[idx]] = doorbells_queues[idx];
581  }
582  }
583 
584  if (sdma_engs_size > 0) {
585  uint32_t sdma_engs_offset[sdma_engs_size];
586  int sdma_engs[sdma_engs_size];
587 
588  UNSERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
589  sizeof(sdma_engs_offset[0]));
590  UNSERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
591 
592  for (int idx = 0; idx < sdma_engs_size; ++idx) {
593  SDMAEngine *sdma = sdma_engs[idx] == 0 ? sdma0 : sdma1;
594  sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
595  }
596  }
597 
598  // Unserialize the device memory
599  deviceMem.unserializeSection(cp, "deviceMem");
600 }
601 
602 uint16_t
604 {
605  for (uint16_t vmid = 1; vmid < AMDGPU_VM_COUNT; vmid++) {
606  auto result = usedVMIDs.find(vmid);
607  if (result == usedVMIDs.end()) {
608  idMap.insert(std::make_pair(pasid, vmid));
609  usedVMIDs[vmid] = {};
610  _lastVMID = vmid;
611  return vmid;
612  }
613  }
614  panic("All VMIDs have been assigned");
615 }
616 
617 void
619 {
620  usedVMIDs.erase(vmid);
621 }
622 
623 void
625 {
626  auto result = idMap.find(pasid);
627  assert(result != idMap.end());
628  if (result == idMap.end()) return;
629  uint16_t vmid = result->second;
630 
631  idMap.erase(result);
632  usedVMIDs.erase(vmid);
633 }
634 
635 void
637 {
638  idMap.erase(idMap.begin(), idMap.end());
639  usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
640 
643 }
644 
645 void
646 AMDGPUDevice::mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
647 {
648  doorbellVMIDMap[doorbell] = vmid;
649 }
650 
651 std::unordered_map<uint16_t, std::set<int>>&
653 {
654  return usedVMIDs;
655 }
656 
657 void
658 AMDGPUDevice::insertQId(uint16_t vmid, int id)
659 {
660  usedVMIDs[vmid].insert(id);
661 }
662 
663 } // namespace gem5
AbstractMemory declaration.
#define DPRINTF(x,...)
Definition: trace.hh:186
const char data[]
Device model for an AMD GPU.
void insertQId(uint16_t vmid, int id)
uint32_t getRegVal(uint32_t addr)
Register value getter/setter.
std::unordered_map< Addr, uint16_t > doorbellVMIDMap
std::unordered_map< uint16_t, uint16_t > idMap
void readMMIO(PacketPtr pkt, Addr offset)
void serialize(CheckpointOut &cp) const override
Checkpoint support.
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void writeMMIO(PacketPtr pkt, Addr offset)
GPUCommandProcessor * cp
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
SDMAEngine * sdma0
PM4PacketProcessor * pm4PktProc
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
void readROM(PacketPtr pkt)
AddrRange romRange
VGA ROM methods.
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
std::array< uint8_t, ROM_SIZE > rom
bool isROM(Addr addr) const
SDMAEngine * sdma1
SDMAEngine * getSDMAEngine(Addr offset)
AMDGPUMemoryManager * gpuMemMgr
Blocks of the GPU.
AMDGPUDevice(const AMDGPUDeviceParams &p)
void readDoorbell(PacketPtr pkt, Addr offset)
Tick readConfig(PacketPtr pkt) override
Read from the PCI config space data that is stored locally.
std::unordered_map< uint16_t, std::set< int > > usedVMIDs
AMDGPUInterruptHandler * deviceIH
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
AMDMMIOReader mmioReader
MMIO reader to populate device registers map.
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
bool checkpoint_before_mmios
Initial checkpoint support variables.
void dispatchAccess(PacketPtr pkt, bool read)
Convert a PCI packet into a response.
void deallocateVmid(uint16_t vmid)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void intrPost()
Methods inherited from PciDevice.
void readFrame(PacketPtr pkt, Addr offset)
Helper methods to handle specific BAR read/writes.
void writeDoorbell(PacketPtr pkt, Addr offset)
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setRegVal(uint32_t addr, uint32_t value)
uint16_t allocateVMID(uint16_t pasid)
void deallocatePasid(uint16_t pasid)
SDMAEngine * getSDMAById(int id)
void writeFrame(PacketPtr pkt, Addr offset)
void setSDMAEngine(Addr offset, SDMAEngine *eng)
memory::PhysicalMemory deviceMem
std::unordered_map< uint32_t, QueueType > doorbells
std::unordered_map< uint32_t, SDMAEngine * > sdmaEngs
void setGPUDevice(AMDGPUDevice *gpu_device)
void updateRptr(const uint32_t &data)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of interrupt handler MMIO registers.
RequestorID getRequestorID() const
Get the requestorID for the memory manager.
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
Definition: amdgpu_vm.hh:168
void readMMIO(PacketPtr pkt, Addr offset)
Definition: amdgpu_vm.cc:69
Addr getMmioAperture(Addr addr)
Definition: amdgpu_vm.hh:216
void writeMMIO(PacketPtr pkt, Addr offset)
Definition: amdgpu_vm.cc:105
Addr getFrameAperture(Addr addr)
Definition: amdgpu_vm.hh:248
Addr gartBase()
Return base address of GART table in framebuffer.
Definition: amdgpu_vm.cc:57
void readMMIOTrace(std::string trace_file)
Read an MMIO trace gathered from a real system and place the MMIO values read and written into the MM...
Definition: mmio_reader.cc:44
void readFromTrace(PacketPtr pkt, int barnum, Addr offset)
Get the next MMIO read from the trace file to an offset in a BAR and write the value to the packet pr...
Definition: mmio_reader.cc:76
void setGPUDevice(AMDGPUDevice *gpu_device)
HSAPacketProcessor & hsaPacketProc()
void setGPUDevice(AMDGPUDevice *gpu_device)
void write(Addr db_addr, uint64_t doorbell_reg)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
void setGPUDevice(AMDGPUDevice *gpu_device)
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
void process(PM4Queue *q, Addr wptrOffset)
This method start processing a PM4Queue from the current read pointer to the newly communicated write...
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
T * getPtr()
get a pointer to the data ptr.
Definition: packet.hh:1212
Addr getAddr() const
Definition: packet.hh:805
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it ...
Definition: packet.cc:361
void setLE(T v)
Set the value in the data pointer to v as little endian.
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:1035
unsigned getSize() const
Definition: packet.hh:815
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
Definition: packet.cc:352
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1200
void makeAtomicResponse()
Definition: packet.hh:1071
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
PCI device, base implementation is only config space.
Definition: device.hh:270
PCIConfig config
The current config space.
Definition: device.hh:275
void unserialize(CheckpointIn &cp) override
Reconstruct the state of this object from a checkpoint.
Definition: device.cc:464
void serialize(CheckpointOut &cp) const override
Serialize this object to the given output stream.
Definition: device.cc:401
bool getBAR(Addr addr, int &num, Addr &offs)
Which base address register (if any) maps the given address?
Definition: device.hh:320
AddrRangeList getAddrRanges() const override
Determine the address ranges that this device responds to.
Definition: device.cc:269
virtual Tick readConfig(PacketPtr pkt)
Read from the PCI config space data that is stored locally.
Definition: device.cc:212
virtual Tick writeConfig(PacketPtr pkt)
Write to the PCI config space data that is stored locally.
Definition: device.cc:283
void intrPost()
Definition: device.hh:364
virtual Tick read(PacketPtr pkt)=0
Pure virtual function that the device must implement.
System DMA Engine class for AMD dGPU.
Definition: sdma_engine.hh:48
void processRLC(Addr doorbellOffset, Addr wptrOffset)
Definition: sdma_engine.cc:240
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setId(int _id)
Definition: sdma_engine.hh:154
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
Definition: sdma_engine.cc:218
void deallocateRLCQueues()
Definition: sdma_engine.cc:209
void processPage(Addr wptrOffset)
Definition: sdma_engine.cc:229
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition: sdma_engine.cc:72
GPUCommandProcessor & gpuCmdProc
Definition: shader.hh:256
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
AddrRange RangeSize(Addr start, Addr size)
Definition: addr_range.hh:815
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
void serializeSection(CheckpointOut &cp, const char *name) const
Serialize an object into a new section.
Definition: serialize.cc:74
#define UNSERIALIZE_ARRAY(member, size)
Definition: serialize.hh:618
#define SERIALIZE_ARRAY(member, size)
Definition: serialize.hh:610
void unserializeSection(CheckpointIn &cp, const char *name)
Unserialize a child object.
Definition: serialize.cc:81
#define warn(...)
Definition: logging.hh:246
Bitfield< 23, 0 > offset
Definition: types.hh:144
Bitfield< 5 > r
Definition: pagetable.hh:60
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 15 > system
Definition: misc.hh:1004
Bitfield< 3 > addr
Definition: types.hh:84
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
constexpr int MMIO_BAR
std::ostream CheckpointOut
Definition: serialize.hh:66
@ InterruptHandler
@ ComputeAQL
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
uint64_t Tick
Tick count type.
Definition: types.hh:58
constexpr uint32_t ROM_SIZE
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition: sim_events.cc:88
static constexpr uint32_t IH_OFFSET_SHIFT
static constexpr uint32_t MMHUB_BASE
static constexpr uint32_t SDMA0_BASE
static constexpr uint32_t GRBM_BASE
static constexpr uint32_t SDMA1_BASE
static constexpr uint32_t MMHUB_OFFSET_SHIFT
static constexpr int AMDGPU_VM_COUNT
static constexpr uint32_t SDMA_OFFSET_SHIFT
constexpr int FRAMEBUFFER_BAR
static constexpr uint32_t IH_BASE
constexpr int DOORBELL_BAR
static constexpr uint32_t NBIO_BASE
constexpr uint32_t VGA_ROM_DEFAULT
static constexpr uint32_t GRBM_OFFSET_SHIFT
Declaration of the Packet class.
#define PCI0_INTERRUPT_PIN
Definition: pcireg.h:135
#define PCI_CONFIG_SIZE
Definition: pcireg.h:165
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
const std::string & name()
Definition: trace.cc:49

Generated on Wed Dec 21 2022 10:22:32 for gem5 by doxygen 1.9.1