gem5 v24.0.0.0
Loading...
Searching...
No Matches
amdgpu_device.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34#include <fstream>
35
36#include "debug/AMDGPUDevice.hh"
44#include "gpu-compute/shader.hh"
45#include "mem/abstract_mem.hh"
46#include "mem/packet.hh"
47#include "mem/packet_access.hh"
48#include "params/AMDGPUDevice.hh"
49#include "sim/byteswap.hh"
50#include "sim/sim_exit.hh"
51
52namespace gem5
53{
54
// Constructor: wires up the GPU device model — loads the ROM image,
// registers device memories with the system, maps SDMA/PM4/MMIO apertures,
// and seeds hardcoded register values the ROCm driver expects.
// NOTE(review): this listing is a garbled doc extraction — each line carries
// a fused original line number and several original lines are missing; the
// gaps are marked below. Confirm against upstream gem5 before editing.
55AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
56 : PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
57 cp(p.cp), checkpoint_before_mmios(p.checkpoint_before_mmios),
58 init_interrupt_count(0), _lastVMID(0),
59 deviceMem(name() + ".deviceMem", p.memories, false, "", false)
60{
61 // Loading the rom binary dumped from hardware.
62 std::ifstream romBin;
63 romBin.open(p.rom_binary, std::ios::binary);
64 romBin.read((char *)rom.data(), ROM_SIZE);
65 romBin.close();
66
67 // System pointer needs to be explicitly set for device memory since
68 // DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.
69 // Note this means the cache line size is system wide.
70 for (auto& m : p.memories) {
71 m->system(p.system);
72
73 // Add to system's device memory map.
74 p.system->addDeviceMemory(gpuMemMgr->getRequestorID(), m);
75 }
76
77 if (config.expansionROM) {
78 romRange = RangeSize(config.expansionROM, ROM_SIZE);
79 } else {
// NOTE(review): original line 80 (the default romRange assignment for the
// no-expansion-ROM case) was dropped by the doc extractor.
81 }
82
 // Select the gfx architecture version from the configured device name.
83 if (p.device_name == "Vega10") {
84 gfx_version = GfxVersion::gfx900;
85 } else if (p.device_name == "MI100") {
86 gfx_version = GfxVersion::gfx908;
87 } else if (p.device_name == "MI200") {
88 gfx_version = GfxVersion::gfx90a;
89 } else if (p.device_name == "MI300X") {
90 gfx_version = GfxVersion::gfx942;
91 } else {
92 panic("Unknown GPU device %s\n", p.device_name);
93 }
94
 // Optionally preload an MMIO trace captured from real hardware.
95 if (p.trace_file != "") {
96 mmioReader.readMMIOTrace(p.trace_file);
97 }
98
 // Register each SDMA engine and record its MMIO window.
99 int sdma_id = 0;
100 for (auto& s : p.sdmas) {
101 s->setGPUDevice(this);
102 s->setId(sdma_id);
103 sdmaIds.insert({sdma_id, s});
104 sdmaMmios.insert({sdma_id,
105 RangeSize(s->getMmioBase(), s->getMmioSize())});
106 DPRINTF(AMDGPUDevice, "SDMA%d has MMIO range %s\n", sdma_id,
107 sdmaMmios[sdma_id].to_string().c_str());
108 sdma_id++;
109 }
110
111 // Map SDMA MMIO addresses to functions
112 sdmaFunc.insert({0x81, &SDMAEngine::setGfxBaseLo});
113 sdmaFunc.insert({0x82, &SDMAEngine::setGfxBaseHi});
114 sdmaFunc.insert({0x88, &SDMAEngine::setGfxRptrHi});
115 sdmaFunc.insert({0x89, &SDMAEngine::setGfxRptrLo});
116 sdmaFunc.insert({0x92, &SDMAEngine::setGfxDoorbellLo});
// NOTE(review): original line 117 (one more gfx doorbell map entry) was
// dropped by the doc extractor.
118 sdmaFunc.insert({0x80, &SDMAEngine::setGfxSize});
119 sdmaFunc.insert({0xb2, &SDMAEngine::setGfxWptrLo});
120 sdmaFunc.insert({0xb3, &SDMAEngine::setGfxWptrHi});
 // Page-queue register offsets differ between Vega10 and the MI parts.
121 if (p.device_name == "Vega10") {
122 sdmaFunc.insert({0xe1, &SDMAEngine::setPageBaseLo});
123 sdmaFunc.insert({0xe9, &SDMAEngine::setPageRptrLo});
124 sdmaFunc.insert({0xe8, &SDMAEngine::setPageRptrHi});
// NOTE(review): original lines 125-126 (page doorbell map entries) were
// dropped by the doc extractor.
127 sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
128 sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
129 } else if (p.device_name == "MI100" || p.device_name == "MI200"
130 || p.device_name == "MI300X") {
131 sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
132 sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
133 sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
// NOTE(review): original lines 134-135 (page doorbell map entries) were
// dropped by the doc extractor.
136 sdmaFunc.insert({0x10b, &SDMAEngine::setPageWptrLo});
137 } else {
138 panic("Unknown GPU device %s\n", p.device_name);
139 }
140
141 // Setup PM4 packet processors and sanity check IDs
142 std::set<int> pm4_ids;
143 for (auto& pm4 : p.pm4_pkt_procs) {
144 pm4->setGPUDevice(this);
145 fatal_if(pm4_ids.count(pm4->getIpId()),
146 "Two PM4s with same IP IDs is not allowed");
147 pm4_ids.insert(pm4->getIpId());
148 pm4PktProcs.insert({pm4->getIpId(), pm4});
149
150 pm4Ranges.insert({pm4->getMMIORange(), pm4});
151 }
152
153 // There should be at least one PM4 packet processor with ID 0
154 fatal_if(!pm4PktProcs.count(0), "No default PM4 processor found");
155
156 deviceIH->setGPUDevice(this);
// NOTE(review): original line 157 (another setGPUDevice call) was dropped
// by the doc extractor.
158 cp->setGPUDevice(this);
159 nbio.setGPUDevice(this);
160
161 // Address aperture for device memory. We tell this to the driver and
162 // could possibly be anything, but these are the values used by hardware.
163 uint64_t mmhubBase = 0x8000ULL << 24;
164 uint64_t mmhubTop = 0x83ffULL << 24;
165 uint64_t mem_size = 0x3ff0; // 16 GB of memory
166
167 gpuvm.setMMHUBBase(mmhubBase);
168 gpuvm.setMMHUBTop(mmhubTop);
169
170 // Map other MMIO apertures based on gfx version. This must be done before
171 // any calls to get/setRegVal.
172 // NBIO 0x0 - 0x4280
173 // IH 0x4280 - 0x4980
174 // GRBM 0x8000 - 0xC000
175 // GFX 0x28000 - 0x3F000
176 // MMHUB 0x68000 - 0x6a120
// NOTE(review): original lines 177-179 and 181 (setMMIOAperture calls for
// the NBIO/IH/GRBM/MMHUB ranges listed above) were dropped by the extractor.
180 gpuvm.setMMIOAperture(GFX_MMIO_RANGE, AddrRange(0x28000, 0x3F000));
182
183 // These are hardcoded register values to return what the driver expects
185
186 // There are different registers for different GPUs, so we set the value
187 // based on the GPU type specified by the user.
188 if (p.device_name == "Vega10") {
189 setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
190 setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
191 } else if (p.device_name == "MI100") {
192 setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
193 setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
194 setRegVal(MI100_MEM_SIZE_REG, mem_size);
195 } else if (p.device_name == "MI200") {
196 // This device can have either 64GB or 128GB of device memory.
197 // This limits to 16GB for simulation.
198 setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
199 setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
200 setRegVal(MI200_MEM_SIZE_REG, mem_size);
201 } else if (p.device_name == "MI300X") {
202 setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
203 setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
204 setRegVal(MI200_MEM_SIZE_REG, mem_size);
205 } else {
206 panic("Unknown GPU device %s\n", p.device_name);
207 }
208}
209
210void
212{
213 Addr rom_offset = pkt->getAddr() & (ROM_SIZE - 1);
214 uint64_t rom_data = 0;
215
216 memcpy(&rom_data, rom.data() + rom_offset, pkt->getSize());
217 pkt->setUintX(rom_data, ByteOrder::little);
218
219 DPRINTF(AMDGPUDevice, "Read from addr %#x on ROM offset %#x data: %#x\n",
220 pkt->getAddr(), rom_offset, rom_data);
221}
222
223void
225{
226 assert(isROM(pkt->getAddr()));
227
228 Addr rom_offset = pkt->getAddr() - romRange.start();
229 uint64_t rom_data = pkt->getUintX(ByteOrder::little);
230
231 memcpy(rom.data() + rom_offset, &rom_data, pkt->getSize());
232
233 DPRINTF(AMDGPUDevice, "Write to addr %#x on ROM offset %#x data: %#x\n",
234 pkt->getAddr(), rom_offset, rom_data);
235}
236
239{
241 AddrRangeList ret_ranges;
242 ret_ranges.push_back(romRange);
243
244 // If the range starts at zero assume OS hasn't assigned it yet. Do not
245 // return ranges starting with zero as they will surely overlap with
246 // another range causing the I/O crossbar to fatal.
247 for (auto & r : ranges) {
248 if (r.start() != 0) {
249 ret_ranges.push_back(r);
250 }
251 }
252
253 return ret_ranges;
254}
255
256Tick
258{
259 int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
260
263 } else {
264 if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
265 int pxcap_offset = offset - PXCAP_BASE;
266
267 switch (pkt->getSize()) {
268 case sizeof(uint8_t):
269 pkt->setLE<uint8_t>(pxcap.data[pxcap_offset]);
271 "Read PXCAP: dev %#x func %#x reg %#x 1 bytes: data "
272 "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
273 (uint32_t)pkt->getLE<uint8_t>());
274 break;
275 case sizeof(uint16_t):
276 pkt->setLE<uint16_t>(
277 *(uint16_t*)&pxcap.data[pxcap_offset]);
279 "Read PXCAP: dev %#x func %#x reg %#x 2 bytes: data "
280 "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
281 (uint32_t)pkt->getLE<uint16_t>());
282 break;
283 case sizeof(uint32_t):
284 pkt->setLE<uint32_t>(
285 *(uint32_t*)&pxcap.data[pxcap_offset]);
287 "Read PXCAP: dev %#x func %#x reg %#x 4 bytes: data "
288 "= %#x\n",_busAddr.dev, _busAddr.func, pxcap_offset,
289 (uint32_t)pkt->getLE<uint32_t>());
290 break;
291 default:
292 panic("Invalid access size (%d) for amdgpu PXCAP %#x\n",
293 pkt->getSize(), pxcap_offset);
294 }
295 pkt->makeAtomicResponse();
296 } else {
297 warn("Device specific offset %d not implemented!\n", offset);
298 }
299 }
300
301 // Before sending MMIOs the driver sends three interrupts in a row.
302 // Use this to trigger creating a checkpoint to restore in timing mode.
303 // This is only necessary until we can create a "hole" in the KVM VM
304 // around the VGA ROM region such that KVM exits and sends requests to
305 // this device rather than the KVM VM.
307 if (offset == PCI0_INTERRUPT_PIN) {
308 if (++init_interrupt_count == 3) {
309 DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n");
310 exitSimLoop("checkpoint", 0, curTick() + configDelay + 1);
311 }
312 } else {
314 }
315 }
316
317 return configDelay;
318}
319
320Tick
322{
323 [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
324 DPRINTF(AMDGPUDevice, "Write Config: from offset: %#x size: %#x "
325 "data: %#x\n", offset, pkt->getSize(),
326 pkt->getUintX(ByteOrder::little));
327
329 return PciDevice::writeConfig(pkt);
330
331
332 if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
333 uint8_t *pxcap_data = &(pxcap.data[0]);
334 int pxcap_offset = offset - PXCAP_BASE;
335
336 DPRINTF(AMDGPUDevice, "Writing PXCAP offset %d size %d\n",
337 pxcap_offset, pkt->getSize());
338
339 memcpy(pxcap_data + pxcap_offset, pkt->getConstPtr<void>(),
340 pkt->getSize());
341 }
342
343 pkt->makeAtomicResponse();
344
345 return configDelay;
346}
347
348void
350{
351 DPRINTF(AMDGPUDevice, "%s from addr %#x size: %#x data: %#x\n",
352 read ? "Read" : "Write", pkt->getAddr(), pkt->getSize(),
353 pkt->getUintX(ByteOrder::little));
354
355 pkt->makeAtomicResponse();
356}
357
358void
360{
361 DPRINTF(AMDGPUDevice, "Read framebuffer address %#lx\n", offset);
362
363 /*
364 * Return data for frame reads in priority order: (1) Special addresses
365 * first, ignoring any writes from driver. (2) Any other address from
366 * device backing store / abstract memory class functionally.
367 */
368 if (nbio.readFrame(pkt, offset)) {
369 return;
370 }
371
372 /*
373 * Read the value from device memory. This must be done functionally
374 * because this method is called by the PCIDevice::read method which
375 * is a non-timing read.
376 */
377 RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
379 PacketPtr readPkt = Packet::createRead(req);
380 uint8_t *dataPtr = new uint8_t[pkt->getSize()];
381 readPkt->dataDynamic(dataPtr);
382
383 auto system = cp->shader()->gpuCmdProc.system();
384 system->getDeviceMemory(readPkt)->access(readPkt);
385
386 pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
387 delete readPkt;
388}
389
390void
396
397void
399{
401 Addr aperture_offset = offset - aperture.start();
402
403 // By default read from MMIO trace. Overwrite the packet for a select
404 // few more dynamic MMIOs.
405 DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
407
408 if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
409 DPRINTF(AMDGPUDevice, "NBIO base\n");
410 nbio.readMMIO(pkt, aperture_offset);
411 } else if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
412 DPRINTF(AMDGPUDevice, "GRBM base\n");
413 gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
414 } else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
415 DPRINTF(AMDGPUDevice, "GFX base\n");
416 gfx.readMMIO(pkt, aperture_offset);
417 } else if (aperture == gpuvm.getMMIORange(MMHUB_MMIO_RANGE)) {
418 DPRINTF(AMDGPUDevice, "MMHUB base\n");
419 gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
420 } else {
421 DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for read %#x\n", offset);
422 }
423}
424
425void
427{
428 DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);
429
430 for (auto& cu: CP()->shader()->cuList) {
431 auto system = CP()->shader()->gpuCmdProc.system();
432 Addr aligned_addr = offset & ~(system->cacheLineSize() - 1);
433 cu->sendInvL2(aligned_addr);
434 }
435
436 Addr aperture = gpuvm.getFrameAperture(offset);
437 Addr aperture_offset = offset - aperture;
438
439 // Record the value
440 if (aperture == gpuvm.gartBase()) {
441 gpuvm.gartTable[aperture_offset] = pkt->getUintX(ByteOrder::little);
442 DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", aperture_offset,
443 gpuvm.gartTable[aperture_offset]);
444 }
445
446 nbio.writeFrame(pkt, offset);
447
448 /*
449 * Write the value to device memory. This must be done functionally
450 * because this method is called by the PCIDevice::write method which
451 * is a non-timing write.
452 */
453 RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
455 PacketPtr writePkt = Packet::createWrite(req);
456 uint8_t *dataPtr = new uint8_t[pkt->getSize()];
457 std::memcpy(dataPtr, pkt->getPtr<uint8_t>(),
458 pkt->getSize() * sizeof(uint8_t));
459 writePkt->dataDynamic(dataPtr);
460
461 auto system = cp->shader()->gpuCmdProc.system();
462 system->getDeviceMemory(writePkt)->access(writePkt);
463
464 delete writePkt;
465}
466
467void
469{
470 DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset);
471
472 if (doorbells.find(offset) != doorbells.end()) {
473 QueueType q_type = doorbells[offset].qtype;
474 int ip_id = doorbells[offset].ip_id;
475 DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
476 offset, q_type);
477 switch (q_type) {
478 case Compute:
479 assert(pm4PktProcs.count(ip_id));
480 pm4PktProcs[ip_id]->process(
481 pm4PktProcs[ip_id]->getQueue(offset),
482 pkt->getLE<uint64_t>());
483 break;
484 case Gfx:
485 assert(pm4PktProcs.count(ip_id));
486 pm4PktProcs[ip_id]->process(
487 pm4PktProcs[ip_id]->getQueue(offset, true),
488 pkt->getLE<uint64_t>());
489 break;
490 case SDMAGfx: {
491 SDMAEngine *sdmaEng = getSDMAEngine(offset);
492 sdmaEng->processGfx(pkt->getLE<uint64_t>());
493 } break;
494 case SDMAPage: {
495 SDMAEngine *sdmaEng = getSDMAEngine(offset);
496 sdmaEng->processPage(pkt->getLE<uint64_t>());
497 } break;
498 case ComputeAQL: {
499 assert(pm4PktProcs.count(ip_id));
501 pkt->getLE<uint64_t>() + 1);
502 pm4PktProcs[ip_id]->updateReadIndex(offset,
503 pkt->getLE<uint64_t>() + 1);
504 } break;
505 case InterruptHandler:
506 deviceIH->updateRptr(pkt->getLE<uint32_t>());
507 break;
508 case RLC: {
509 SDMAEngine *sdmaEng = getSDMAEngine(offset);
510 sdmaEng->processRLC(offset, pkt->getLE<uint64_t>());
511 } break;
512 default:
513 panic("Write to unkown queue type!");
514 }
515 } else {
516 warn("Unknown doorbell offset: %lx. Saving to pending doorbells.\n",
517 offset);
518
519 // We have to ACK the PCI packet immediately, so create a copy of the
520 // packet here to send again. The packet data contains the value of
521 // the doorbell to write so we need to copy that as the original
522 // packet gets deleted after the PCI write() method returns.
523 RequestPtr pending_req(pkt->req);
524 PacketPtr pending_pkt = Packet::createWrite(pending_req);
525 uint8_t *pending_data = new uint8_t[pkt->getSize()];
526 memcpy(pending_data, pkt->getPtr<uint8_t>(), pkt->getSize());
527 pending_pkt->dataDynamic(pending_data);
528
529 pendingDoorbellPkts.emplace(offset, pending_pkt);
530 }
531}
532
533void
535{
537 Addr aperture_offset = offset - aperture.start();
538
539 DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);
540
541 // Check SDMA functions first, then fallback to MMIO ranges.
542 for (int idx = 0; idx < sdmaIds.size(); ++idx) {
543 if (sdmaMmios[idx].contains(offset)) {
544 Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
545 if (sdmaFunc.count(sdma_offset)) {
546 DPRINTF(AMDGPUDevice, "Calling SDMA%d MMIO function %lx\n",
547 idx, sdma_offset);
548 sdmaFuncPtr mptr = sdmaFunc[sdma_offset];
549 (getSDMAById(idx)->*mptr)(pkt->getLE<uint32_t>());
550 } else {
551 DPRINTF(AMDGPUDevice, "Unknown SDMA%d MMIO: %#lx\n", idx,
552 sdma_offset);
553 }
554
555 return;
556 }
557 }
558
559 // Check PM4s next, returning to avoid duplicate writes.
560 for (auto& [range, pm4_proc] : pm4Ranges) {
561 if (range.contains(offset)) {
562 // PM4 MMIOs are offset based on the MMIO range start
563 Addr ip_offset = offset - range.start();
564 pm4_proc->writeMMIO(pkt, ip_offset >> GRBM_OFFSET_SHIFT);
565
566 return;
567 }
568 }
569
570 if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
571 DPRINTF(AMDGPUDevice, "GRBM base\n");
572 gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
573 } else if (aperture == gpuvm.getMMIORange(IH_MMIO_RANGE)) {
574 DPRINTF(AMDGPUDevice, "IH base\n");
575 deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
576 } else if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
577 DPRINTF(AMDGPUDevice, "NBIO base\n");
578 nbio.writeMMIO(pkt, aperture_offset);
579 } else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
580 DPRINTF(AMDGPUDevice, "GFX base\n");
581 gfx.writeMMIO(pkt, aperture_offset);
582 } else {
583 DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for write %#x\n", offset);
584 }
585}
586
587Tick
589{
590 if (isROM(pkt->getAddr())) {
591 readROM(pkt);
592 } else {
593 int barnum = -1;
594 Addr offset = 0;
595 getBAR(pkt->getAddr(), barnum, offset);
596
597 switch (barnum) {
598 case FRAMEBUFFER_BAR:
599 readFrame(pkt, offset);
600 break;
601 case DOORBELL_BAR:
602 readDoorbell(pkt, offset);
603 break;
604 case MMIO_BAR:
605 readMMIO(pkt, offset);
606 break;
607 default:
608 panic("Request with address out of mapped range!");
609 }
610 }
611
612 dispatchAccess(pkt, true);
613 return pioDelay;
614}
615
616Tick
618{
619 if (isROM(pkt->getAddr())) {
620 writeROM(pkt);
621
622 dispatchAccess(pkt, false);
623
624 return pioDelay;
625 }
626
627 int barnum = -1;
628 Addr offset = 0;
629 getBAR(pkt->getAddr(), barnum, offset);
630
631 switch (barnum) {
632 case FRAMEBUFFER_BAR:
633 writeFrame(pkt, offset);
634 break;
635 case DOORBELL_BAR:
636 writeDoorbell(pkt, offset);
637 break;
638 case MMIO_BAR:
639 writeMMIO(pkt, offset);
640 break;
641 default:
642 panic("Request with address out of mapped range!");
643 }
644
645 // Record only if there is non-zero value, or a value to be overwritten.
646 // Reads return 0 by default.
647 uint64_t data = pkt->getUintX(ByteOrder::little);
648
649 DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
650 pkt->getAddr(), data);
651
652 dispatchAccess(pkt, false);
653
654 return pioDelay;
655}
656
657void
659{
660 if (pendingDoorbellPkts.count(offset)) {
661 DPRINTF(AMDGPUDevice, "Sending pending doorbell %x\n", offset);
665 }
666}
667
668uint32_t
670{
671 // This is somewhat of a guess based on amdgpu_device_mm_access
672 // in amdgpu_device.c in the ROCk driver. If bit 32 is 1 then
673 // assume VRAM and use full address, otherwise assume register
674 // address and only user lower 31 bits.
675 Addr fixup_addr = bits(addr, 31, 31) ? addr : addr & 0x7fffffff;
676
677 uint32_t pkt_data = 0;
678 RequestPtr request = std::make_shared<Request>(fixup_addr,
679 sizeof(uint32_t), 0 /* flags */, vramRequestorId());
680 PacketPtr pkt = Packet::createRead(request);
681 pkt->dataStatic((uint8_t *)&pkt_data);
682 readMMIO(pkt, addr);
683 DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n",
684 fixup_addr, pkt->getLE<uint32_t>());
685
686 pkt_data = pkt->getLE<uint32_t>();
687 delete pkt;
688
689 return pkt_data;
690}
691
692void
693AMDGPUDevice::setRegVal(uint64_t addr, uint32_t value)
694{
695 DPRINTF(AMDGPUDevice, "Setting register 0x%lx to %x\n",
696 addr, value);
697
698 uint32_t pkt_data = value;
699 RequestPtr request = std::make_shared<Request>(addr,
700 sizeof(uint32_t), 0 /* flags */, vramRequestorId());
701 PacketPtr pkt = Packet::createWrite(request);
702 pkt->dataStatic((uint8_t *)&pkt_data);
703 writeMMIO(pkt, addr);
704 delete pkt;
705}
706
707void
709{
710 DPRINTF(AMDGPUDevice, "Setting doorbell type for %x\n", offset);
711 doorbells[offset].qtype = qt;
712 doorbells[offset].ip_id = ip_id;
713}
714
715void
717{
718 doorbells.erase(offset);
719}
720
721void
726
729{
734 assert(sdmaIds.count(id));
735
736 return sdmaIds[id];
737}
738
744
745void
750
751void
753{
754 // Serialize the PciDevice base class
756
757 uint64_t doorbells_size = doorbells.size();
758 uint64_t sdma_engs_size = sdmaEngs.size();
759 uint64_t used_vmid_map_size = usedVMIDs.size();
760
761 SERIALIZE_SCALAR(doorbells_size);
762 SERIALIZE_SCALAR(sdma_engs_size);
763 // Save the number of vmids used
764 SERIALIZE_SCALAR(used_vmid_map_size);
765
766 // Make a c-style array of the regs to serialize
767 uint32_t doorbells_offset[doorbells_size];
768 QueueType doorbells_queues[doorbells_size];
769 int doorbells_ip_ids[doorbells_size];
770 uint32_t sdma_engs_offset[sdma_engs_size];
771 int sdma_engs[sdma_engs_size];
772 int used_vmids[used_vmid_map_size];
773 int used_queue_id_sizes[used_vmid_map_size];
774 std::vector<int> used_vmid_sets;
775
776 int idx = 0;
777 for (auto & it : doorbells) {
778 doorbells_offset[idx] = it.first;
779 doorbells_queues[idx] = it.second.qtype;
780 doorbells_ip_ids[idx] = it.second.ip_id;
781 ++idx;
782 }
783
784 idx = 0;
785 for (auto & it : sdmaEngs) {
786 sdma_engs_offset[idx] = it.first;
787 sdma_engs[idx] = it.second->getId();
788 ++idx;
789 }
790
791 idx = 0;
792 for (auto & it : usedVMIDs) {
793 used_vmids[idx] = it.first;
794 used_queue_id_sizes[idx] = it.second.size();
795 std::vector<int> set_vector(it.second.begin(), it.second.end());
796 used_vmid_sets.insert(used_vmid_sets.end(),
797 set_vector.begin(), set_vector.end());
798 ++idx;
799 }
800
801 int num_queue_id = used_vmid_sets.size();
802 int* vmid_array = new int[num_queue_id];
803 std::copy(used_vmid_sets.begin(), used_vmid_sets.end(), vmid_array);
804
805 SERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
806 sizeof(doorbells_offset[0]));
807 SERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
808 sizeof(doorbells_queues[0]));
809 SERIALIZE_ARRAY(doorbells_ip_ids, sizeof(doorbells_ip_ids)/
810 sizeof(doorbells_ip_ids[0]));
811 SERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
812 sizeof(sdma_engs_offset[0]));
813 SERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
814 // Save the vmids used in an array
815 SERIALIZE_ARRAY(used_vmids, sizeof(used_vmids)/sizeof(used_vmids[0]));
816 // Save the size of the set of queue ids mapped to each vmid
817 SERIALIZE_ARRAY(used_queue_id_sizes,
818 sizeof(used_queue_id_sizes)/sizeof(used_queue_id_sizes[0]));
819 // Save all the queue ids used for all the vmids
820 SERIALIZE_ARRAY(vmid_array, num_queue_id);
821 // Save the total number of queue idsused
822 SERIALIZE_SCALAR(num_queue_id);
823
824 // Serialize the device memory
825 deviceMem.serializeSection(cp, "deviceMem");
826 gpuvm.serializeSection(cp, "GPUVM");
827
828 delete[] vmid_array;
829}
830
831void
833{
834 // Unserialize the PciDevice base class
836
837 uint64_t doorbells_size = 0;
838 uint64_t sdma_engs_size = 0;
839 uint64_t used_vmid_map_size = 0;
840
841 UNSERIALIZE_SCALAR(doorbells_size);
842 UNSERIALIZE_SCALAR(sdma_engs_size);
843 UNSERIALIZE_SCALAR(used_vmid_map_size);
844
845
846 if (doorbells_size > 0) {
847 uint32_t doorbells_offset[doorbells_size];
848 QueueType doorbells_queues[doorbells_size];
849 int doorbells_ip_ids[doorbells_size];
850
851 UNSERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
852 sizeof(doorbells_offset[0]));
853 UNSERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
854 sizeof(doorbells_queues[0]));
855 UNSERIALIZE_ARRAY(doorbells_ip_ids, sizeof(doorbells_ip_ids)/
856 sizeof(doorbells_ip_ids[0]));
857
858 for (int idx = 0; idx < doorbells_size; ++idx) {
859 doorbells[doorbells_offset[idx]].qtype = doorbells_queues[idx];
860 doorbells[doorbells_offset[idx]].ip_id = doorbells_ip_ids[idx];
861 }
862 }
863
864 if (sdma_engs_size > 0) {
865 uint32_t sdma_engs_offset[sdma_engs_size];
866 int sdma_engs[sdma_engs_size];
867
868 UNSERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
869 sizeof(sdma_engs_offset[0]));
870 UNSERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
871
872 for (int idx = 0; idx < sdma_engs_size; ++idx) {
873 int sdma_id = sdma_engs[idx];
874 assert(sdmaIds.count(sdma_id));
875 SDMAEngine *sdma = sdmaIds[sdma_id];
876 sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
877 }
878 }
879
880 if (used_vmid_map_size > 0) {
881 int used_vmids[used_vmid_map_size];
882 int used_queue_id_sizes[used_vmid_map_size];
883 int num_queue_id = 0;
884 std::vector<int> used_vmid_sets;
885 // Extract the total number of queue ids used
886 UNSERIALIZE_SCALAR(num_queue_id);
887 int* vmid_array = new int[num_queue_id];
888 // Extract the number of vmids used
889 UNSERIALIZE_ARRAY(used_vmids, used_vmid_map_size);
890 // Extract the size of the queue id set for each vmid
891 UNSERIALIZE_ARRAY(used_queue_id_sizes, used_vmid_map_size);
892 // Extract all the queue ids used
893 UNSERIALIZE_ARRAY(vmid_array, num_queue_id);
894 // Populate the usedVMIDs map with the queue ids per vm
895 int idx = 0;
896 for (int it = 0; it < used_vmid_map_size; it++) {
897 int vmid = used_vmids[it];
898 int vmid_set_size = used_queue_id_sizes[it];
899 for (int j = 0; j < vmid_set_size; j++) {
900 usedVMIDs[vmid].insert(vmid_array[idx + j]);
901 }
902 idx += vmid_set_size;
903 }
904 delete[] vmid_array;
905 }
906
907 // Unserialize the device memory
908 deviceMem.unserializeSection(cp, "deviceMem");
909 gpuvm.unserializeSection(cp, "GPUVM");
910}
911
912uint16_t
914{
915 for (uint16_t vmid = 1; vmid < AMDGPU_VM_COUNT; vmid++) {
916 auto result = usedVMIDs.find(vmid);
917 if (result == usedVMIDs.end()) {
918 idMap.insert(std::make_pair(pasid, vmid));
919 usedVMIDs[vmid] = {};
920 _lastVMID = vmid;
921 return vmid;
922 }
923 }
924 panic("All VMIDs have been assigned");
925}
926
927void
929{
930 usedVMIDs.erase(vmid);
931}
932
933void
935{
936 auto result = idMap.find(pasid);
937 assert(result != idMap.end());
938 if (result == idMap.end()) return;
939 uint16_t vmid = result->second;
940
941 idMap.erase(result);
942 usedVMIDs.erase(vmid);
943}
944
945void
947{
948 idMap.erase(idMap.begin(), idMap.end());
949 usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
950
951 for (auto& it : sdmaEngs) {
952 it.second->deallocateRLCQueues();
953 }
954
955 // "All" queues implicitly refers to all user queues. User queues begin at
956 // doorbell address 0x4000, so unmap any queue at or above that address.
957 for (auto [offset, vmid] : doorbellVMIDMap) {
958 if (offset >= 0x4000) {
959 doorbells.erase(offset);
960 }
961 }
962}
963
964void
965AMDGPUDevice::mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
966{
967 doorbellVMIDMap[doorbell] = vmid;
968}
969
970std::unordered_map<uint16_t, std::set<int>>&
972{
973 return usedVMIDs;
974}
975
976void
977AMDGPUDevice::insertQId(uint16_t vmid, int id)
978{
979 usedVMIDs[vmid].insert(id);
980}
981
982} // namespace gem5
AbstractMemory declaration.
#define AMDGPU_MP0_SMN_C2PMSG_33
#define VEGA10_FB_LOCATION_BASE
Definition amdgpu_vm.hh:77
#define VEGA10_FB_LOCATION_TOP
Definition amdgpu_vm.hh:78
#define MI200_MEM_SIZE_REG
Definition amdgpu_vm.hh:84
#define MI200_FB_LOCATION_TOP
Definition amdgpu_vm.hh:86
#define MI100_FB_LOCATION_BASE
Definition amdgpu_vm.hh:81
#define MI200_FB_LOCATION_BASE
Definition amdgpu_vm.hh:85
#define MI100_FB_LOCATION_TOP
Definition amdgpu_vm.hh:82
#define MI100_MEM_SIZE_REG
Definition amdgpu_vm.hh:80
#define DPRINTF(x,...)
Definition trace.hh:210
const char data[]
Device model for an AMD GPU.
void insertQId(uint16_t vmid, int id)
std::unordered_map< AddrRange, PM4PacketProcessor *, AddrRangeHasher > pm4Ranges
std::unordered_map< Addr, uint16_t > doorbellVMIDMap
std::unordered_map< uint16_t, uint16_t > idMap
void readMMIO(PacketPtr pkt, Addr offset)
void serialize(CheckpointOut &cp) const override
Checkpoint support.
void processPendingDoorbells(uint32_t offset)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void writeMMIO(PacketPtr pkt, Addr offset)
GPUCommandProcessor * cp
void setDoorbellType(uint32_t offset, QueueType qt, int ip_id=0)
Set handles to GPU blocks.
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
void readROM(PacketPtr pkt)
AddrRange romRange
VGA ROM methods.
std::unordered_map< uint32_t, DoorbellInfo > doorbells
Structures to hold registers, doorbells, and some frame memory.
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
std::array< uint8_t, ROM_SIZE > rom
bool isROM(Addr addr) const
void unsetDoorbell(uint32_t offset)
std::unordered_map< uint32_t, PacketPtr > pendingDoorbellPkts
void setRegVal(uint64_t addr, uint32_t value)
std::unordered_map< uint32_t, AddrRange > sdmaMmios
void(SDMAEngine::* sdmaFuncPtr)(uint32_t)
SDMAEngine * getSDMAEngine(Addr offset)
AMDGPUMemoryManager * gpuMemMgr
AMDGPUDevice(const AMDGPUDeviceParams &p)
void readDoorbell(PacketPtr pkt, Addr offset)
AMDGPUNbio nbio
Blocks of the GPU.
Tick readConfig(PacketPtr pkt) override
Read from the PCI config space data that is stored locally.
std::unordered_map< uint32_t, sdmaFuncPtr > sdmaFunc
std::unordered_map< uint16_t, std::set< int > > usedVMIDs
AMDGPUInterruptHandler * deviceIH
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
AMDMMIOReader mmioReader
MMIO reader to populate device registers map.
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
bool checkpoint_before_mmios
Initial checkpoint support variables.
void dispatchAccess(PacketPtr pkt, bool read)
Convert a PCI packet into a response.
uint32_t getRegVal(uint64_t addr)
Register value getter/setter.
void deallocateVmid(uint16_t vmid)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void intrPost()
Methods inherited from PciDevice.
void readFrame(PacketPtr pkt, Addr offset)
Helper methods to handle specific BAR read/writes.
void writeROM(PacketPtr pkt)
void writeDoorbell(PacketPtr pkt, Addr offset)
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
std::unordered_map< uint32_t, SDMAEngine * > sdmaIds
uint16_t allocateVMID(uint16_t pasid)
std::unordered_map< int, PM4PacketProcessor * > pm4PktProcs
void deallocatePasid(uint16_t pasid)
SDMAEngine * getSDMAById(int id)
void writeFrame(PacketPtr pkt, Addr offset)
void setSDMAEngine(Addr offset, SDMAEngine *eng)
memory::PhysicalMemory deviceMem
std::unordered_map< uint32_t, SDMAEngine * > sdmaEngs
GPUCommandProcessor * CP()
void readMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_gfx.cc:48
void writeMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_gfx.cc:66
void setGPUDevice(AMDGPUDevice *gpu_device)
void updateRptr(const uint32_t &data)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of interrupt handler MMIO registers.
RequestorID getRequestorID() const
Get the requestorID for the memory manager.
void readMMIO(PacketPtr pkt, Addr offset)
void writeMMIO(PacketPtr pkt, Addr offset)
bool readFrame(PacketPtr pkt, Addr offset)
void writeFrame(PacketPtr pkt, Addr offset)
void setGPUDevice(AMDGPUDevice *gpu_device)
void setMMIOAperture(mmio_range_t mmio_aperture, AddrRange range)
Definition amdgpu_vm.cc:62
void setMMHUBBase(Addr base)
Definition amdgpu_vm.hh:230
AddrRange getMMIORange(mmio_range_t mmio_aperture)
Definition amdgpu_vm.cc:68
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
Definition amdgpu_vm.hh:203
void readMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_vm.cc:99
const AddrRange & getMMIOAperture(Addr addr)
Definition amdgpu_vm.cc:74
void writeMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_vm.cc:135
Addr getFrameAperture(Addr addr)
Definition amdgpu_vm.hh:259
Addr gartBase()
Return base address of GART table in framebuffer.
Definition amdgpu_vm.cc:87
void setMMHUBTop(Addr top)
Definition amdgpu_vm.hh:231
void readMMIOTrace(std::string trace_file)
Read an MMIO trace gathered from a real system and place the MMIO values read and written into the MM...
void readFromTrace(PacketPtr pkt, int barnum, Addr offset)
Get the next MMIO read from the trace file to an offset in a BAR and write the value to the packet pr...
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition addr_range.hh:82
void setGPUDevice(AMDGPUDevice *gpu_device)
HSAPacketProcessor & hsaPacketProc()
void setGPUDevice(AMDGPUDevice *gpu_device)
void write(Addr db_addr, uint64_t doorbell_reg)
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it ...
Definition packet.cc:361
void setLE(T v)
Set the value in the data pointer to v as little endian.
static PacketPtr createWrite(const RequestPtr &req)
Definition packet.hh:1044
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition packet.hh:1175
T * getPtr()
get a pointer to the data ptr.
Definition packet.hh:1225
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition packet.hh:1038
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
unsigned getSize() const
Definition packet.hh:817
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
Definition packet.cc:352
const T * getConstPtr() const
Definition packet.hh:1234
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
void makeAtomicResponse()
Definition packet.hh:1074
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
PCI device, base implementation is only config space.
Definition device.hh:270
PCIConfig config
The current config space.
Definition device.hh:275
void unserialize(CheckpointIn &cp) override
Reconstruct the state of this object from a checkpoint.
Definition device.cc:464
void serialize(CheckpointOut &cp) const override
Serialize this object to the given output stream.
Definition device.cc:401
bool getBAR(Addr addr, int &num, Addr &offs)
Which base address register (if any) maps the given address?
Definition device.hh:320
AddrRangeList getAddrRanges() const override
Determine the address ranges that this device responds to.
Definition device.cc:269
const PciBusAddr _busAddr
Definition device.hh:272
virtual Tick readConfig(PacketPtr pkt)
Read from the PCI config space data that is stored locally.
Definition device.cc:212
virtual Tick writeConfig(PacketPtr pkt)
Write to the PCI config space data that is stored locally.
Definition device.cc:283
void intrPost()
Definition device.hh:364
const int PXCAP_BASE
Definition device.hh:300
virtual Tick read(PacketPtr pkt)=0
Pure virtual function that the device must implement.
System DMA Engine class for AMD dGPU.
void setPageRptrLo(uint32_t data)
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
void setGfxRptrHi(uint32_t data)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void setGfxSize(uint32_t data)
void setGfxBaseLo(uint32_t data)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
void setPageDoorbellOffsetLo(uint32_t data)
void setPageWptrLo(uint32_t data)
void setGfxDoorbellLo(uint32_t data)
void setPageDoorbellLo(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void setGfxBaseHi(uint32_t data)
void setPageRptrHi(uint32_t data)
std::vector< ComputeUnit * > cuList
Definition shader.hh:268
GPUCommandProcessor & gpuCmdProc
Definition shader.hh:270
memory::AbstractMemory * getDeviceMemory(const PacketPtr &pkt) const
Return a pointer to the device memory.
Definition system.cc:311
void access(PacketPtr pkt)
Perform an untimed memory access and update all the state (e.g.
STL vector class.
Definition stl.hh:37
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
AddrRange RangeSize(Addr start, Addr size)
Addr start() const
Get the start address of the range.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:236
void serializeSection(CheckpointOut &cp, const char *name) const
Serialize an object into a new section.
Definition serialize.cc:74
#define UNSERIALIZE_ARRAY(member, size)
Definition serialize.hh:618
#define SERIALIZE_ARRAY(member, size)
Definition serialize.hh:610
void unserializeSection(CheckpointIn &cp, const char *name)
Unserialize an a child object.
Definition serialize.cc:81
#define warn(...)
Definition logging.hh:256
Bitfield< 4 > s
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 33 > id
Bitfield< 0 > m
Bitfield< 0 > p
Bitfield< 15 > system
Definition misc.hh:1032
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
constexpr int MMIO_BAR
std::ostream CheckpointOut
Definition serialize.hh:66
@ InterruptHandler
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
Bitfield< 10 > pasid
Definition x86_cpu.cc:129
uint64_t Tick
Tick count type.
Definition types.hh:58
constexpr uint32_t ROM_SIZE
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition sim_events.cc:88
static constexpr uint32_t IH_OFFSET_SHIFT
static constexpr uint32_t MMHUB_OFFSET_SHIFT
static constexpr int AMDGPU_VM_COUNT
constexpr int FRAMEBUFFER_BAR
@ GRBM_MMIO_RANGE
Definition amdgpu_vm.hh:107
@ GFX_MMIO_RANGE
Definition amdgpu_vm.hh:106
@ IH_MMIO_RANGE
Definition amdgpu_vm.hh:108
@ MMHUB_MMIO_RANGE
Definition amdgpu_vm.hh:105
@ NBIO_MMIO_RANGE
Definition amdgpu_vm.hh:104
constexpr int DOORBELL_BAR
constexpr uint32_t VGA_ROM_DEFAULT
static constexpr uint32_t GRBM_OFFSET_SHIFT
Declaration of the Packet class.
#define PCI0_INTERRUPT_PIN
Definition pcireg.h:135
#define PCI_DEVICE_SPECIFIC
Definition pcireg.h:164
#define PCI_CONFIG_SIZE
Definition pcireg.h:165
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
uint8_t func
Definition types.hh:58
uint8_t dev
Definition types.hh:57
const std::string & name()
Definition trace.cc:48
Defines the PCI Express capability register and its associated bitfields for a PCIe device.
Definition pcireg.h:330
uint8_t data[48]
Definition pcireg.h:331

Generated on Tue Jun 18 2024 16:24:02 for gem5 by doxygen 1.11.0