gem5 [DEVELOP-FOR-25.0]
Loading...
Searching...
No Matches
amdgpu_device.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34#include <fstream>
35
36#include "debug/AMDGPUDevice.hh"
44#include "gpu-compute/shader.hh"
45#include "mem/abstract_mem.hh"
46#include "mem/packet.hh"
47#include "mem/packet_access.hh"
48#include "params/AMDGPUDevice.hh"
49#include "sim/byteswap.hh"
50#include "sim/sim_exit.hh"
51
52namespace gem5
53{
54
/**
 * Construct the AMDGPU device model from its parameters.
 *
 * Visible steps: attach each device memory to the system, derive gfx_version
 * from p.device_name, hook up the SDMA engines and PM4 packet processors,
 * program the MMIO apertures, and preload registers the driver reads.
 * An unrecognised p.device_name panics.
 *
 * NOTE(review): parts of the initializer list and the body of the
 * expansion-ROM else branch are not visible in this listing; the comments
 * here describe only the code that is shown.
 */
55AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
56 : PciEndpoint(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
59 deviceMem(name() + ".deviceMem", p.memories, false, "", false)
60{
61 // System pointer needs to be explicitly set for device memory since
62 // DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.
63 // Note this means the cache line size is system wide.
64 for (auto& m : p.memories) {
65 m->system(p.system);
66
67 // Add to system's device memory map.
68 p.system->addDeviceMemory(gpuMemMgr->getRequestorID(), m);
69 }
70
// Use the expansion ROM address from PCI config when it is non-zero.
71 if (config().expansionROM) {
72 romRange = RangeSize(config().expansionROM, ROM_SIZE);
73 } else {
75 }
76
// Translate the user-visible device name into the gfx ISA version.
77 if (p.device_name == "Vega10") {
78 gfx_version = GfxVersion::gfx900;
79 } else if (p.device_name == "MI100") {
80 gfx_version = GfxVersion::gfx908;
81 } else if (p.device_name == "MI200") {
82 gfx_version = GfxVersion::gfx90a;
83 } else if (p.device_name == "MI300X") {
84 gfx_version = GfxVersion::gfx942;
85 } else {
86 panic("Unknown GPU device %s\n", p.device_name);
87 }
88
// Number the SDMA engines sequentially (0, 1, ...) in parameter order and
// remember each engine's MMIO range under that ID.
89 int sdma_id = 0;
90 for (auto& s : p.sdmas) {
91 s->setGPUDevice(this);
92 s->setId(sdma_id);
93 sdmaIds.insert({sdma_id, s});
94 sdmaMmios.insert({sdma_id,
95 RangeSize(s->getMmioBase(), s->getMmioSize())});
96 DPRINTF(AMDGPUDevice, "SDMA%d has MMIO range %s\n", sdma_id,
97 sdmaMmios[sdma_id].to_string().c_str());
98 sdma_id++;
99 }
100
101 // Map SDMA MMIO addresses to functions
// Keys are dword offsets within an engine's MMIO range (writeMMIO shifts
// the byte offset right by 2 before the lookup).
102 sdmaFunc.insert({0x81, &SDMAEngine::setGfxBaseLo});
103 sdmaFunc.insert({0x82, &SDMAEngine::setGfxBaseHi});
104 sdmaFunc.insert({0x88, &SDMAEngine::setGfxRptrHi});
105 sdmaFunc.insert({0x89, &SDMAEngine::setGfxRptrLo});
106 sdmaFunc.insert({0x92, &SDMAEngine::setGfxDoorbellLo});
107 sdmaFunc.insert({0xab, &SDMAEngine::setGfxDoorbellOffsetLo});
108 sdmaFunc.insert({0x80, &SDMAEngine::setGfxSize});
109 sdmaFunc.insert({0xb2, &SDMAEngine::setGfxWptrLo});
110 sdmaFunc.insert({0xb3, &SDMAEngine::setGfxWptrHi});
// The page-queue register offsets differ between Vega10 and the MI parts.
// NOTE(review): the MI branch registers no setPageSize handler, unlike the
// Vega10 branch -- confirm this is intentional.
111 if (p.device_name == "Vega10") {
112 sdmaFunc.insert({0xe1, &SDMAEngine::setPageBaseLo});
113 sdmaFunc.insert({0xe9, &SDMAEngine::setPageRptrLo});
114 sdmaFunc.insert({0xe8, &SDMAEngine::setPageRptrHi});
115 sdmaFunc.insert({0xf2, &SDMAEngine::setPageDoorbellLo});
116 sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
117 sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
118 sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
119 } else if (p.device_name == "MI100" || p.device_name == "MI200"
120 || p.device_name == "MI300X") {
121 sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
122 sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
123 sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
124 sdmaFunc.insert({0xea, &SDMAEngine::setPageDoorbellLo});
125 sdmaFunc.insert({0xd8, &SDMAEngine::setPageDoorbellOffsetLo});
126 sdmaFunc.insert({0x10b, &SDMAEngine::setPageWptrLo});
127 } else {
128 panic("Unknown GPU device %s\n", p.device_name);
129 }
130
131 // Setup PM4 packet processors and sanity check IDs
132 std::set<int> pm4_ids;
133 for (auto& pm4 : p.pm4_pkt_procs) {
134 pm4->setGPUDevice(this);
135 fatal_if(pm4_ids.count(pm4->getIpId()),
136 "Two PM4s with same IP IDs is not allowed");
137 pm4_ids.insert(pm4->getIpId());
138 pm4PktProcs.insert({pm4->getIpId(), pm4});
139
140 pm4Ranges.insert({pm4->getMMIORange(), pm4});
141 }
142
143 // There should be at least one PM4 packet processor with ID 0
144 fatal_if(!pm4PktProcs.count(0), "No default PM4 processor found");
145
// Give the remaining GPU blocks a back-pointer to this device.
146 deviceIH->setGPUDevice(this);
147 cp->hsaPacketProc().setGPUDevice(this);
148 cp->setGPUDevice(this);
149 nbio.setGPUDevice(this);
150 gpuvm.setGPUDevice(this);
151
152 // Address aperture for device memory. We tell this to the driver and
153 // could possibly be anything, but these are the values used by hardware.
154 uint64_t mmhubBase = 0x8000ULL << 24;
155 uint64_t mmhubTop = 0x83ffULL << 24;
// NOTE(review): mem_size is shifted left by 20 further down, so it looks
// like a size in MB (0x3ff0 MB is slightly under 16 GB) -- confirm.
156 uint64_t mem_size = 0x3ff0; // 16 GB of memory
157
158 gpuvm.setMMHUBBase(mmhubBase);
159 gpuvm.setMMHUBTop(mmhubTop);
160
161 // Map other MMIO apertures based on gfx version. This must be done before
162 // any calls to get/setRegVal.
163 // NBIO 0x0 - 0x4280
164 // IH 0x4280 - 0x4980
165 // GRBM 0x8000 - 0xC000
166 // GFX 0x28000 - 0x3F000
167 // MMHUB 0x68000 - 0x6a120
// (For gfx942 the MMHUB aperture is 0x60D00 - 0x62E20 instead.)
168 gpuvm.setMMIOAperture(NBIO_MMIO_RANGE, AddrRange(0x0, 0x4280));
169 gpuvm.setMMIOAperture(IH_MMIO_RANGE, AddrRange(0x4280, 0x4980));
170 gpuvm.setMMIOAperture(GRBM_MMIO_RANGE, AddrRange(0x8000, 0xC000));
171 gpuvm.setMMIOAperture(GFX_MMIO_RANGE, AddrRange(0x28000, 0x3F000));
172 if (getGfxVersion() == GfxVersion::gfx942) {
173 gpuvm.setMMIOAperture(MMHUB_MMIO_RANGE, AddrRange(0x60D00, 0x62E20));
174 } else {
175 gpuvm.setMMIOAperture(MMHUB_MMIO_RANGE, AddrRange(0x68000, 0x6A120));
176 }
177
178 // These are hardcoded register values to return what the driver expects
179 setRegVal(AMDGPU_MP0_SMN_C2PMSG_33, 0x80000000);
180
181 // There are different registers for different GPUs, so we set the value
182 // based on the GPU type specified by the user.
183 if (p.device_name == "Vega10") {
184 setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
185 setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
186 } else if (p.device_name == "MI100") {
187 setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
188 setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
189 setRegVal(MI100_MEM_SIZE_REG, mem_size);
190 } else if (p.device_name == "MI200") {
191 // This device can have either 64GB or 128GB of device memory.
192 // This limits to 16GB for simulation.
193 setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
194 setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
195 setRegVal(MI200_MEM_SIZE_REG, mem_size);
196 } else if (p.device_name == "MI300X") {
197 // VRAM size in MB (shifted right by 20 bits)
198 setRegVal(MI300X_FB_LOCATION_BASE, mmhubBase >> 24);
199 setRegVal(MI300X_FB_LOCATION_TOP, mmhubTop >> 24);
200 setRegVal(MI300X_MEM_SIZE_REG, mem_size);
201 } else {
202 panic("Unknown GPU device %s\n", p.device_name);
203 }
204
// For gfx942 with an IP discovery table binary supplied, load the table
// and write it dword by dword through setRegVal().
205 if (getGfxVersion() == GfxVersion::gfx942 && p.ipt_binary != "") {
206 // From ROCk driver: amdgpu/amdgpu_discovery.h:
207 constexpr uint64_t DISCOVERY_TMR_OFFSET = (64 << 10);
208 constexpr int IPT_SIZE_DW = 0xa00;
209 uint64_t ip_table_base = (mem_size << 20) - DISCOVERY_TMR_OFFSET;
210
// NOTE(review): neither the open nor the read is checked. If the file is
// missing or shorter than IPT_SIZE_DW*4 bytes, ipTable keeps indeterminate
// contents that are still written out below -- consider a fatal_if.
211 std::ifstream iptBin;
212 std::array<uint32_t, IPT_SIZE_DW> ipTable;
213 iptBin.open(p.ipt_binary, std::ios::binary);
214 iptBin.read((char *)ipTable.data(), IPT_SIZE_DW*4);
215 iptBin.close();
216
217 // Read from the IP discovery ROM starting at offset 0x100 (DW 0x40)
// NOTE(review): the loop below actually starts at dword 0x0, not 0x40;
// the comment above may be stale.
218 for (int ipt_dword = 0x0; ipt_dword < IPT_SIZE_DW; ipt_dword++) {
219 Addr ipt_addr = ip_table_base + ipt_dword*4;
220
221 // The driver is using bit 32 of the address for something not
222 // part of the address. Fixup the address to be ipt_addr >> 31
223 // OR'd with the lower 31 bits and 0x80000000.
224 Addr ipt_addr_hi = ipt_addr >> 31;
225 Addr fixup_addr = (ipt_addr_hi << 32) | (ipt_addr & 0x7fffffff)
226 | 0x80000000;
227
228 setRegVal(fixup_addr, ipTable[ipt_dword]);
229 DPRINTF(AMDGPUDevice, "IPTable wrote dword %d (%x) to %lx\n",
230 ipt_dword, ipTable[ipt_dword], fixup_addr);
231 }
232 }
233}
234
/**
 * Presumably AMDGPUDevice::readROM(PacketPtr pkt) (signature line is not
 * visible here): serve a read from the local ROM image.
 *
 * Copies pkt->getSize() bytes from rom at the computed offset into the
 * packet as a little-endian value.
 */
235void
237{
// The mask only yields a valid offset if ROM_SIZE is a power of two --
// TODO confirm. writeROM() instead subtracts romRange.start().
238 Addr rom_offset = pkt->getAddr() & (ROM_SIZE - 1);
239 uint64_t rom_data = 0;
240
// NOTE(review): assumes pkt->getSize() <= sizeof(rom_data) and that the
// access does not run past the end of rom; neither is checked here.
241 memcpy(&rom_data, rom.data() + rom_offset, pkt->getSize());
242 pkt->setUintX(rom_data, ByteOrder::little);
243
244 DPRINTF(AMDGPUDevice, "Read from addr %#x on ROM offset %#x data: %#x\n",
245 pkt->getAddr(), rom_offset, rom_data);
246}
247
/**
 * Presumably AMDGPUDevice::writeROM(PacketPtr pkt) (signature line is not
 * visible here): store the packet's data into the local ROM image.
 *
 * The packet address must satisfy isROM(); the offset is taken relative to
 * romRange.start().
 */
248void
250{
251 assert(isROM(pkt->getAddr()));
252
253 Addr rom_offset = pkt->getAddr() - romRange.start();
254 uint64_t rom_data = pkt->getUintX(ByteOrder::little);
255
// NOTE(review): no check that rom_offset + pkt->getSize() stays inside rom.
256 memcpy(rom.data() + rom_offset, &rom_data, pkt->getSize());
257
258 DPRINTF(AMDGPUDevice, "Write to addr %#x on ROM offset %#x data: %#x\n",
259 pkt->getAddr(), rom_offset, rom_data);
260}
261
/**
 * Presumably AMDGPUDevice::getAddrRanges() const (return type and signature
 * lines are not visible here): build the list of address ranges for this
 * device.
 *
 * @return romRange followed by every entry of `ranges` whose start is
 *         non-zero.
 */
264{
266 AddrRangeList ret_ranges;
267 ret_ranges.push_back(romRange);
268
269 // If the range starts at zero assume OS hasn't assigned it yet. Do not
270 // return ranges starting with zero as they will surely overlap with
271 // another range causing the I/O crossbar to fatal.
272 for (auto & r : ranges) {
273 if (r.start() != 0) {
274 ret_ranges.push_back(r);
275 }
276 }
277
278 return ret_ranges;
279}
280
/**
 * Presumably AMDGPUDevice::readConfig(PacketPtr pkt) (signature line is not
 * visible here): handle a PCI configuration-space read.
 *
 * Visible behaviour: offsets inside [PXCAP_BASE, PXCAP_BASE + sizeof(PXCAP))
 * are answered from pxcap.data for 1-, 2- and 4-byte accesses (any other
 * size panics); other offsets in that branch only produce a warning. A
 * counter of reads at PCI_INTERRUPT_PIN triggers a checkpoint exit on the
 * third one.
 *
 * NOTE(review): several lines (the leading if, the DPRINTF openers and the
 * condition guarding the checkpoint logic) are not visible in this listing,
 * so the braces shown here do not balance.
 *
 * @return configDelay.
 */
281Tick
283{
284 int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
285
288 } else {
289 if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
290 int pxcap_offset = offset - PXCAP_BASE;
291
// NOTE(review): the 2- and 4-byte cases read pxcap.data through a cast
// pointer; pxcap_offset + size is not checked against sizeof(PXCAP).
292 switch (pkt->getSize()) {
293 case sizeof(uint8_t):
294 pkt->setLE<uint8_t>(pxcap.data[pxcap_offset]);
296 "Read PXCAP: dev %#x func %#x reg %#x 1 bytes: data "
297 "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
298 (uint32_t)pkt->getLE<uint8_t>());
299 break;
300 case sizeof(uint16_t):
301 pkt->setLE<uint16_t>(
302 *(uint16_t*)&pxcap.data[pxcap_offset]);
304 "Read PXCAP: dev %#x func %#x reg %#x 2 bytes: data "
305 "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
306 (uint32_t)pkt->getLE<uint16_t>());
307 break;
308 case sizeof(uint32_t):
309 pkt->setLE<uint32_t>(
310 *(uint32_t*)&pxcap.data[pxcap_offset]);
312 "Read PXCAP: dev %#x func %#x reg %#x 4 bytes: data "
313 "= %#x\n",_busAddr.dev, _busAddr.func, pxcap_offset,
314 (uint32_t)pkt->getLE<uint32_t>());
315 break;
316 default:
317 panic("Invalid access size (%d) for amdgpu PXCAP %#x\n",
318 pkt->getSize(), pxcap_offset);
319 }
320 pkt->makeAtomicResponse();
321 } else {
322 warn("Device specific offset %d not implemented!\n", offset);
323 }
324 }
325
326 // Before sending MMIOs the driver sends three interrupts in a row.
327 // Use this to trigger creating a checkpoint to restore in timing mode.
328 // This is only necessary until we can create a "hole" in the KVM VM
329 // around the VGA ROM region such that KVM exits and sends requests to
330 // this device rather than the KVM VM.
332 if (offset == PCI_INTERRUPT_PIN) {
333 if (++init_interrupt_count == 3) {
334 DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n");
335 exitSimLoop("checkpoint", 0, curTick() + configDelay + 1);
336 }
337 } else {
339 }
340 }
341
342 return configDelay;
343}
344
/**
 * Presumably AMDGPUDevice::writeConfig(PacketPtr pkt) (signature line is not
 * visible here): handle a PCI configuration-space write.
 *
 * Writes that land in the PXCAP window are copied into pxcap.data and
 * acknowledged here; the visible early return defers to
 * PciEndpoint::writeConfig().
 *
 * NOTE(review): the condition guarding that early return is not visible in
 * this listing; as shown, the return would be unconditional.
 *
 * @return the base-class result, or configDelay for PXCAP writes.
 */
345Tick
347{
// [[maybe_unused]] because offset would otherwise only feed DPRINTF when
// the PXCAP code below is not reached.
348 [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
349 DPRINTF(AMDGPUDevice, "Write Config: from offset: %#x size: %#x "
350 "data: %#x\n", offset, pkt->getSize(),
351 pkt->getUintX(ByteOrder::little));
352
354 return PciEndpoint::writeConfig(pkt);
355
356
357 if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
358 uint8_t *pxcap_data = &(pxcap.data[0]);
359 int pxcap_offset = offset - PXCAP_BASE;
360
361 DPRINTF(AMDGPUDevice, "Writing PXCAP offset %d size %d\n",
362 pxcap_offset, pkt->getSize());
363
// NOTE(review): pxcap_offset + pkt->getSize() is not checked against
// sizeof(PXCAP) before the copy.
364 memcpy(pxcap_data + pxcap_offset, pkt->getConstPtr<void>(),
365 pkt->getSize());
366 }
367
368 pkt->makeAtomicResponse();
369
370 return configDelay;
371}
372
/**
 * Presumably AMDGPUDevice::dispatchAccess(PacketPtr pkt, bool read)
 * (signature line is not visible here): log the access and turn the packet
 * into an atomic response. `read` only selects the "Read"/"Write" label.
 */
373void
375{
376 DPRINTF(AMDGPUDevice, "%s from addr %#x size: %#x data: %#x\n",
377 read ? "Read" : "Write", pkt->getAddr(), pkt->getSize(),
378 pkt->getUintX(ByteOrder::little));
379
380 pkt->makeAtomicResponse();
381}
382
/**
 * Presumably AMDGPUDevice::readFrame(PacketPtr pkt, Addr offset) (signature
 * line is not visible here): service a framebuffer-BAR read.
 *
 * nbio gets the first chance to answer. Otherwise a functional read is sent
 * through the first compute unit's memory port; if that reports
 * FunctionalReadError the read is retried directly against the system's
 * device memory. The result is copied into pkt as little-endian.
 *
 * NOTE(review): one continuation line of the fallback Request construction
 * is not visible in this listing.
 */
383void
385{
386 DPRINTF(AMDGPUDevice, "Read framebuffer address %#lx\n", offset);
387
388 /*
389 * Return data for frame reads in priority order: (1) Special addresses
390 * first, ignoring any writes from driver. (2) Any other address from
391 * device backing store / abstract memory class functionally.
392 */
393 if (nbio.readFrame(pkt, offset)) {
394 return;
395 }
396
397 /*
398 * Read the value from device memory. This must be done functionally
399 * because this method is called by the PCIEndpoint::read method which
400 * is a non-timing read.
401 */
402 RequestPtr req = std::make_shared<Request>(
403 offset, pkt->getSize(), 0, vramRequestorId());
404
405 PacketPtr readPkt = new Packet(req, MemCmd::ReadReq);
406 uint8_t *dataPtr = new uint8_t[pkt->getSize()];
407 readPkt->dataDynamic(dataPtr);
408 readPkt->req->setGPUFuncAccess(true);
409 readPkt->setSuppressFuncError();
410 cp->shader()->cuList[0]->memPort[0].sendFunctional(readPkt);
// NOTE(review): inside this branch dataPtr is deleted after readPkt, which
// was handed the same buffer via dataDynamic() -- check for a double free.
// The req/readPkt/dataPtr declared inside the branch shadow the outer ones
// and go out of scope at its closing brace, so the code after the branch
// uses the outer readPkt, which was deleted here.
411 if (readPkt->cmd == MemCmd::FunctionalReadError) {
412 delete readPkt;
413 delete[] dataPtr;
414 RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
416 PacketPtr readPkt = Packet::createRead(req);
417 uint8_t *dataPtr = new uint8_t[pkt->getSize()];
418 readPkt->dataDynamic(dataPtr);
419
420 auto system = cp->shader()->gpuCmdProc.system();
421 system->getDeviceMemory(readPkt)->access(readPkt);
422 }
423
424 pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
425 delete readPkt;
426}
427
/**
 * Presumably AMDGPUDevice::readDoorbell(PacketPtr pkt, Addr offset)
 * (signature line is not visible here): answer a doorbell-BAR read from the
 * MMIO trace reader.
 */
428void
430{
431 DPRINTF(AMDGPUDevice, "Read doorbell %#lx\n", offset);
432 mmioReader.readFromTrace(pkt, DOORBELL_BAR, offset);
433}
434
/**
 * Presumably AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset) (signature
 * line is not visible here): service an MMIO-BAR read.
 *
 * The packet is first filled from the MMIO trace; then, depending on which
 * aperture contains offset, the matching block (nbio, gpuvm or gfx) may
 * overwrite it. Offsets in no handled aperture keep the trace value.
 */
435void
437{
438 AddrRange aperture = gpuvm.getMMIOAperture(offset);
439 Addr aperture_offset = offset - aperture.start();
440
441 // By default read from MMIO trace. Overwrite the packet for a select
442 // few more dynamic MMIOs.
443 DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
444 mmioReader.readFromTrace(pkt, MMIO_BAR, offset);
445
// GRBM and MMHUB offsets are shifted by their per-aperture constants before
// being passed to gpuvm; NBIO and GFX take the aperture-relative offset
// unchanged.
446 if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
447 DPRINTF(AMDGPUDevice, "NBIO base\n");
448 nbio.readMMIO(pkt, aperture_offset);
449 } else if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
450 DPRINTF(AMDGPUDevice, "GRBM base\n");
451 gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
452 } else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
453 DPRINTF(AMDGPUDevice, "GFX base\n");
454 gfx.readMMIO(pkt, aperture_offset);
455 } else if (aperture == gpuvm.getMMIORange(MMHUB_MMIO_RANGE)) {
456 DPRINTF(AMDGPUDevice, "MMHUB base\n");
457 gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
458 } else {
459 DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for read %#x\n", offset);
460 }
461}
462
/**
 * Presumably AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset) (signature
 * line is not visible here): service a framebuffer-BAR write.
 *
 * Sends an L2 invalidate for the written line to every compute unit,
 * records GART entries when the write falls in the GART aperture, forwards
 * the write to nbio, and finally writes the data to device memory.
 *
 * NOTE(review): one continuation line of the Request construction is not
 * visible in this listing.
 */
463void
465{
466 DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);
467
// aligned_addr does not depend on the compute unit; it is recomputed on
// every iteration.
468 for (auto& cu: CP()->shader()->cuList) {
469 Addr aligned_addr = offset & ~(gpuMemMgr->getCacheLineSize() - 1);
470 cu->sendInvL2(aligned_addr);
471 }
472
473 Addr aperture = gpuvm.getFrameAperture(offset);
474 Addr aperture_offset = offset - aperture;
475
476 // Record the value
477 if (aperture == gpuvm.gartBase()) {
478 gpuvm.gartTable[aperture_offset] = pkt->getUintX(ByteOrder::little);
479 DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", aperture_offset,
480 gpuvm.gartTable[aperture_offset]);
481 }
482
483 nbio.writeFrame(pkt, offset);
484
485 /*
486 * Write the value to device memory. This must be done functionally
487 * because this method is called by the PCIEndpoint::write method which
488 * is a non-timing write.
489 */
490 RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
492 PacketPtr writePkt = Packet::createWrite(req);
// Copy the payload into a buffer handed to writePkt via dataDynamic().
493 uint8_t *dataPtr = new uint8_t[pkt->getSize()];
494 std::memcpy(dataPtr, pkt->getPtr<uint8_t>(),
495 pkt->getSize() * sizeof(uint8_t));
496 writePkt->dataDynamic(dataPtr);
497
498 auto system = cp->shader()->gpuCmdProc.system();
499 system->getDeviceMemory(writePkt)->access(writePkt);
500
501 delete writePkt;
502}
503
/**
 * Presumably AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
 * (signature line is not visible here): service a doorbell-BAR write.
 *
 * If offset is a registered doorbell, the write is routed by queue type to
 * a PM4 packet processor, an SDMA engine, the HSA hardware scheduler or the
 * interrupt handler. An unregistered offset is not dropped: a copy of the
 * packet is stored in pendingDoorbellPkts keyed by offset.
 */
504void
506{
507 DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset);
508
509 if (doorbells.find(offset) != doorbells.end()) {
510 QueueType q_type = doorbells[offset].qtype;
511 int ip_id = doorbells[offset].ip_id;
512 DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
513 offset, q_type);
514 switch (q_type) {
515 case Compute:
516 assert(pm4PktProcs.count(ip_id));
517 pm4PktProcs[ip_id]->process(
518 pm4PktProcs[ip_id]->getQueue(offset),
519 pkt->getLE<uint64_t>());
520 break;
// Same as Compute except getQueue() is called with a second argument of
// true.
521 case Gfx:
522 assert(pm4PktProcs.count(ip_id));
523 pm4PktProcs[ip_id]->process(
524 pm4PktProcs[ip_id]->getQueue(offset, true),
525 pkt->getLE<uint64_t>());
526 break;
527 case SDMAGfx: {
528 SDMAEngine *sdmaEng = getSDMAEngine(offset);
529 sdmaEng->processGfx(pkt->getLE<uint64_t>());
530 } break;
531 case SDMAPage: {
532 SDMAEngine *sdmaEng = getSDMAEngine(offset);
533 sdmaEng->processPage(pkt->getLE<uint64_t>());
534 } break;
// AQL doorbells pass the written value plus one to both the hardware
// scheduler and the PM4 read index.
535 case ComputeAQL: {
536 assert(pm4PktProcs.count(ip_id));
537 cp->hsaPacketProc().hwScheduler()->write(offset,
538 pkt->getLE<uint64_t>() + 1);
539 pm4PktProcs[ip_id]->updateReadIndex(offset,
540 pkt->getLE<uint64_t>() + 1);
541 } break;
// The interrupt handler's read pointer is read as 32 bits, unlike the
// 64-bit values used by the other queue types.
542 case InterruptHandler:
543 deviceIH->updateRptr(pkt->getLE<uint32_t>());
544 break;
545 case RLC: {
546 SDMAEngine *sdmaEng = getSDMAEngine(offset);
547 sdmaEng->processRLC(offset, pkt->getLE<uint64_t>());
548 } break;
549 default:
550 panic("Write to unkown queue type!");
551 }
552 } else {
553 warn("Unknown doorbell offset: %lx. Saving to pending doorbells.\n",
554 offset);
555
556 // We have to ACK the PCI packet immediately, so create a copy of the
557 // packet here to send again. The packet data contains the value of
558 // the doorbell to write so we need to copy that as the original
559 // packet gets deleted after the PCI write() method returns.
560 RequestPtr pending_req(pkt->req);
561 PacketPtr pending_pkt = Packet::createWrite(pending_req);
562 uint8_t *pending_data = new uint8_t[pkt->getSize()];
563 memcpy(pending_data, pkt->getPtr<uint8_t>(), pkt->getSize());
564 pending_pkt->dataDynamic(pending_data);
565
// NOTE(review): emplace() does not replace an existing entry, so a second
// unknown write to the same offset would leave pending_pkt unreferenced.
566 pendingDoorbellPkts.emplace(offset, pending_pkt);
567 }
568}
569
/**
 * Presumably AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset) (signature
 * line is not visible here): service an MMIO-BAR write.
 *
 * Dispatch order: (1) SDMA engine MMIO ranges, (2) PM4 packet processor
 * ranges, (3) the GRBM / IH / NBIO / GFX apertures. The first two return as
 * soon as a range matches, so at most one handler sees the write.
 */
570void
572{
573 AddrRange aperture = gpuvm.getMMIOAperture(offset);
574 Addr aperture_offset = offset - aperture.start();
575
576 DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);
577
578 // Check SDMA functions first, then fallback to MMIO ranges.
579 for (int idx = 0; idx < sdmaIds.size(); ++idx) {
580 if (sdmaMmios[idx].contains(offset)) {
// Convert the byte offset within the engine's range to a dword index, the
// key used by sdmaFunc.
581 Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
582 if (sdmaFunc.count(sdma_offset)) {
583 DPRINTF(AMDGPUDevice, "Calling SDMA%d MMIO function %lx\n",
584 idx, sdma_offset);
585 sdmaFuncPtr mptr = sdmaFunc[sdma_offset];
586 (getSDMAById(idx)->*mptr)(pkt->getLE<uint32_t>());
587 } else {
588 DPRINTF(AMDGPUDevice, "Unknown SDMA%d MMIO: %#lx\n", idx,
589 sdma_offset);
590 }
591
// Writes inside an SDMA range never fall through, even when no handler is
// registered for the register.
592 return;
593 }
594 }
595
596 // Check PM4s next, returning to avoid duplicate writes.
597 for (auto& [range, pm4_proc] : pm4Ranges) {
598 if (range.contains(offset)) {
599 // PM4 MMIOs are offset based on the MMIO range start
600 Addr ip_offset = offset - range.start();
601 pm4_proc->writeMMIO(pkt, ip_offset >> GRBM_OFFSET_SHIFT);
602
603 return;
604 }
605 }
606
// No MMHUB branch here, unlike readMMIO(): MMHUB writes end up in the
// "Unknown MMIO aperture" case below.
607 if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
608 DPRINTF(AMDGPUDevice, "GRBM base\n");
609 gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
610 } else if (aperture == gpuvm.getMMIORange(IH_MMIO_RANGE)) {
611 DPRINTF(AMDGPUDevice, "IH base\n");
612 deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
613 } else if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
614 DPRINTF(AMDGPUDevice, "NBIO base\n");
615 nbio.writeMMIO(pkt, aperture_offset);
616 } else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
617 DPRINTF(AMDGPUDevice, "GFX base\n");
618 gfx.writeMMIO(pkt, aperture_offset);
619 } else {
620 DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for write %#x\n", offset);
621 }
622}
623
/**
 * Presumably AMDGPUDevice::read(PacketPtr pkt) (signature line is not
 * visible here): entry point for device reads.
 *
 * ROM addresses go to readROM(); everything else is resolved to a BAR and
 * BAR-relative offset via getBAR() and handed to the matching read handler.
 * An address that maps to none of the three handled BARs panics.
 *
 * @return pioDelay.
 */
624Tick
626{
627 if (isROM(pkt->getAddr())) {
628 readROM(pkt);
629 } else {
// barnum starts at -1 so that a getBAR() call that leaves it untouched
// reaches the panic in the default case.
630 int barnum = -1;
631 Addr offset = 0;
632 getBAR(pkt->getAddr(), barnum, offset);
633
634 switch (barnum) {
635 case FRAMEBUFFER_BAR:
636 readFrame(pkt, offset);
637 break;
638 case DOORBELL_BAR:
639 readDoorbell(pkt, offset);
640 break;
641 case MMIO_BAR:
642 readMMIO(pkt, offset);
643 break;
644 default:
645 panic("Request with address out of mapped range!");
646 }
647 }
648
// Log the access and convert the packet into a response.
649 dispatchAccess(pkt, true);
650 return pioDelay;
651}
652
/**
 * Presumably AMDGPUDevice::write(PacketPtr pkt) (signature line is not
 * visible here): entry point for device writes.
 *
 * ROM addresses go to writeROM(); everything else is resolved to a BAR and
 * BAR-relative offset via getBAR() and handed to the matching write handler.
 * An address that maps to none of the three handled BARs panics.
 *
 * @return pioDelay.
 */
653Tick
655{
656 if (isROM(pkt->getAddr())) {
657 writeROM(pkt);
658
659 dispatchAccess(pkt, false);
660
661 return pioDelay;
662 }
663
664 int barnum = -1;
665 Addr offset = 0;
666 getBAR(pkt->getAddr(), barnum, offset);
667
668 switch (barnum) {
669 case FRAMEBUFFER_BAR:
670 writeFrame(pkt, offset);
671 break;
672 case DOORBELL_BAR:
673 writeDoorbell(pkt, offset);
674 break;
675 case MMIO_BAR:
676 writeMMIO(pkt, offset);
677 break;
678 default:
679 panic("Request with address out of mapped range!");
680 }
681
682 // Record only if there is non-zero value, or a value to be overwritten.
683 // Reads return 0 by default.
// NOTE(review): nothing is recorded in the visible code; data is only used
// by the DPRINTF below, so the comment above looks stale.
684 uint64_t data = pkt->getUintX(ByteOrder::little);
685
686 DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
687 pkt->getAddr(), data);
688
689 dispatchAccess(pkt, false);
690
691 return pioDelay;
692}
693
/**
 * Presumably AMDGPUDevice::processPendingDoorbells(uint32_t offset)
 * (signature line is not visible here): act on a doorbell write that was
 * saved in pendingDoorbellPkts for this offset, if there is one.
 *
 * NOTE(review): the statements that follow the DPRINTF inside the if are
 * not visible in this listing, so what is done with the saved packet cannot
 * be described from here.
 */
694void
696{
697 if (pendingDoorbellPkts.count(offset)) {
698 DPRINTF(AMDGPUDevice, "Sending pending doorbell %x\n", offset);
702 }
703}
704
/**
 * Presumably AMDGPUDevice::getRegVal(uint64_t addr) (signature line is not
 * visible here): read a 32-bit register by building a temporary read packet
 * and passing it through readMMIO().
 *
 * @return the 32-bit little-endian value held by the packet after
 *         readMMIO() returns.
 */
705uint32_t
707{
708 // This is somewhat of a guess based on amdgpu_device_mm_access
709 // in amdgpu_device.c in the ROCk driver. If bit 32 is 1 then
710 // assume VRAM and use full address, otherwise assume register
711 // address and only use the lower 31 bits.
// NOTE(review): the test below is on zero-based bit 31, so "bit 32" in the
// comment above is presumably one-based -- confirm.
712 Addr fixup_addr = bits(addr, 31, 31) ? addr : addr & 0x7fffffff;
713
// pkt_data is the backing storage for the packet (attached via
// dataStatic()).
714 uint32_t pkt_data = 0;
715 RequestPtr request = std::make_shared<Request>(fixup_addr,
716 sizeof(uint32_t), 0 /* flags */, vramRequestorId());
717 PacketPtr pkt = Packet::createRead(request);
718 pkt->dataStatic((uint8_t *)&pkt_data);
// NOTE(review): readMMIO() is given the original addr, not fixup_addr; only
// the Request and the DPRINTF use the fixed-up address -- confirm intended.
719 readMMIO(pkt, addr);
720 DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n",
721 fixup_addr, pkt->getLE<uint32_t>());
722
723 pkt_data = pkt->getLE<uint32_t>();
724 delete pkt;
725
726 return pkt_data;
727}
728
729void
730AMDGPUDevice::setRegVal(uint64_t addr, uint32_t value)
731{
732 DPRINTF(AMDGPUDevice, "Setting register 0x%lx to %x\n",
733 addr, value);
734
735 uint32_t pkt_data = value;
736 RequestPtr request = std::make_shared<Request>(addr,
737 sizeof(uint32_t), 0 /* flags */, vramRequestorId());
738 PacketPtr pkt = Packet::createWrite(request);
739 pkt->dataStatic((uint8_t *)&pkt_data);
740 writeMMIO(pkt, addr);
741 delete pkt;
742}
743
/**
 * Presumably AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt,
 * int ip_id) (signature line is not visible here): register, or update, the
 * queue type and IP ID associated with a doorbell offset.
 */
744void
746{
747 DPRINTF(AMDGPUDevice, "Setting doorbell type for %x\n", offset);
// operator[] creates the entry when the offset is not yet registered.
748 doorbells[offset].qtype = qt;
749 doorbells[offset].ip_id = ip_id;
750}
751
/**
 * Presumably AMDGPUDevice::unsetDoorbell(uint32_t offset) (signature line is
 * not visible here): remove the doorbell registered at offset, if any.
 */
752void
754{
755 doorbells.erase(offset);
756}
757
758void
763
/**
 * Body of what is presumably AMDGPUDevice::getSDMAById(int id) (return type
 * and signature lines are not visible here): look up an SDMA engine by the
 * sequential ID stored in sdmaIds.
 *
 * The ID must already be present in sdmaIds (asserted).
 */
766{
771 assert(sdmaIds.count(id));
772
773 return sdmaIds[id];
774}
775
781
782void
787
/**
 * Presumably AMDGPUDevice::serialize(CheckpointOut &cp) const (signature
 * line is not visible here): write the device state to a checkpoint.
 *
 * The doorbells, sdmaEngs and usedVMIDs maps are flattened into parallel
 * arrays. The per-VMID queue-id sets are concatenated into one array,
 * with a second array holding each set's size so unserialize can split it
 * back up. deviceMem and gpuvm are written as their own sections.
 *
 * NOTE(review): the statement that follows the first comment in the body is
 * not visible in this listing.
 */
788void
790{
791 // Serialize the PciEndpoint base class
793
794 uint64_t doorbells_size = doorbells.size();
795 uint64_t sdma_engs_size = sdmaEngs.size();
796 uint64_t used_vmid_map_size = usedVMIDs.size();
797
798 SERIALIZE_SCALAR(doorbells_size);
799 SERIALIZE_SCALAR(sdma_engs_size);
800 // Save the number of vmids used
801 SERIALIZE_SCALAR(used_vmid_map_size);
802
803 // Make a c-style array of the regs to serialize
804 auto doorbells_offset = std::make_unique<uint32_t[]>(doorbells_size);
805 auto doorbells_queues = std::make_unique<QueueType[]>(doorbells_size);
806 auto doorbells_ip_ids = std::make_unique<int[]>(doorbells_size);
807 auto sdma_engs_offset = std::make_unique<uint32_t[]>(sdma_engs_size);
808 auto sdma_engs = std::make_unique<int[]>(sdma_engs_size);
809 auto used_vmids = std::make_unique<int[]>(used_vmid_map_size);
810 auto used_queue_id_sizes = std::make_unique<int[]>(used_vmid_map_size);
811 std::vector<int> used_vmid_sets;
812
// Flatten doorbells: offset, queue type and IP ID share an index.
813 int idx = 0;
814 for (auto & it : doorbells) {
815 doorbells_offset[idx] = it.first;
816 doorbells_queues[idx] = it.second.qtype;
817 doorbells_ip_ids[idx] = it.second.ip_id;
818 ++idx;
819 }
820
// SDMA engines are stored by their integer ID, not by pointer.
821 idx = 0;
822 for (auto & it : sdmaEngs) {
823 sdma_engs_offset[idx] = it.first;
824 sdma_engs[idx] = it.second->getId();
825 ++idx;
826 }
827
// Append each VMID's queue-id set to used_vmid_sets and remember its size.
828 idx = 0;
829 for (auto & it : usedVMIDs) {
830 used_vmids[idx] = it.first;
831 used_queue_id_sizes[idx] = it.second.size();
832 std::vector<int> set_vector(it.second.begin(), it.second.end());
833 used_vmid_sets.insert(used_vmid_sets.end(),
834 set_vector.begin(), set_vector.end());
835 ++idx;
836 }
837
838 int num_queue_id = used_vmid_sets.size();
839 auto vmid_array = std::make_unique<int[]>(num_queue_id);
840 std::copy(used_vmid_sets.begin(), used_vmid_sets.end(), vmid_array.get());
841
842 SERIALIZE_UNIQUE_PTR_ARRAY(doorbells_offset, doorbells_size);
843 SERIALIZE_UNIQUE_PTR_ARRAY(doorbells_queues, doorbells_size);
844 SERIALIZE_UNIQUE_PTR_ARRAY(doorbells_ip_ids, doorbells_size);
845 SERIALIZE_UNIQUE_PTR_ARRAY(sdma_engs_offset, sdma_engs_size);
846 SERIALIZE_UNIQUE_PTR_ARRAY(sdma_engs, sdma_engs_size);
847 // Save the vmids used in an array
848 SERIALIZE_UNIQUE_PTR_ARRAY(used_vmids, used_vmid_map_size);
849 // Save the size of the set of queue ids mapped to each vmid
850 SERIALIZE_UNIQUE_PTR_ARRAY(used_queue_id_sizes, used_vmid_map_size);
851 // Save all the queue ids used for all the vmids
852 SERIALIZE_UNIQUE_PTR_ARRAY(vmid_array, num_queue_id);
853 // Save the total number of queue ids used
854 SERIALIZE_SCALAR(num_queue_id);
855
856 // Serialize the device memory
857 deviceMem.serializeSection(cp, "deviceMem");
858 gpuvm.serializeSection(cp, "GPUVM");
859}
860
/**
 * Presumably AMDGPUDevice::unserialize(CheckpointIn &cp) (signature line is
 * not visible here): restore the device state written by serialize().
 *
 * Each group of arrays is only read back when its recorded size is
 * non-zero. SDMA engines are re-linked through sdmaIds using the saved
 * engine IDs, and the concatenated queue-id array is split back into
 * per-VMID sets using the saved set sizes.
 *
 * NOTE(review): the statement that follows the first comment in the body is
 * not visible in this listing.
 */
861void
863{
864 // Unserialize the PciEndpoint base class
866
867 uint64_t doorbells_size = 0;
868 uint64_t sdma_engs_size = 0;
869 uint64_t used_vmid_map_size = 0;
870
871 UNSERIALIZE_SCALAR(doorbells_size);
872 UNSERIALIZE_SCALAR(sdma_engs_size);
873 UNSERIALIZE_SCALAR(used_vmid_map_size);
874
875
876 if (doorbells_size > 0) {
877 auto doorbells_offset = std::make_unique<uint32_t[]>(doorbells_size);
878 auto doorbells_queues = std::make_unique<QueueType[]>(doorbells_size);
879 auto doorbells_ip_ids = std::make_unique<int[]>(doorbells_size);
880
881 UNSERIALIZE_UNIQUE_PTR_ARRAY(doorbells_offset, doorbells_size);
882 UNSERIALIZE_UNIQUE_PTR_ARRAY(doorbells_queues, doorbells_size);
883 UNSERIALIZE_UNIQUE_PTR_ARRAY(doorbells_ip_ids, doorbells_size);
884
885 for (int idx = 0; idx < doorbells_size; ++idx) {
886 doorbells[doorbells_offset[idx]].qtype = doorbells_queues[idx];
887 doorbells[doorbells_offset[idx]].ip_id = doorbells_ip_ids[idx];
888 }
889 }
890
891 if (sdma_engs_size > 0) {
892 auto sdma_engs_offset = std::make_unique<uint32_t[]>(sdma_engs_size);
893 auto sdma_engs = std::make_unique<int[]>(sdma_engs_size);
894
895 UNSERIALIZE_UNIQUE_PTR_ARRAY(sdma_engs_offset, sdma_engs_size);
896 UNSERIALIZE_UNIQUE_PTR_ARRAY(sdma_engs, sdma_engs_size);
897
// Map each saved offset back to the live engine that has the saved ID; the
// ID must be known to sdmaIds.
898 for (int idx = 0; idx < sdma_engs_size; ++idx) {
899 int sdma_id = sdma_engs[idx];
900 assert(sdmaIds.count(sdma_id));
901 SDMAEngine *sdma = sdmaIds[sdma_id];
902 sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
903 }
904 }
905
906 if (used_vmid_map_size > 0) {
907 auto used_vmids = std::make_unique<int[]>(used_vmid_map_size);
908 auto used_queue_id_sizes = std::make_unique<int[]>(used_vmid_map_size);
909 int num_queue_id = 0;
910 std::vector<int> used_vmid_sets;
911 // Extract the total number of queue ids used
912 UNSERIALIZE_SCALAR(num_queue_id);
913 auto vmid_array = std::make_unique<int[]>(num_queue_id);
914 // Extract the number of vmids used
915 UNSERIALIZE_UNIQUE_PTR_ARRAY(used_vmids, used_vmid_map_size);
916 // Extract the size of the queue id set for each vmid
917 UNSERIALIZE_UNIQUE_PTR_ARRAY(used_queue_id_sizes, used_vmid_map_size);
918 // Extract all the queue ids used
919 UNSERIALIZE_UNIQUE_PTR_ARRAY(vmid_array, num_queue_id);
920 // Populate the usedVMIDs map with the queue ids per vm
// idx is the start of the current VMID's slice within vmid_array.
921 int idx = 0;
922 for (int it = 0; it < used_vmid_map_size; it++) {
923 int vmid = used_vmids[it];
924 int vmid_set_size = used_queue_id_sizes[it];
925 for (int j = 0; j < vmid_set_size; j++) {
926 usedVMIDs[vmid].insert(vmid_array[idx + j]);
927 }
928 idx += vmid_set_size;
929 }
930 }
931
932 // Unserialize the device memory
933 deviceMem.unserializeSection(cp, "deviceMem");
934 gpuvm.unserializeSection(cp, "GPUVM");
935}
936
/**
 * VMID allocator taking a `pasid` (signature line is not visible here; the
 * function name cannot be confirmed from this listing).
 *
 * Scans VMIDs 1 .. AMDGPU_VM_COUNT-1 in ascending order and claims the
 * first one absent from usedVMIDs: records pasid -> vmid in idMap, creates
 * an empty queue-id set for it, and updates _lastVMID.
 *
 * @return the newly assigned VMID. Panics if every VMID is in use.
 */
937uint16_t
939{
// The scan starts at 1, so VMID 0 is never handed out by this function.
940 for (uint16_t vmid = 1; vmid < AMDGPU_VM_COUNT; vmid++) {
941 auto result = usedVMIDs.find(vmid);
942 if (result == usedVMIDs.end()) {
// NOTE(review): insert() leaves an existing pasid mapping untouched, so a
// pasid that already has a VMID keeps its old idMap entry.
943 idMap.insert(std::make_pair(pasid, vmid));
944 usedVMIDs[vmid] = {};
945 _lastVMID = vmid;
946 return vmid;
947 }
948 }
949 panic("All VMIDs have been assigned");
950}
951
/**
 * VMID release taking a `vmid` (signature line is not visible here; the
 * function name cannot be confirmed from this listing): drop the VMID and
 * its queue-id set from usedVMIDs. idMap is not touched here.
 */
952void
954{
955 usedVMIDs.erase(vmid);
956}
957
/**
 * PASID release taking a `pasid` (signature line is not visible here; the
 * function name cannot be confirmed from this listing): remove the
 * pasid -> vmid mapping from idMap and free the associated VMID in
 * usedVMIDs.
 */
958void
960{
961 auto result = idMap.find(pasid);
962 assert(result != idMap.end());
// Presumably kept so that builds with assert() compiled out return quietly
// instead of dereferencing end().
963 if (result == idMap.end()) return;
964 uint16_t vmid = result->second;
965
966 idMap.erase(result);
967 usedVMIDs.erase(vmid);
968}
969
/**
 * Presumably AMDGPUDevice::deallocateAllQueues(bool unmap_static) (signature
 * line is not visible here): clear all PASID/VMID bookkeeping, ask every
 * SDMA engine to deallocate its RLC queues, and unregister user doorbells.
 *
 * unmap_static is forwarded unchanged to SDMAEngine::deallocateRLCQueues().
 */
970void
972{
973 idMap.erase(idMap.begin(), idMap.end());
974 usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
975
976 for (auto& it : sdmaEngs) {
977 it.second->deallocateRLCQueues(unmap_static);
978 }
979
980 // "All" queues implicitly refers to all user queues. User queues begin at
981 // doorbell address 0x4000, so unmap any queue at or above that address.
// Only `doorbells` is modified here; doorbellVMIDMap keeps its entries.
982 for (auto [offset, vmid] : doorbellVMIDMap) {
983 if (offset >= 0x4000) {
984 doorbells.erase(offset);
985 }
986 }
987}
988
989void
990AMDGPUDevice::mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
991{
992 doorbellVMIDMap[doorbell] = vmid;
993}
994
/**
 * Presumably AMDGPUDevice::getUsedVMIDs() (signature line is not visible
 * here): expose the VMID -> queue-id-set map.
 *
 * @return a mutable reference to usedVMIDs; callers can modify the device's
 *         bookkeeping through it.
 */
995std::unordered_map<uint16_t, std::set<int>>&
997{
998 return usedVMIDs;
999}
1000
1001void
1002AMDGPUDevice::insertQId(uint16_t vmid, int id)
1003{
1004 usedVMIDs[vmid].insert(id);
1005}
1006
1007} // namespace gem5
AbstractMemory declaration.
#define AMDGPU_MP0_SMN_C2PMSG_33
#define VEGA10_FB_LOCATION_BASE
Definition amdgpu_vm.hh:93
#define VEGA10_FB_LOCATION_TOP
Definition amdgpu_vm.hh:94
#define MI200_MEM_SIZE_REG
Definition amdgpu_vm.hh:100
#define MI200_FB_LOCATION_TOP
Definition amdgpu_vm.hh:102
#define MI300X_FB_LOCATION_TOP
Definition amdgpu_vm.hh:106
#define MI100_FB_LOCATION_BASE
Definition amdgpu_vm.hh:97
#define MI200_FB_LOCATION_BASE
Definition amdgpu_vm.hh:101
#define MI300X_MEM_SIZE_REG
Definition amdgpu_vm.hh:104
#define MI100_FB_LOCATION_TOP
Definition amdgpu_vm.hh:98
#define MI300X_FB_LOCATION_BASE
Definition amdgpu_vm.hh:105
#define MI100_MEM_SIZE_REG
Definition amdgpu_vm.hh:96
#define DPRINTF(x,...)
Definition trace.hh:209
const char data[]
void insertQId(uint16_t vmid, int id)
std::unordered_map< AddrRange, PM4PacketProcessor *, AddrRangeHasher > pm4Ranges
void deallocateAllQueues(bool unmap_static)
std::unordered_map< Addr, uint16_t > doorbellVMIDMap
std::unordered_map< uint16_t, uint16_t > idMap
void readMMIO(PacketPtr pkt, Addr offset)
void serialize(CheckpointOut &cp) const override
Checkpoint support.
void processPendingDoorbells(uint32_t offset)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device responds to.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void writeMMIO(PacketPtr pkt, Addr offset)
GPUCommandProcessor * cp
void setDoorbellType(uint32_t offset, QueueType qt, int ip_id=0)
Set handles to GPU blocks.
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
void readROM(PacketPtr pkt)
AddrRange romRange
VGA ROM methods.
std::unordered_map< uint32_t, DoorbellInfo > doorbells
Structures to hold registers, doorbells, and some frame memory.
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
std::array< uint8_t, ROM_SIZE > rom
bool isROM(Addr addr) const
void unsetDoorbell(uint32_t offset)
std::unordered_map< uint32_t, PacketPtr > pendingDoorbellPkts
void setRegVal(uint64_t addr, uint32_t value)
std::unordered_map< uint32_t, AddrRange > sdmaMmios
void(SDMAEngine::* sdmaFuncPtr)(uint32_t)
SDMAEngine * getSDMAEngine(Addr offset)
AMDGPUMemoryManager * gpuMemMgr
AMDGPUDevice(const AMDGPUDeviceParams &p)
void readDoorbell(PacketPtr pkt, Addr offset)
AMDGPUNbio nbio
Blocks of the GPU.
Tick readConfig(PacketPtr pkt) override
Read from the PCI config space data that is stored locally.
std::unordered_map< uint32_t, sdmaFuncPtr > sdmaFunc
std::unordered_map< uint16_t, std::set< int > > usedVMIDs
AMDGPUInterruptHandler * deviceIH
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
AMDMMIOReader mmioReader
MMIO reader to populate device registers map.
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
bool checkpoint_before_mmios
Initial checkpoint support variables.
void dispatchAccess(PacketPtr pkt, bool read)
Convert a PCI packet into a response.
uint32_t getRegVal(uint64_t addr)
Register value getter/setter.
void deallocateVmid(uint16_t vmid)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void intrPost()
Methods inherited from PciEndpoint.
void readFrame(PacketPtr pkt, Addr offset)
Helper methods to handle specific BAR read/writes.
void writeROM(PacketPtr pkt)
void writeDoorbell(PacketPtr pkt, Addr offset)
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
std::unordered_map< uint32_t, SDMAEngine * > sdmaIds
uint16_t allocateVMID(uint16_t pasid)
std::unordered_map< int, PM4PacketProcessor * > pm4PktProcs
void deallocatePasid(uint16_t pasid)
SDMAEngine * getSDMAById(int id)
void writeFrame(PacketPtr pkt, Addr offset)
void setSDMAEngine(Addr offset, SDMAEngine *eng)
memory::PhysicalMemory deviceMem
std::unordered_map< uint32_t, SDMAEngine * > sdmaEngs
GPUCommandProcessor * CP()
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ranges intersect, if a range contains a specific address, etc.
Definition addr_range.hh:82
@ FunctionalReadError
Definition packet.hh:139
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it to the desired endianness.
Definition packet.cc:361
void setLE(T v)
Set the value in the data pointer to v as little endian.
static PacketPtr createWrite(const RequestPtr &req)
Definition packet.hh:1044
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition packet.hh:1175
T * getPtr()
get a pointer to the data ptr.
Definition packet.hh:1225
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition packet.hh:1038
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
unsigned getSize() const
Definition packet.hh:817
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
Definition packet.cc:352
const T * getConstPtr() const
Definition packet.hh:1234
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
void makeAtomicResponse()
Definition packet.hh:1074
MemCmd cmd
The command field of the packet.
Definition packet.hh:372
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setSuppressFuncError()
Definition packet.hh:757
void serialize(CheckpointOut &cp) const override
Serialize this object to the given output stream.
Definition device.cc:386
bool getBAR(Addr addr, int &num, Addr &offs)
Which base address register (if any) maps the given address?
Definition device.hh:336
const PciBusAddr _busAddr
Definition device.hh:291
virtual Tick readConfig(PacketPtr pkt)
Read from the PCI config space data that is stored locally.
Definition device.cc:210
void intrPost()
Definition device.hh:380
const int PXCAP_BASE
Definition device.hh:316
PCIConfigType0 & config()
Definition device.hh:424
void unserialize(CheckpointIn &cp) override
Reconstruct the state of this object from a checkpoint.
Definition device.cc:677
PciEndpoint(const PciEndpointParams &params)
Constructor for PCI Dev.
Definition device.cc:577
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
Definition device.cc:599
virtual AddrRangeList getAddrRanges() const =0
Every PIO device is obliged to provide an implementation that returns the address ranges the device responds to.
System DMA Engine class for AMD dGPU.
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
void setGfxRptrHi(uint32_t data)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void setGfxSize(uint32_t data)
void setGfxBaseLo(uint32_t data)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
void setGfxDoorbellLo(uint32_t data)
void setGfxBaseHi(uint32_t data)
STL vector class.
Definition stl.hh:37
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
AddrRange RangeSize(Addr start, Addr size)
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
Definition addr_range.hh:64
Addr start() const
Get the start address of the range.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true, and allows the programmer to specify a diagnostic printout.
Definition logging.hh:268
#define UNSERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Definition serialize.hh:634
#define SERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Definition serialize.hh:626
#define warn(...)
Definition logging.hh:288
Bitfield< 4 > s
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 33 > id
Bitfield< 0 > m
Bitfield< 0 > p
Bitfield< 54 > p
Definition pagetable.hh:70
Bitfield< 15 > system
Definition misc.hh:1032
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
constexpr int MMIO_BAR
std::ostream CheckpointOut
Definition serialize.hh:66
@ InterruptHandler
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
The "old style" exitSimLoop functions.
Bitfield< 10 > pasid
Definition x86_cpu.cc:129
uint64_t Tick
Tick count type.
Definition types.hh:58
constexpr uint32_t ROM_SIZE
static constexpr uint32_t IH_OFFSET_SHIFT
Packet * PacketPtr
static constexpr uint32_t MMHUB_OFFSET_SHIFT
static constexpr int AMDGPU_VM_COUNT
constexpr int FRAMEBUFFER_BAR
@ GRBM_MMIO_RANGE
Definition amdgpu_vm.hh:128
@ GFX_MMIO_RANGE
Definition amdgpu_vm.hh:127
@ IH_MMIO_RANGE
Definition amdgpu_vm.hh:129
@ MMHUB_MMIO_RANGE
Definition amdgpu_vm.hh:126
@ NBIO_MMIO_RANGE
Definition amdgpu_vm.hh:125
constexpr int DOORBELL_BAR
constexpr uint32_t VGA_ROM_DEFAULT
static constexpr uint32_t GRBM_OFFSET_SHIFT
Declaration of the Packet class.
#define PCI_INTERRUPT_PIN
Definition pcireg.h:190
#define PCI_DEVICE_SPECIFIC
Definition pcireg.h:54
#define PCI_CONFIG_SIZE
Definition pcireg.h:55
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
const std::string & name()
Definition trace.cc:48
Defines the PCI Express capability register and its associated bitfields for a PCIe device.
Definition pcireg.h:410

Generated on Mon May 26 2025 09:19:09 for gem5 by doxygen 1.13.2