gem5 [DEVELOP-FOR-25.1]
Loading...
Searching...
No Matches
amdgpu_device.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34#include <fstream>
35
36#include "debug/AMDGPUDevice.hh"
44#include "gpu-compute/shader.hh"
45#include "mem/abstract_mem.hh"
46#include "mem/packet.hh"
47#include "mem/packet_access.hh"
48#include "params/AMDGPUDevice.hh"
49#include "sim/byteswap.hh"
50#include "sim/sim_exit.hh"
51
52namespace gem5
53{
54
// Constructor: wires every GPU sub-block (SDMA engines, PM4 packet
// processors, interrupt handler, command processor, NBIO/GFX/MMHUB/SMU
// apertures) to this device, registers device memory with the system,
// and seeds hardcoded register values the amdgpu driver expects to read.
// NOTE(review): this page is a doxygen scrape — initializer-list lines
// 60-61/64 and one else-branch line (86) were lost in extraction; confirm
// against upstream gem5 before relying on the exact member-init order.
55AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
56 : PciEndpoint(p),
57 gpuMemMgr(p.memory_manager),
58 deviceIH(p.device_ih),
59 cp(p.cp),
62 _lastVMID(0),
63 deviceMem(name() + ".deviceMem", p.memories, false, "", false),
65 gpuId(p.gpu_id)
66{
67 uint64_t vram_size = 0;
68
69 // System pointer needs to be explicitly set for device memory since
70 // DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.
71 // Note this means the cache line size is system wide.
72 for (auto& m : p.memories) {
73 m->system(p.system);
74
75 // Add to system's device memory map.
76 p.system->addDeviceMemory(gpuMemMgr->getRequestorID(), m);
77
78 vram_size += m->getAddrRange().size();
79 }
80
81 vramSize = vram_size;
82
83 if (config().expansionROM) {
84 romRange = RangeSize(config().expansionROM, ROM_SIZE);
85 } else {
// NOTE(review): extraction gap — the fallback romRange assignment
// (doxygen line 86) is missing here; confirm against upstream.
87 }
88
// Translate the user-facing device name into a gfx architecture version.
89 if (p.device_name == "Vega10") {
90 gfx_version = GfxVersion::gfx900;
91 } else if (p.device_name == "MI100") {
92 gfx_version = GfxVersion::gfx908;
93 } else if (p.device_name == "MI200") {
94 gfx_version = GfxVersion::gfx90a;
95 } else if (p.device_name == "MI300X") {
96 gfx_version = GfxVersion::gfx942;
97 } else if (p.device_name == "MI355X") {
98 gfx_version = GfxVersion::gfx950;
99 } else {
100 panic("Unknown GPU device %s\n", p.device_name);
101 }
102
// Assign sequential IDs to the SDMA engines and record each engine's
// MMIO window so writeMMIO can route register writes to the right one.
103 int sdma_id = 0;
104 for (auto& s : p.sdmas) {
105 s->setGPUDevice(this);
106 s->setId(sdma_id);
107 sdmaIds.insert({sdma_id, s});
108 sdmaMmios.insert({sdma_id,
109 RangeSize(s->getMmioBase(), s->getMmioSize())});
110 DPRINTF(AMDGPUDevice, "SDMA%d has MMIO range %s\n", sdma_id,
111 sdmaMmios[sdma_id].to_string().c_str());
112 sdma_id++;
113 }
114
115 // Map SDMA MMIO addresses to functions
116 sdmaFunc.insert({0x81, &SDMAEngine::setGfxBaseLo});
117 sdmaFunc.insert({0x82, &SDMAEngine::setGfxBaseHi});
118 sdmaFunc.insert({0x88, &SDMAEngine::setGfxRptrHi});
119 sdmaFunc.insert({0x89, &SDMAEngine::setGfxRptrLo});
120 sdmaFunc.insert({0x92, &SDMAEngine::setGfxDoorbellLo});
121 sdmaFunc.insert({0xab, &SDMAEngine::setGfxDoorbellOffsetLo});
122 sdmaFunc.insert({0x80, &SDMAEngine::setGfxSize});
123 sdmaFunc.insert({0xb2, &SDMAEngine::setGfxWptrLo});
124 sdmaFunc.insert({0xb3, &SDMAEngine::setGfxWptrHi});
// Page-queue register offsets differ between Vega10 and the MI series.
125 if (p.device_name == "Vega10") {
126 sdmaFunc.insert({0xe1, &SDMAEngine::setPageBaseLo});
127 sdmaFunc.insert({0xe9, &SDMAEngine::setPageRptrLo});
128 sdmaFunc.insert({0xe8, &SDMAEngine::setPageRptrHi});
129 sdmaFunc.insert({0xf2, &SDMAEngine::setPageDoorbellLo});
130 sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
131 sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
132 sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
133 } else if (p.device_name == "MI100" || p.device_name == "MI200" ||
134 p.device_name == "MI300X" || p.device_name == "MI355X") {
135 sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
136 sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
137 sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
138 sdmaFunc.insert({0xea, &SDMAEngine::setPageDoorbellLo});
139 sdmaFunc.insert({0xd8, &SDMAEngine::setPageDoorbellOffsetLo});
140 sdmaFunc.insert({0x10b, &SDMAEngine::setPageWptrLo});
141 } else {
142 panic("Unknown GPU device %s\n", p.device_name);
143 }
144
145 // Setup PM4 packet processors and sanity check IDs
146 std::set<int> pm4_ids;
147 for (auto& pm4 : p.pm4_pkt_procs) {
148 pm4->setGPUDevice(this);
149 fatal_if(pm4_ids.count(pm4->getIpId()),
150 "Two PM4s with same IP IDs is not allowed");
151 pm4_ids.insert(pm4->getIpId());
152 pm4PktProcs.insert({pm4->getIpId(), pm4});
153
154 pm4Ranges.insert({pm4->getMMIORange(), pm4});
155 }
156
157 // There should be at least one PM4 packet processor with ID 0
158 fatal_if(!pm4PktProcs.count(0), "No default PM4 processor found");
159
// Give every sub-block a back-pointer to this device.
160 deviceIH->setGPUDevice(this);
161 cp->hsaPacketProc().setGPUDevice(this);
162 cp->setGPUDevice(this);
163 nbio.setGPUDevice(this);
164 gpuvm.setGPUDevice(this);
165 smu.setGPUDevice(this);
166
167 // Address aperture for device memory. We tell this to the driver and
168 // could possibly be anything, but these are the values used by hardware.
169 uint64_t mmhubBase = 0x8000ULL << 24;
170 uint64_t mmhubTop = 0x83ffULL << 24;
// VRAM size in MiB (1 MiB = 0x100000 bytes), as reported via MMIO.
171 uint64_t mmio_mem_size = vram_size / 0x100000;
172
173 // The driver adds + 1 to MMIO value to reduce the number of bits required
174 // to represent max memory size. Subtract one here before writing MMIO.
175 mmio_mem_size -= 0x1;
176
177 gpuvm.setMMHUBBase(mmhubBase);
178 gpuvm.setMMHUBTop(mmhubTop);
179
180 // Map other MMIO apertures based on gfx version. For MI300X+ these come
181 // from the ip discovery table (see ip_discovery_header struct in
182 // include/discovery.h in amdgpu driver. Common values for MI200 - MI350:
183 // NBIO 0x0 - 0x4280
184 // IH 0x4280 - 0x4980
185 // GRBM 0x8000 - 0xC000
186 // GFX 0x28000 - 0x3F000
187 // MMHUB 0x68000 - 0x6a120 (MI200)
188 // MMHUB 0x60D00 - 0x62E20 (MI3xx)
189 // SMU 0x5a000 - 0x5ace4
190 //
191 // This must be done before any calls to get/setRegVal.
192 gpuvm.setMMIOAperture(NBIO_MMIO_RANGE, AddrRange(0x0, 0x4280));
193 gpuvm.setMMIOAperture(IH_MMIO_RANGE, AddrRange(0x4280, 0x4980));
194 gpuvm.setMMIOAperture(GRBM_MMIO_RANGE, AddrRange(0x8000, 0xC000));
195 gpuvm.setMMIOAperture(GFX_MMIO_RANGE, AddrRange(0x28000, 0x3F000));
196 if (getGfxVersion() == GfxVersion::gfx942 ||
197 getGfxVersion() == GfxVersion::gfx950) {
198 gpuvm.setMMIOAperture(MMHUB_MMIO_RANGE, AddrRange(0x60D00, 0x62E20));
199 } else {
200 gpuvm.setMMIOAperture(MMHUB_MMIO_RANGE, AddrRange(0x68000, 0x6A120));
201 }
202 gpuvm.setMMIOAperture(SMU_MMIO_RANGE, AddrRange(0x5A000, 0x5ACE4));
203
204 // These are hardcoded register values to return what the driver expects
205 setRegVal(AMDGPU_MP0_SMN_C2PMSG_33, 0x80000000);
206
207 // There are different registers for different GPUs, so we set the value
208 // based on the GPU type specified by the user.
209 if (p.device_name == "Vega10") {
210 setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
211 setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
212 } else if (p.device_name == "MI100") {
213 setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
214 setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
215 setRegVal(MI100_MEM_SIZE_REG, mmio_mem_size);
216 } else if (p.device_name == "MI200") {
217 // This device can have either 64GB or 128GB of device memory.
218 // This limits to 16GB for simulation.
219 setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
220 setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
221 setRegVal(MI200_MEM_SIZE_REG, mmio_mem_size);
222 } else if (p.device_name == "MI300X" || p.device_name == "MI355X") {
223 // The MMIO addresses are the same in MI300X and MI355X
224 // VRAM size in MB (shifted right by 20 bits)
225 setRegVal(MI300X_FB_LOCATION_BASE, mmhubBase >> 24);
226 setRegVal(MI300X_FB_LOCATION_TOP, mmhubTop >> 24);
227 setRegVal(MI300X_MEM_SIZE_REG, mmio_mem_size);
228 } else {
229 panic("Unknown GPU device %s\n", p.device_name);
230 }
231
232 // IP discovery from VRAM for MI300X+. If ipt_binary is None, the assume
233 // the driver is being loaded using discovery=2 to read from the disk.
234 // In that case gem5 does not have to do anything special.
235 bool use_ip_discovery = false;
236
237 if (getGfxVersion() == GfxVersion::gfx942 ||
238 getGfxVersion() == GfxVersion::gfx950) {
239 use_ip_discovery = true;
240
241 if (p.ipt_binary == "") {
242 DPRINTF(AMDGPUDevice, "Assuming discovery=2 for IP discovery\n");
243 }
244 }
245
// Preload the IP discovery table image into the top of VRAM so the
// driver's discovery reads hit valid data.
246 if (use_ip_discovery && p.ipt_binary != "") {
247 // From ROCk driver: amdgpu/amdgpu_discovery.h:
248 constexpr uint64_t DISCOVERY_TMR_OFFSET = (64 << 10);
249 constexpr int IPT_SIZE_DW = 0xa00;
250 uint64_t ip_table_base = (mmio_mem_size << 20) - DISCOVERY_TMR_OFFSET;
251
252 DPRINTF(AMDGPUDevice, "Using IP discovery file %s\n", p.ipt_binary);
253
// NOTE(review): no error check on open/read — a bad ipt_binary path
// silently writes an uninitialized table; consider fatal_if on failure.
254 std::ifstream iptBin;
255 std::array<uint32_t, IPT_SIZE_DW> ipTable;
256 iptBin.open(p.ipt_binary, std::ios::binary);
257 iptBin.read((char *)ipTable.data(), IPT_SIZE_DW*4);
258 iptBin.close();
259
260 // Read from the IP discovery ROM starting at offset 0x100 (DW 0x40)
261 for (int ipt_dword = 0x0; ipt_dword < IPT_SIZE_DW; ipt_dword++) {
262 Addr ipt_addr = ip_table_base + ipt_dword*4;
263
264 // The driver is using bit 32 of the address for something not
265 // part of the address. Fixup the address to be ipt_addr >> 31
266 // OR'd with the lower 31 bits and 0x80000000.
267 Addr ipt_addr_hi = ipt_addr >> 31;
268 Addr fixup_addr = (ipt_addr_hi << 32) | (ipt_addr & 0x7fffffff)
269 | 0x80000000;
270
271 setRegVal(fixup_addr, ipTable[ipt_dword]);
272 DPRINTF(AMDGPUDevice, "IPTable wrote dword %d (%x) to %lx\n",
273 ipt_dword, ipTable[ipt_dword], fixup_addr);
274 }
275 }
276}
277
// Service a read of the GPU's expansion ROM (VBIOS) by forwarding it to
// the VGA ROM image in host physical memory at VGA_ROM_DEFAULT.
// NOTE(review): the signature line was lost in extraction — per the member
// list on this page this is AMDGPUDevice::readROM(PacketPtr pkt); confirm.
278void
280{
// Mask to an offset within the ROM (ROM_SIZE assumed power of two).
281 Addr rom_offset = pkt->getAddr() & (ROM_SIZE - 1);
282
283 // Read directly from the VGA ROM region. For multiple GPUs, this means
284 // every GPU must be the same type. However, this allows for one less
285 // input file as the GPU VBIOS is already part of the gem5 resources disk
286 // image and loaded at the VGA_ROM_DEFAULT address as part of readfile.
287 RequestPtr request = std::make_shared<Request>(
288 VGA_ROM_DEFAULT + rom_offset, pkt->getSize(), 0, vramRequestorId());
289
290 auto readPkt = new Packet(request, MemCmd::ReadReq);
291 readPkt->allocate();
292
293 system->getPhysMem().access(readPkt);
294
295 DPRINTF(AMDGPUDevice, "Read from VGA ROM offset %#x returned %#x\n",
296 rom_offset, readPkt->getUintX(ByteOrder::little));
297
// Copy the functional read result into the original PCI packet.
// NOTE(review): readPkt is not deleted here — possible leak; compare
// with writeROM below which also does not free its packet.
298 pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
299}
300
// Service a write to the expansion ROM region by mirroring it into the
// VGA ROM image at VGA_ROM_DEFAULT in host physical memory.
// NOTE(review): signature line lost in extraction — per the member list
// this is AMDGPUDevice::writeROM(PacketPtr pkt); confirm.
301void
303{
304 assert(isROM(pkt->getAddr()));
305
306 // Read directly from the VGA ROM region at VGA_ROM_DEFAULT address.
307 Addr rom_offset = pkt->getAddr() - romRange.start();
308 uint64_t rom_data = pkt->getUintX(ByteOrder::little);
309
310 RequestPtr request = std::make_shared<Request>(
311 VGA_ROM_DEFAULT + rom_offset, pkt->getSize(), 0, vramRequestorId());
312
313 auto writePkt = new Packet(request, MemCmd::WriteReq);
314 writePkt->allocate();
315 writePkt->setUintX(rom_data, ByteOrder::little);
316
// Functional (atomic) access into host physical memory.
317 system->getPhysMem().access(writePkt);
318
319 DPRINTF(AMDGPUDevice, "Wrote to VGA ROM offset %#x value %#x\n",
320 rom_offset, writePkt->getUintX(ByteOrder::little));
321}
322
// Report the address ranges this device responds to: the expansion ROM
// range plus all assigned (non-zero) PCI BAR ranges.
// NOTE(review): the AddrRangeList/signature lines were lost in extraction —
// per the member list this is AMDGPUDevice::getAddrRanges() const; confirm.
325{
327 AddrRangeList ret_ranges;
328 ret_ranges.push_back(romRange);
329
330 // If the range starts at zero assume OS hasn't assigned it yet. Do not
331 // return ranges starting with zero as they will surely overlap with
332 // another range causing the I/O crossbar to fatal.
333 for (auto & r : ranges) {
334 if (r.start() != 0) {
335 ret_ranges.push_back(r);
336 }
337 }
338
339 return ret_ranges;
340}
341
// PCI config-space read. Standard-header offsets are delegated to the base
// class (that branch's lines were lost in extraction); device-specific
// offsets inside the PCIe capability (PXCAP) window are served from the
// local pxcap register file. Also counts PCI_INTERRUPT_PIN reads to trigger
// a checkpoint before the first MMIO (see comment below).
// NOTE(review): signature lost — AMDGPUDevice::readConfig(PacketPtr pkt).
342Tick
344{
345 int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
346
// NOTE(review): extraction gap (doxygen lines 347-348) — upstream this is
// the "if (offset < PCI_DEVICE_SPECIFIC) return base readConfig" branch.
349 } else {
350 if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
351 int pxcap_offset = offset - PXCAP_BASE;
352
// Serve 1/2/4-byte reads from the raw PXCAP byte array.
353 switch (pkt->getSize()) {
354 case sizeof(uint8_t):
355 pkt->setLE<uint8_t>(pxcap.data[pxcap_offset]);
// NOTE(review): the DPRINTF(...) head lines were lost in extraction
// before each of the three trace-message bodies below.
357 "Read PXCAP: dev %#x func %#x reg %#x 1 bytes: "
358 "data = %#x\n",
359 _devAddr.dev, _devAddr.func, pxcap_offset,
360 (uint32_t)pkt->getLE<uint8_t>());
361 break;
362 case sizeof(uint16_t):
363 pkt->setLE<uint16_t>(
364 *(uint16_t*)&pxcap.data[pxcap_offset]);
366 "Read PXCAP: dev %#x func %#x reg %#x 2 bytes: "
367 "data = %#x\n",
368 _devAddr.dev, _devAddr.func, pxcap_offset,
369 (uint32_t)pkt->getLE<uint16_t>());
370 break;
371 case sizeof(uint32_t):
372 pkt->setLE<uint32_t>(
373 *(uint32_t*)&pxcap.data[pxcap_offset]);
375 "Read PXCAP: dev %#x func %#x reg %#x 4 bytes: "
376 "data = %#x\n",
377 _devAddr.dev, _devAddr.func, pxcap_offset,
378 (uint32_t)pkt->getLE<uint32_t>());
379 break;
380 default:
381 panic("Invalid access size (%d) for amdgpu PXCAP %#x\n",
382 pkt->getSize(), pxcap_offset);
383 }
384 pkt->makeAtomicResponse();
385 } else {
386 warn("Device specific offset %d not implemented!\n", offset);
387 }
388 }
389
390 // Before sending MMIOs the driver sends three interrupts in a row.
391 // Use this to trigger creating a checkpoint to restore in timing mode.
392 // This is only necessary until we can create a "hole" in the KVM VM
393 // around the VGA ROM region such that KVM exits and sends requests to
394 // this device rather than the KVM VM.
// NOTE(review): the enclosing condition (doxygen line 395) was lost here.
396 if (offset == PCI_INTERRUPT_PIN) {
397 if (++init_interrupt_count == 3) {
398 DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n");
399 exitSimLoop("checkpoint", 0, curTick() + configDelay + 1);
400 }
401 } else {
// NOTE(review): extraction gap (line 402) — presumably resets the
// interrupt counter; confirm against upstream.
403 }
404 }
405
406 return configDelay;
407}
408
// PCI config-space write. Intercepts the Linux ROM-BAR sizing probe so the
// reported expansion ROM size is 4 KiB (large enough for the VBIOS); other
// standard-header writes go to the base class. PXCAP-window writes are
// copied byte-wise into the local pxcap register file.
// NOTE(review): signature lost — AMDGPUDevice::writeConfig(PacketPtr pkt).
409Tick
411{
412 [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
413 DPRINTF(AMDGPUDevice, "Write Config: from offset: %#x size: %#x "
414 "data: %#x\n", offset, pkt->getSize(),
415 pkt->getUintX(ByteOrder::little));
416
// NOTE(review): extraction gap (doxygen line 417) — the guard selecting
// standard-header offsets (offset < PCI_DEVICE_SPECIFIC) is missing; the
// stray closing brace after the else below matches it.
418 // For the Expansion ROM BAR, Linux will write ~0x7ff before reading
419 // the ROM bar size. If we simply return the written value, the ROM
420 // size is only 0x800 which is too small for the GPU VBIOS. Here we
421 // override the default PciDevice behavior and set the next read to
422 // return 4kiB size. This is enough to load the *used* portions of
423 // the VBIOS. See how PCI_ROM_ADDRESS is handled in the function:
424 // github.com/torvalds/linux/blob/master/drivers/pci/probe.c#L176
425 if (offset == PCI0_ROM_BASE_ADDR &&
426 letoh(pkt->getLE<uint32_t>()) == 0xfffff800) {
427 DPRINTF(AMDGPUDevice, "Setting expansion ROM size to 0x1000\n");
428
429 config().expansionROM = 0xfffff000;
430 } else {
431 return PciEndpoint::writeConfig(pkt);
432 }
433 }
434
435 if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
436 uint8_t *pxcap_data = &(pxcap.data[0]);
437 int pxcap_offset = offset - PXCAP_BASE;
438
439 DPRINTF(AMDGPUDevice, "Writing PXCAP offset %d size %d\n",
440 pxcap_offset, pkt->getSize());
441
442 memcpy(pxcap_data + pxcap_offset, pkt->getConstPtr<void>(),
443 pkt->getSize());
444 }
445
446 pkt->makeAtomicResponse();
447
448 return configDelay;
449}
450
// Trace a PIO access and mark the packet as an atomic response so the PCI
// layer can complete it. Called at the end of both read() and write().
// NOTE(review): signature lost — takes (PacketPtr pkt, bool read) based on
// the "read ? ... : ..." usage below; confirm against upstream.
451void
453{
454 DPRINTF(AMDGPUDevice, "%s from addr %#x size: %#x data: %#x\n",
455 read ? "Read" : "Write", pkt->getAddr(), pkt->getSize(),
456 pkt->getUintX(ByteOrder::little));
457
458 pkt->makeAtomicResponse();
459}
460
// Functional read of the framebuffer BAR. NBIO special addresses take
// priority; otherwise the value is read through a CU memory port (so GPU
// caches are visible), falling back to the device backing store if the
// functional read fails.
// NOTE(review): signature lost — AMDGPUDevice::readFrame(PacketPtr, Addr).
461void
463{
464 DPRINTF(AMDGPUDevice, "Read framebuffer address %#lx\n", offset);
465
466 /*
467 * Return data for frame reads in priority order: (1) Special addresses
468 * first, ignoring any writes from driver. (2) Any other address from
469 * device backing store / abstract memory class functionally.
470 */
471 if (nbio.readFrame(pkt, offset)) {
472 return;
473 }
474
475 /*
476 * Read the value from device memory. This must be done functionally
477 * because this method is called by the PCIEndpoint::read method which
478 * is a non-timing read.
479 */
480 RequestPtr req = std::make_shared<Request>(
481 offset, pkt->getSize(), 0, vramRequestorId());
482
483 PacketPtr readPkt = new Packet(req, MemCmd::ReadReq);
484 uint8_t *dataPtr = new uint8_t[pkt->getSize()];
485 readPkt->dataDynamic(dataPtr);
486 readPkt->req->setGPUFuncAccess(true);
// Suppress errors so a miss in the GPU cache hierarchy is detectable
// via FunctionalReadError rather than fatal.
487 readPkt->setSuppressFuncError();
488 cp->shader()->cuList[0]->memPort[0].sendFunctional(readPkt);
489 if (readPkt->cmd == MemCmd::FunctionalReadError) {
490 delete readPkt;
491 delete[] dataPtr;
// NOTE(review): the declarations below SHADOW the outer req/readPkt;
// after this block closes, the code at the bottom dereferences (and
// deletes) the OUTER readPkt, which was just deleted above — a
// use-after-free/double-delete on this fallback path, and the inner
// readPkt leaks. Verify against upstream gem5 and fix if confirmed.
492 RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
// NOTE(review): extraction gap (line 493) — the requestor-id argument
// closing this call is missing.
494 PacketPtr readPkt = Packet::createRead(req);
495 uint8_t *dataPtr = new uint8_t[pkt->getSize()];
496 readPkt->dataDynamic(dataPtr);
497
498 auto system = cp->shader()->gpuCmdProc.system();
499 system->getDeviceMemory(readPkt)->access(readPkt);
500 }
501
502 pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
503 delete readPkt;
504}
505
// Doorbell-BAR reads are served from the recorded MMIO trace.
// NOTE(review): signature lost — AMDGPUDevice::readDoorbell(PacketPtr, Addr).
506void
508{
509 DPRINTF(AMDGPUDevice, "Read doorbell %#lx\n", offset);
510 mmioReader.readFromTrace(pkt, DOORBELL_BAR, offset);
511}
512
// MMIO-BAR read: first fill the packet from the MMIO trace, then let the
// aperture owner (NBIO/GRBM/GFX/MMHUB/SMU) overwrite it for registers that
// are modeled dynamically.
// NOTE(review): signature lost — AMDGPUDevice::readMMIO(PacketPtr, Addr),
// per the member list on this page.
513void
515{
516 AddrRange aperture = gpuvm.getMMIOAperture(offset);
517 Addr aperture_offset = offset - aperture.start();
518
519 // By default read from MMIO trace. Overwrite the packet for a select
520 // few more dynamic MMIOs.
521 DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
522 mmioReader.readFromTrace(pkt, MMIO_BAR, offset);
523
// Dispatch on the aperture; GRBM/MMHUB/SMU register indices are the
// byte offset shifted by their per-block register stride.
524 if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
525 DPRINTF(AMDGPUDevice, "NBIO base\n");
526 nbio.readMMIO(pkt, aperture_offset);
527 } else if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
528 DPRINTF(AMDGPUDevice, "GRBM base\n");
529 gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
530 } else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
531 DPRINTF(AMDGPUDevice, "GFX base\n");
532 gfx.readMMIO(pkt, aperture_offset);
533 } else if (aperture == gpuvm.getMMIORange(MMHUB_MMIO_RANGE)) {
534 DPRINTF(AMDGPUDevice, "MMHUB base\n");
535 gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
536 } else if (aperture == gpuvm.getMMIORange(SMU_MMIO_RANGE)) {
537 DPRINTF(AMDGPUDevice, "SMU base\n");
538 smu.readMMIO(pkt, aperture_offset >> SMU_OFFSET_SHIFT);
539 } else {
540 DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for read %#x\n", offset);
541 }
542}
543
// Functional write to the framebuffer BAR: invalidate the matching L2
// lines in every CU, record GART entries and NBIO special addresses, then
// commit the data to the device backing memory.
// NOTE(review): signature lost — AMDGPUDevice::writeFrame(PacketPtr, Addr).
544void
546{
547 DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx (size %d)\n", offset,
548 pkt->getSize());
549
// Keep GPU caches coherent with this out-of-band write.
550 for (auto& cu: CP()->shader()->cuList) {
551 Addr aligned_addr = offset & ~(gpuMemMgr->getCacheLineSize() - 1);
552 cu->sendInvL2(aligned_addr);
553 }
554
555 Addr aperture = gpuvm.getFrameAperture(offset);
556 Addr aperture_offset = offset - aperture;
557
558 // Record the value
559 if (aperture == gpuvm.gartBase()) {
560 gpuvm.gartTable[aperture_offset] = pkt->getUintX(ByteOrder::little);
561 DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", aperture_offset,
562 gpuvm.gartTable[aperture_offset]);
563 }
564
565 nbio.writeFrame(pkt, offset);
566
567 /*
568 * Write the value to device memory. This must be done functionally
569 * because this method is called by the PCIEndpoint::write method which
570 * is a non-timing write.
571 */
572 RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
// NOTE(review): extraction gap (line 573) — the requestor-id argument
// closing this call is missing.
574 PacketPtr writePkt = Packet::createWrite(req);
575 uint8_t *dataPtr = new uint8_t[pkt->getSize()];
576 std::memcpy(dataPtr, pkt->getPtr<uint8_t>(),
577 pkt->getSize() * sizeof(uint8_t));
578 writePkt->dataDynamic(dataPtr);
579
580 auto system = cp->shader()->gpuCmdProc.system();
581
582 // If for some reason no device memory is found for this address, ignore
583 // the packet. This is an extremely rare situation and seems to only
584 // happen with one address that is not important, therefore warn only.
585 if (system->getDeviceMemory(writePkt)) {
586 system->getDeviceMemory(writePkt)->access(writePkt);
587 } else {
588 warn("Unable to find device memory for address %#lx\n", offset);
589 }
590
591 delete writePkt;
592}
593
// Doorbell-BAR write: route the doorbell value to the queue type that was
// registered for this offset (PM4 compute/gfx, SDMA gfx/page/RLC, AQL, or
// the interrupt handler). Unknown offsets are parked as pending packets
// and replayed once the doorbell type is registered.
// NOTE(review): signature lost — AMDGPUDevice::writeDoorbell(PacketPtr, Addr).
594void
596{
597 DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset);
598
599 if (doorbells.find(offset) != doorbells.end()) {
600 QueueType q_type = doorbells[offset].qtype;
601 int ip_id = doorbells[offset].ip_id;
602 DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
603 offset, q_type);
604 switch (q_type) {
605 case Compute:
606 assert(pm4PktProcs.count(ip_id));
607 pm4PktProcs[ip_id]->process(
608 pm4PktProcs[ip_id]->getQueue(offset),
609 pkt->getLE<uint64_t>());
610 break;
611 case Gfx:
612 assert(pm4PktProcs.count(ip_id));
613 pm4PktProcs[ip_id]->process(
614 pm4PktProcs[ip_id]->getQueue(offset, true),
615 pkt->getLE<uint64_t>());
616 break;
617 case SDMAGfx: {
618 SDMAEngine *sdmaEng = getSDMAEngine(offset);
619 sdmaEng->processGfx(pkt->getLE<uint64_t>());
620 } break;
621 case SDMAPage: {
622 SDMAEngine *sdmaEng = getSDMAEngine(offset);
623 sdmaEng->processPage(pkt->getLE<uint64_t>());
624 } break;
625 case ComputeAQL: {
626 assert(pm4PktProcs.count(ip_id));
// AQL doorbells carry the last-written index; HSA hw scheduler and
// PM4 read index both expect the next index, hence the +1.
627 cp->hsaPacketProc().hwScheduler()->write(offset,
628 pkt->getLE<uint64_t>() + 1);
629 pm4PktProcs[ip_id]->updateReadIndex(offset,
630 pkt->getLE<uint64_t>() + 1);
631 } break;
632 case InterruptHandler:
633 deviceIH->updateRptr(pkt->getLE<uint32_t>());
634 break;
635 case RLC: {
636 SDMAEngine *sdmaEng = getSDMAEngine(offset);
637 sdmaEng->processRLC(offset, pkt->getLE<uint64_t>());
638 } break;
639 default:
// NOTE(review): "unkown" typo in this runtime string — left as-is here;
// fixing it would change program output.
640 panic("Write to unkown queue type!");
641 }
642 } else {
643 warn("Unknown doorbell offset: %lx. Saving to pending doorbells.\n",
644 offset);
645
646 // We have to ACK the PCI packet immediately, so create a copy of the
647 // packet here to send again. The packet data contains the value of
648 // the doorbell to write so we need to copy that as the original
649 // packet gets deleted after the PCI write() method returns.
650 RequestPtr pending_req(pkt->req);
651 PacketPtr pending_pkt = Packet::createWrite(pending_req);
652 uint8_t *pending_data = new uint8_t[pkt->getSize()];
653 memcpy(pending_data, pkt->getPtr<uint8_t>(), pkt->getSize());
654 pending_pkt->dataDynamic(pending_data);
655
656 pendingDoorbellPkts.emplace(offset, pending_pkt);
657 }
658}
659
// MMIO-BAR write: SDMA register windows are checked first (dispatching
// through the sdmaFunc offset->setter table), then PM4 processor ranges,
// and finally the fixed apertures (GRBM/IH/NBIO/GFX/SMU).
// NOTE(review): signature lost — AMDGPUDevice::writeMMIO(PacketPtr, Addr),
// per the member list on this page.
660void
662{
663 AddrRange aperture = gpuvm.getMMIOAperture(offset);
664 Addr aperture_offset = offset - aperture.start();
665
666 DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);
667
668 // Check SDMA functions first, then fallback to MMIO ranges.
669 for (int idx = 0; idx < sdmaIds.size(); ++idx) {
670 if (sdmaMmios[idx].contains(offset)) {
// Registers are dword-indexed within the engine's window.
671 Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
672 if (sdmaFunc.count(sdma_offset)) {
673 DPRINTF(AMDGPUDevice, "Calling SDMA%d MMIO function %lx\n",
674 idx, sdma_offset);
675 sdmaFuncPtr mptr = sdmaFunc[sdma_offset];
676 (getSDMAById(idx)->*mptr)(pkt->getLE<uint32_t>());
677 } else {
678 DPRINTF(AMDGPUDevice, "Unknown SDMA%d MMIO: %#lx\n", idx,
679 sdma_offset);
680 }
681
682 return;
683 }
684 }
685
686 // Check PM4s next, returning to avoid duplicate writes.
687 for (auto& [range, pm4_proc] : pm4Ranges) {
688 if (range.contains(offset)) {
689 // PM4 MMIOs are offset based on the MMIO range start
690 Addr ip_offset = offset - range.start();
691 pm4_proc->writeMMIO(pkt, ip_offset >> GRBM_OFFSET_SHIFT);
692
693 return;
694 }
695 }
696
697 if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
698 DPRINTF(AMDGPUDevice, "GRBM base\n");
699 gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
700 } else if (aperture == gpuvm.getMMIORange(IH_MMIO_RANGE)) {
701 DPRINTF(AMDGPUDevice, "IH base\n");
702 deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
703 } else if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
704 DPRINTF(AMDGPUDevice, "NBIO base\n");
705 nbio.writeMMIO(pkt, aperture_offset);
706 } else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
707 DPRINTF(AMDGPUDevice, "GFX base\n");
708 gfx.writeMMIO(pkt, aperture_offset);
709 } else if (aperture == gpuvm.getMMIORange(SMU_MMIO_RANGE)) {
710 DPRINTF(AMDGPUDevice, "SMU base\n");
711 smu.writeMMIO(pkt, aperture_offset >> SMU_OFFSET_SHIFT);
712 } else {
713 DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for write %#x\n", offset);
714 }
715}
716
// Top-level PIO read entry point: dispatch to the ROM handler or to the
// per-BAR read helper (framebuffer, doorbell, MMIO), then complete the
// packet via dispatchAccess.
// NOTE(review): signature lost — AMDGPUDevice::read(PacketPtr pkt).
717Tick
719{
720 if (isROM(pkt->getAddr())) {
721 readROM(pkt);
722 } else {
723 int barnum = -1;
724 Addr offset = 0;
725 getBAR(pkt->getAddr(), barnum, offset);
726
727 switch (barnum) {
728 case FRAMEBUFFER_BAR:
729 readFrame(pkt, offset);
730 break;
731 case DOORBELL_BAR:
732 readDoorbell(pkt, offset);
733 break;
734 case MMIO_BAR:
735 readMMIO(pkt, offset);
736 break;
737 default:
738 panic("Request with address out of mapped range!");
739 }
740 }
741
742 dispatchAccess(pkt, true);
743 return pioDelay;
744}
745
// Top-level PIO write entry point: ROM writes return early; otherwise
// dispatch to the per-BAR write helper and complete via dispatchAccess.
// NOTE(review): signature lost — AMDGPUDevice::write(PacketPtr pkt).
746Tick
748{
749 if (isROM(pkt->getAddr())) {
750 writeROM(pkt);
751
752 dispatchAccess(pkt, false);
753
754 return pioDelay;
755 }
756
757 int barnum = -1;
758 Addr offset = 0;
759 getBAR(pkt->getAddr(), barnum, offset);
760
761 switch (barnum) {
762 case FRAMEBUFFER_BAR:
763 writeFrame(pkt, offset);
764 break;
765 case DOORBELL_BAR:
766 writeDoorbell(pkt, offset);
767 break;
768 case MMIO_BAR:
769 writeMMIO(pkt, offset);
770 break;
771 default:
772 panic("Request with address out of mapped range!");
773 }
774
775 // Record only if there is non-zero value, or a value to be overwritten.
776 // Reads return 0 by default.
777 uint64_t data = pkt->getUintX(ByteOrder::little);
778
779 DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
780 pkt->getAddr(), data);
781
782 dispatchAccess(pkt, false);
783
784 return pioDelay;
785}
786
// Replay a doorbell write that arrived before its queue type was
// registered (see writeDoorbell's pending path).
// NOTE(review): signature lost — processPendingDoorbells(uint32_t offset)
// per the member list; the replay/delete/erase body lines (792-794) were
// also lost in extraction — confirm against upstream.
787void
789{
790 if (pendingDoorbellPkts.count(offset)) {
791 DPRINTF(AMDGPUDevice, "Sending pending doorbell %x\n", offset);
795 }
796}
797
// Read a 32-bit register value by issuing a functional read through the
// MMIO read path. Mirrors the driver's address fixup (bit 31 selects VRAM
// vs. register addressing).
// NOTE(review): signature lost — AMDGPUDevice::getRegVal(uint64_t addr)
// or similar; confirm against upstream.
798uint32_t
800{
801 // This is somewhat of a guess based on amdgpu_device_mm_access
802 // in amdgpu_device.c in the ROCk driver. If bit 32 is 1 then
803 // assume VRAM and use full address, otherwise assume register
804 // address and only user lower 31 bits.
805 Addr fixup_addr = bits(addr, 31, 31) ? addr : addr & 0x7fffffff;
806
// Stack-backed 4-byte read packet routed through readMMIO.
807 uint32_t pkt_data = 0;
808 RequestPtr request = std::make_shared<Request>(fixup_addr,
809 sizeof(uint32_t), 0 /* flags */, vramRequestorId());
810 PacketPtr pkt = Packet::createRead(request);
811 pkt->dataStatic((uint8_t *)&pkt_data);
// NOTE(review): readMMIO is passed the raw addr, not fixup_addr — the
// fixup is only used for the Request/trace message; verify intent.
812 readMMIO(pkt, addr);
813 DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n",
814 fixup_addr, pkt->getLE<uint32_t>());
815
816 pkt_data = pkt->getLE<uint32_t>();
817 delete pkt;
818
819 return pkt_data;
820}
821
822void
823AMDGPUDevice::setRegVal(uint64_t addr, uint32_t value)
824{
825 DPRINTF(AMDGPUDevice, "Setting register 0x%lx to %x\n",
826 addr, value);
827
828 uint32_t pkt_data = value;
829 RequestPtr request = std::make_shared<Request>(addr,
830 sizeof(uint32_t), 0 /* flags */, vramRequestorId());
831 PacketPtr pkt = Packet::createWrite(request);
832 pkt->dataStatic((uint8_t *)&pkt_data);
833 writeMMIO(pkt, addr);
834 delete pkt;
835}
836
// Register the queue type (and owning IP block id) for a doorbell offset
// so subsequent writeDoorbell calls know where to route the value.
// NOTE(review): signature lost — per the member list this is
// setDoorbellType(uint32_t offset, QueueType qt, int ip_id = 0).
837void
839{
840 DPRINTF(AMDGPUDevice, "Setting doorbell type for %x\n", offset);
841 doorbells[offset].qtype = qt;
842 doorbells[offset].ip_id = ip_id;
843}
844
// Remove a doorbell registration; later writes to this offset will be
// parked as pending until it is registered again.
// NOTE(review): signature lost — unsetDoorbell(uint32_t offset) per the
// member list on this page.
845void
847{
848 doorbells.erase(offset);
849}
850
851void
856
// NOTE(review): the return type/signature lines were destroyed by
// extraction; the body is an SDMA-engine lookup by numeric id — upstream
// gem5 names this getSDMAById(int id). Asserts the id was registered in
// the constructor's sdmaIds map.
859{
864 assert(sdmaIds.count(id));
865
866 return sdmaIds[id];
867}
868
874
875void
880
// Checkpoint support: flatten the doorbell map, the SDMA engine map, and
// the VMID->queue-id-set map into C-style arrays (the serialization layer
// works on scalars/arrays), then serialize device memory and the GPUVM.
// NOTE(review): signature lost — serialize(CheckpointOut &cp) const per
// the member list; the PciEndpoint::serialize(cp) call (line 885) was
// also lost in extraction.
881void
883{
884 // Serialize the PciEndpoint base class
886
887 uint64_t doorbells_size = doorbells.size();
888 uint64_t sdma_engs_size = sdmaEngs.size();
889 uint64_t used_vmid_map_size = usedVMIDs.size();
890
891 SERIALIZE_SCALAR(doorbells_size);
892 SERIALIZE_SCALAR(sdma_engs_size);
893 // Save the number of vmids used
894 SERIALIZE_SCALAR(used_vmid_map_size);
895
896 // Make a c-style array of the regs to serialize
897 auto doorbells_offset = std::make_unique<uint32_t[]>(doorbells_size);
898 auto doorbells_queues = std::make_unique<QueueType[]>(doorbells_size);
899 auto doorbells_ip_ids = std::make_unique<int[]>(doorbells_size);
900 auto sdma_engs_offset = std::make_unique<uint32_t[]>(sdma_engs_size);
901 auto sdma_engs = std::make_unique<int[]>(sdma_engs_size);
902 auto used_vmids = std::make_unique<int[]>(used_vmid_map_size);
903 auto used_queue_id_sizes = std::make_unique<int[]>(used_vmid_map_size);
904 std::vector<int> used_vmid_sets;
905
906 int idx = 0;
907 for (auto & it : doorbells) {
908 doorbells_offset[idx] = it.first;
909 doorbells_queues[idx] = it.second.qtype;
910 doorbells_ip_ids[idx] = it.second.ip_id;
911 ++idx;
912 }
913
914 idx = 0;
915 for (auto & it : sdmaEngs) {
916 sdma_engs_offset[idx] = it.first;
917 sdma_engs[idx] = it.second->getId();
918 ++idx;
919 }
920
// Flatten each VMID's queue-id set; per-VMID set sizes are stored so
// unserialize can re-partition the flat array.
921 idx = 0;
922 for (auto & it : usedVMIDs) {
923 used_vmids[idx] = it.first;
924 used_queue_id_sizes[idx] = it.second.size();
925 std::vector<int> set_vector(it.second.begin(), it.second.end());
926 used_vmid_sets.insert(used_vmid_sets.end(),
927 set_vector.begin(), set_vector.end());
928 ++idx;
929 }
930
931 int num_queue_id = used_vmid_sets.size();
932 auto vmid_array = std::make_unique<int[]>(num_queue_id);
933 std::copy(used_vmid_sets.begin(), used_vmid_sets.end(), vmid_array.get());
934
935 SERIALIZE_UNIQUE_PTR_ARRAY(doorbells_offset, doorbells_size);
936 SERIALIZE_UNIQUE_PTR_ARRAY(doorbells_queues, doorbells_size);
937 SERIALIZE_UNIQUE_PTR_ARRAY(doorbells_ip_ids, doorbells_size);
938 SERIALIZE_UNIQUE_PTR_ARRAY(sdma_engs_offset, sdma_engs_size);
939 SERIALIZE_UNIQUE_PTR_ARRAY(sdma_engs, sdma_engs_size);
940 // Save the vmids used in an array
941 SERIALIZE_UNIQUE_PTR_ARRAY(used_vmids, used_vmid_map_size);
942 // Save the size of the set of queue ids mapped to each vmid
943 SERIALIZE_UNIQUE_PTR_ARRAY(used_queue_id_sizes, used_vmid_map_size);
944 // Save all the queue ids used for all the vmids
945 SERIALIZE_UNIQUE_PTR_ARRAY(vmid_array, num_queue_id);
946 // Save the total number of queue idsused
947 SERIALIZE_SCALAR(num_queue_id);
948
949 // Serialize the device memory
950 deviceMem.serializeSection(cp, "deviceMem");
951 gpuvm.serializeSection(cp, "GPUVM");
952}
953
// Checkpoint restore: inverse of serialize() — rebuild the doorbell map,
// re-associate SDMA engines by id, and re-partition the flat queue-id
// array back into per-VMID sets, then restore device memory and GPUVM.
// NOTE(review): signature lost — unserialize(CheckpointIn &cp) per the
// member list; the PciEndpoint::unserialize(cp) call (line 958) was also
// lost in extraction.
954void
956{
957 // Unserialize the PciEndpoint base class
959
960 uint64_t doorbells_size = 0;
961 uint64_t sdma_engs_size = 0;
962 uint64_t used_vmid_map_size = 0;
963
964 UNSERIALIZE_SCALAR(doorbells_size);
965 UNSERIALIZE_SCALAR(sdma_engs_size);
966 UNSERIALIZE_SCALAR(used_vmid_map_size);
967
968
969 if (doorbells_size > 0) {
970 auto doorbells_offset = std::make_unique<uint32_t[]>(doorbells_size);
971 auto doorbells_queues = std::make_unique<QueueType[]>(doorbells_size);
972 auto doorbells_ip_ids = std::make_unique<int[]>(doorbells_size);
973
974 UNSERIALIZE_UNIQUE_PTR_ARRAY(doorbells_offset, doorbells_size);
975 UNSERIALIZE_UNIQUE_PTR_ARRAY(doorbells_queues, doorbells_size);
976 UNSERIALIZE_UNIQUE_PTR_ARRAY(doorbells_ip_ids, doorbells_size);
977
978 for (int idx = 0; idx < doorbells_size; ++idx) {
979 doorbells[doorbells_offset[idx]].qtype = doorbells_queues[idx];
980 doorbells[doorbells_offset[idx]].ip_id = doorbells_ip_ids[idx];
981 }
982 }
983
984 if (sdma_engs_size > 0) {
985 auto sdma_engs_offset = std::make_unique<uint32_t[]>(sdma_engs_size);
986 auto sdma_engs = std::make_unique<int[]>(sdma_engs_size);
987
988 UNSERIALIZE_UNIQUE_PTR_ARRAY(sdma_engs_offset, sdma_engs_size);
989 UNSERIALIZE_UNIQUE_PTR_ARRAY(sdma_engs, sdma_engs_size);
990
// SDMA engines are SimObjects recreated from config; only the
// offset->engine association is restored here.
991 for (int idx = 0; idx < sdma_engs_size; ++idx) {
992 int sdma_id = sdma_engs[idx];
993 assert(sdmaIds.count(sdma_id));
994 SDMAEngine *sdma = sdmaIds[sdma_id];
995 sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
996 }
997 }
998
999 if (used_vmid_map_size > 0) {
1000 auto used_vmids = std::make_unique<int[]>(used_vmid_map_size);
1001 auto used_queue_id_sizes = std::make_unique<int[]>(used_vmid_map_size);
1002 int num_queue_id = 0;
1003 std::vector<int> used_vmid_sets;
1004 // Extract the total number of queue ids used
1005 UNSERIALIZE_SCALAR(num_queue_id);
1006 auto vmid_array = std::make_unique<int[]>(num_queue_id);
1007 // Extract the number of vmids used
1008 UNSERIALIZE_UNIQUE_PTR_ARRAY(used_vmids, used_vmid_map_size);
1009 // Extract the size of the queue id set for each vmid
1010 UNSERIALIZE_UNIQUE_PTR_ARRAY(used_queue_id_sizes, used_vmid_map_size);
1011 // Extract all the queue ids used
1012 UNSERIALIZE_UNIQUE_PTR_ARRAY(vmid_array, num_queue_id);
1013 // Populate the usedVMIDs map with the queue ids per vm
1014 int idx = 0;
1015 for (int it = 0; it < used_vmid_map_size; it++) {
1016 int vmid = used_vmids[it];
1017 int vmid_set_size = used_queue_id_sizes[it];
1018 for (int j = 0; j < vmid_set_size; j++) {
1019 usedVMIDs[vmid].insert(vmid_array[idx + j]);
1020 }
1021 idx += vmid_set_size;
1022 }
1023 }
1024
1025 // Unserialize the device memory
1026 deviceMem.unserializeSection(cp, "deviceMem");
1027 gpuvm.unserializeSection(cp, "GPUVM");
1028}
1029
// Allocate the lowest free VMID (1..AMDGPU_VM_COUNT-1; 0 is reserved) for
// a process address space id, record the pasid->vmid mapping, and panic
// if the pool is exhausted.
// NOTE(review): signature lost — allocateVMID(uint16_t pasid) based on
// the idMap insertion below; confirm against upstream.
1030uint16_t
1032{
1033 for (uint16_t vmid = 1; vmid < AMDGPU_VM_COUNT; vmid++) {
1034 auto result = usedVMIDs.find(vmid);
1035 if (result == usedVMIDs.end()) {
1036 idMap.insert(std::make_pair(pasid, vmid));
// Mark the VMID used with an (initially empty) queue-id set.
1037 usedVMIDs[vmid] = {};
1038 _lastVMID = vmid;
1039 return vmid;
1040 }
1041 }
1042 panic("All VMIDs have been assigned");
1043}
1044
// Free a VMID (drops its queue-id set); does not touch the pasid->vmid
// idMap — see deallocatePasid for the paired cleanup.
// NOTE(review): signature lost — deallocateVmid(uint16_t vmid); confirm.
1045void
1047{
1048 usedVMIDs.erase(vmid);
1049}
1050
// Release the VMID owned by a pasid: drop both the pasid->vmid mapping
// and the VMID's queue-id set.
// NOTE(review): signature lost — deallocatePasid(uint16_t pasid); confirm.
1051void
1053{
1054 auto result = idMap.find(pasid);
// NOTE(review): the assert already guarantees the entry exists in debug
// builds; the following early return is the release-build guard, so the
// two lines are intentionally redundant.
1055 assert(result != idMap.end());
1056 if (result == idMap.end()) return;
1057 uint16_t vmid = result->second;
1058
1059 idMap.erase(result);
1060 usedVMIDs.erase(vmid);
1061}
1062
// Drop all VMID state and RLC queues, then unregister every user-queue
// doorbell (offsets >= 0x4000).
// NOTE(review): signature lost — deallocateAllQueues(bool unmap_static)
// per the member list on this page.
1063void
1065{
1066 idMap.erase(idMap.begin(), idMap.end());
1067 usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
1068
1069 for (auto& it : sdmaEngs) {
1070 it.second->deallocateRLCQueues(unmap_static);
1071 }
1072
1073 // "All" queues implicitly refers to all user queues. User queues begin at
1074 // doorbell address 0x4000, so unmap any queue at or above that address.
1075 for (auto [offset, vmid] : doorbellVMIDMap) {
1076 if (offset >= 0x4000) {
1077 doorbells.erase(offset);
1078 }
1079 }
1080}
1081
// Record which VMID owns a doorbell offset (used by deallocateAllQueues
// to find user-queue doorbells).
// NOTE(review): signature lost — mapDoorbellToVMID(Addr doorbell,
// uint16_t vmid) based on the doorbellVMIDMap declaration; confirm.
1082void
1084{
1085 doorbellVMIDMap[doorbell] = vmid;
1086}
1087
// Expose the live VMID -> queue-id-set map (mutable reference; callers
// can modify it directly).
// NOTE(review): the name line was lost — getUsedVMIDs() per member list.
1088std::unordered_map<uint16_t, std::set<int>>&
1090{
1091 return usedVMIDs;
1092}
1093
1094void
1095AMDGPUDevice::insertQId(uint16_t vmid, int id)
1096{
1097 usedVMIDs[vmid].insert(id);
1098}
1099
1100} // namespace gem5
AbstractMemory declaration.
#define AMDGPU_MP0_SMN_C2PMSG_33
#define VEGA10_FB_LOCATION_BASE
Definition amdgpu_vm.hh:93
#define VEGA10_FB_LOCATION_TOP
Definition amdgpu_vm.hh:94
#define MI200_MEM_SIZE_REG
Definition amdgpu_vm.hh:100
#define MI200_FB_LOCATION_TOP
Definition amdgpu_vm.hh:102
#define MI300X_FB_LOCATION_TOP
Definition amdgpu_vm.hh:106
#define MI100_FB_LOCATION_BASE
Definition amdgpu_vm.hh:97
#define MI200_FB_LOCATION_BASE
Definition amdgpu_vm.hh:101
#define MI300X_MEM_SIZE_REG
Definition amdgpu_vm.hh:104
#define MI100_FB_LOCATION_TOP
Definition amdgpu_vm.hh:98
#define MI300X_FB_LOCATION_BASE
Definition amdgpu_vm.hh:105
#define MI100_MEM_SIZE_REG
Definition amdgpu_vm.hh:96
#define DPRINTF(x,...)
Definition trace.hh:209
const char data[]
void insertQId(uint16_t vmid, int id)
std::unordered_map< AddrRange, PM4PacketProcessor *, AddrRangeHasher > pm4Ranges
void deallocateAllQueues(bool unmap_static)
std::unordered_map< Addr, uint16_t > doorbellVMIDMap
std::unordered_map< uint16_t, uint16_t > idMap
void readMMIO(PacketPtr pkt, Addr offset)
void serialize(CheckpointOut &cp) const override
Checkpoint support.
void processPendingDoorbells(uint32_t offset)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void writeMMIO(PacketPtr pkt, Addr offset)
GPUCommandProcessor * cp
void setDoorbellType(uint32_t offset, QueueType qt, int ip_id=0)
Set handles to GPU blocks.
Tick writeDevice(PacketPtr pkt) override
Write to the PCI device.
void readROM(PacketPtr pkt)
AddrRange romRange
VGA ROM methods.
std::unordered_map< uint32_t, DoorbellInfo > doorbells
Structures to hold registers, doorbells, and some frame memory.
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
bool isROM(Addr addr) const
void unsetDoorbell(uint32_t offset)
std::unordered_map< uint32_t, PacketPtr > pendingDoorbellPkts
void setRegVal(uint64_t addr, uint32_t value)
std::unordered_map< uint32_t, AddrRange > sdmaMmios
Tick readDevice(PacketPtr pkt) override
Read from the PCI device.
void(SDMAEngine::* sdmaFuncPtr)(uint32_t)
SDMAEngine * getSDMAEngine(Addr offset)
AMDGPUMemoryManager * gpuMemMgr
AMDGPUDevice(const AMDGPUDeviceParams &p)
void readDoorbell(PacketPtr pkt, Addr offset)
AMDGPUNbio nbio
Blocks of the GPU.
Tick readConfig(PacketPtr pkt) override
Read from the PCI config space data that is stored locally.
std::unordered_map< uint32_t, sdmaFuncPtr > sdmaFunc
std::unordered_map< uint16_t, std::set< int > > usedVMIDs
AMDGPUInterruptHandler * deviceIH
Tick writeConfig(PacketPtr pkt) override
Methods inherited from PciEndpoint.
AMDMMIOReader mmioReader
MMIO reader to populate device registers map.
bool checkpoint_before_mmios
Initial checkpoint support variables.
void dispatchAccess(PacketPtr pkt, bool read)
Convert a PCI packet into a response.
uint32_t getRegVal(uint64_t addr)
Register value getter/setter.
void deallocateVmid(uint16_t vmid)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void intrPost()
Methods inherited from PciEndpoint.
void readFrame(PacketPtr pkt, Addr offset)
Helper methods to handle specific BAR read/writes.
void writeROM(PacketPtr pkt)
void writeDoorbell(PacketPtr pkt, Addr offset)
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
std::unordered_map< uint32_t, SDMAEngine * > sdmaIds
uint16_t allocateVMID(uint16_t pasid)
std::unordered_map< int, PM4PacketProcessor * > pm4PktProcs
void deallocatePasid(uint16_t pasid)
SDMAEngine * getSDMAById(int id)
void writeFrame(PacketPtr pkt, Addr offset)
void setSDMAEngine(Addr offset, SDMAEngine *eng)
memory::PhysicalMemory deviceMem
std::unordered_map< uint32_t, SDMAEngine * > sdmaEngs
GPUCommandProcessor * CP()
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition addr_range.hh:82
@ FunctionalReadError
Definition packet.hh:139
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it ...
Definition packet.cc:361
void setLE(T v)
Set the value in the data pointer to v as little endian.
static PacketPtr createWrite(const RequestPtr &req)
Definition packet.hh:1044
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition packet.hh:1175
T * getPtr()
get a pointer to the data ptr.
Definition packet.hh:1225
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition packet.hh:1038
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
unsigned getSize() const
Definition packet.hh:817
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
Definition packet.cc:352
const T * getConstPtr() const
Definition packet.hh:1234
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
void makeAtomicResponse()
Definition packet.hh:1074
MemCmd cmd
The command field of the packet.
Definition packet.hh:372
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setSuppressFuncError()
Definition packet.hh:757
void serialize(CheckpointOut &cp) const override
Serialize this object to the given output stream.
Definition device.cc:412
Tick read(PacketPtr pkt) final
Final implementation of read access from PioDevice.
Definition device.cc:264
bool getBAR(Addr addr, int &num, Addr &offs)
Which base address register (if any) maps the given address?
Definition device.hh:358
virtual Tick readConfig(PacketPtr pkt)
Read from the PCI config space data that is stored locally.
Definition device.cc:207
void intrPost()
Definition device.hh:435
const int PXCAP_BASE
Definition device.hh:338
const PciDevAddr _devAddr
Definition device.hh:313
PCIConfigType0 & config()
Definition device.hh:505
void unserialize(CheckpointIn &cp) override
Reconstruct the state of this object from a checkpoint.
Definition device.cc:703
PciEndpoint(const PciEndpointParams &params)
Constructor for PCI Dev.
Definition device.cc:603
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
Definition device.cc:625
virtual AddrRangeList getAddrRanges() const =0
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
System DMA Engine class for AMD dGPU.
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
void setGfxRptrHi(uint32_t data)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void setGfxSize(uint32_t data)
void setGfxBaseLo(uint32_t data)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
void setGfxDoorbellLo(uint32_t data)
void setGfxBaseHi(uint32_t data)
STL vector class.
Definition stl.hh:37
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
AddrRange RangeSize(Addr start, Addr size)
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
Definition addr_range.hh:64
Addr start() const
Get the start address of the range.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:268
#define UNSERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Definition serialize.hh:634
#define SERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Definition serialize.hh:626
#define warn(...)
Definition logging.hh:288
Bitfield< 4 > s
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 33 > id
Bitfield< 0 > m
Bitfield< 0 > p
Bitfield< 54 > p
Definition pagetable.hh:70
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
T letoh(T value)
Definition byteswap.hh:173
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
constexpr int MMIO_BAR
std::ostream CheckpointOut
Definition serialize.hh:66
@ InterruptHandler
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
The "old style" exitSimLoop functions.
Bitfield< 10 > pasid
Definition x86_cpu.cc:129
uint64_t Tick
Tick count type.
Definition types.hh:58
constexpr uint32_t ROM_SIZE
static constexpr uint32_t IH_OFFSET_SHIFT
Packet * PacketPtr
static constexpr uint32_t MMHUB_OFFSET_SHIFT
static constexpr int AMDGPU_VM_COUNT
constexpr int FRAMEBUFFER_BAR
@ GRBM_MMIO_RANGE
Definition amdgpu_vm.hh:128
@ GFX_MMIO_RANGE
Definition amdgpu_vm.hh:127
@ IH_MMIO_RANGE
Definition amdgpu_vm.hh:129
@ MMHUB_MMIO_RANGE
Definition amdgpu_vm.hh:126
@ SMU_MMIO_RANGE
Definition amdgpu_vm.hh:130
@ NBIO_MMIO_RANGE
Definition amdgpu_vm.hh:125
static constexpr uint32_t SMU_OFFSET_SHIFT
constexpr int DOORBELL_BAR
constexpr uint32_t VGA_ROM_DEFAULT
static constexpr uint32_t GRBM_OFFSET_SHIFT
Declaration of the Packet class.
#define PCI0_ROM_BASE_ADDR
Definition pcireg.h:207
#define PCI_INTERRUPT_PIN
Definition pcireg.h:190
#define PCI_DEVICE_SPECIFIC
Definition pcireg.h:54
#define PCI_CONFIG_SIZE
Definition pcireg.h:55
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
const std::string & name()
Definition trace.cc:48
Defines the PCI Express capability register and its associated bitfields for a PCIe device.
Definition pcireg.h:410

Generated on Mon Oct 27 2025 04:13:01 for gem5 by doxygen 1.14.0