amdgpu_device.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "dev/amdgpu/amdgpu_device.hh"

#include <fstream>

#include "debug/AMDGPUDevice.hh"
#include "dev/amdgpu/amdgpu_vm.hh"
#include "dev/amdgpu/interrupt_handler.hh"
#include "dev/amdgpu/pm4_packet_processor.hh"
#include "dev/amdgpu/sdma_engine.hh"
#include "dev/hsa/hw_scheduler.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/shader.hh"
#include "mem/abstract_mem.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "params/AMDGPUDevice.hh"
#include "sim/byteswap.hh"
#include "sim/sim_exit.hh"

namespace gem5
{

AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
    : PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
      pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
      checkpoint_before_mmios(p.checkpoint_before_mmios),
      init_interrupt_count(0), _lastVMID(0),
      deviceMem(name() + ".deviceMem", p.memories, false, "", false)
{
    // Loading the rom binary dumped from hardware.
    std::ifstream romBin;
    romBin.open(p.rom_binary, std::ios::binary);
    romBin.read((char *)rom.data(), ROM_SIZE);
    romBin.close();

    // System pointer needs to be explicitly set for device memory since
    // DRAMCtrl uses it to get (1) cache line size and (2) the mem mode.
    // Note this means the cache line size is system wide.
    for (auto& m : p.memories) {
        m->system(p.system);

        // Add to system's device memory map.
        p.system->addDeviceMemory(gpuMemMgr->getRequestorID(), m);
    }

    if (config.expansionROM) {
        romRange = RangeSize(config.expansionROM, ROM_SIZE);
    } else {
        romRange = RangeSize(VGA_ROM_DEFAULT, ROM_SIZE);
    }

    if (p.trace_file != "") {
        mmioReader.readMMIOTrace(p.trace_file);
    }

    int sdma_id = 0;
    for (auto& s : p.sdmas) {
        s->setGPUDevice(this);
        s->setId(sdma_id);
        sdmaIds.insert({sdma_id, s});
        sdmaMmios.insert({sdma_id,
                          RangeSize(s->getMmioBase(), s->getMmioSize())});
        DPRINTF(AMDGPUDevice, "SDMA%d has MMIO range %s\n", sdma_id,
                sdmaMmios[sdma_id].to_string().c_str());
        sdma_id++;
    }

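    // Note on the sdmaFunc map built below: its keys are register indices
    // relative to each SDMA engine's MMIO aperture. writeMMIO() recovers the
    // index by subtracting the aperture start and shifting the byte offset
    // right by two (dword granularity), so, for example, a write to
    // sdmaMmios[0].start() + (0x81 << 2) invokes SDMAEngine::setGfxBaseLo()
    // on SDMA0.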
    // Map SDMA MMIO addresses to functions
    sdmaFunc.insert({0x81, &SDMAEngine::setGfxBaseLo});
    sdmaFunc.insert({0x82, &SDMAEngine::setGfxBaseHi});
    sdmaFunc.insert({0x88, &SDMAEngine::setGfxRptrHi});
    sdmaFunc.insert({0x89, &SDMAEngine::setGfxRptrLo});
    sdmaFunc.insert({0x92, &SDMAEngine::setGfxDoorbellLo});
    sdmaFunc.insert({0x80, &SDMAEngine::setGfxSize});
    sdmaFunc.insert({0xb2, &SDMAEngine::setGfxWptrLo});
    sdmaFunc.insert({0xb3, &SDMAEngine::setGfxWptrHi});
    if (p.device_name == "Vega10") {
        sdmaFunc.insert({0xe1, &SDMAEngine::setPageBaseLo});
        sdmaFunc.insert({0xe9, &SDMAEngine::setPageRptrLo});
        sdmaFunc.insert({0xe8, &SDMAEngine::setPageRptrHi});
        sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
        sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
    } else if (p.device_name == "MI100" || p.device_name == "MI200") {
        sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
        sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
        sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
        sdmaFunc.insert({0x10b, &SDMAEngine::setPageWptrLo});
    } else {
        panic("Unknown GPU device %s\n", p.device_name);
    }

    deviceIH->setGPUDevice(this);
    pm4PktProc->setGPUDevice(this);
    cp->hsaPacketProc().setGPUDevice(this);
    cp->setGPUDevice(this);

    // Address aperture for device memory. We tell this to the driver and it
    // could possibly be anything, but these are the values used by hardware.
    uint64_t mmhubBase = 0x8000ULL << 24;
    uint64_t mmhubTop = 0x83ffULL << 24;

    // These are hardcoded register values to return what the driver expects.

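    // The FB_LOCATION registers written below store the aperture base and
    // top shifted right by 24 bits, i.e., in units of 2^24 bytes (16 MiB),
    // which is why mmhubBase and mmhubTop are shifted before being passed
    // to setRegVal().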
    // There are different registers for different GPUs, so we set the value
    // based on the GPU type specified by the user.
    if (p.device_name == "Vega10") {
        setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
        setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
        gfx_version = GfxVersion::gfx900;
    } else if (p.device_name == "MI100") {
        setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
        setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
        setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory
        gfx_version = GfxVersion::gfx908;
    } else if (p.device_name == "MI200") {
        // This device can have either 64GB or 128GB of device memory.
        // We limit it to 16GB for simulation.
        setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
        setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
        setRegVal(MI200_MEM_SIZE_REG, 0x3ff0);
        gfx_version = GfxVersion::gfx90a;
    } else {
        panic("Unknown GPU device %s\n", p.device_name);
    }

    gpuvm.setMMHUBBase(mmhubBase);
    gpuvm.setMMHUBTop(mmhubTop);

    nbio.setGPUDevice(this);
}

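// The ROM contents read from p.rom_binary in the constructor are exposed to
// the driver through the romRange window; readROM()/writeROM() below service
// accesses that fall inside that range (see the isROM() checks in read() and
// write()).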
void
AMDGPUDevice::readROM(PacketPtr pkt)
{
    Addr rom_offset = pkt->getAddr() & (ROM_SIZE - 1);
    uint64_t rom_data = 0;

    memcpy(&rom_data, rom.data() + rom_offset, pkt->getSize());
    pkt->setUintX(rom_data, ByteOrder::little);

    DPRINTF(AMDGPUDevice, "Read from addr %#x on ROM offset %#x data: %#x\n",
            pkt->getAddr(), rom_offset, rom_data);
}

void
AMDGPUDevice::writeROM(PacketPtr pkt)
{
    assert(isROM(pkt->getAddr()));

    Addr rom_offset = pkt->getAddr() - romRange.start();
    uint64_t rom_data = pkt->getUintX(ByteOrder::little);

    memcpy(rom.data() + rom_offset, &rom_data, pkt->getSize());

    DPRINTF(AMDGPUDevice, "Write to addr %#x on ROM offset %#x data: %#x\n",
            pkt->getAddr(), rom_offset, rom_data);
}

AddrRangeList
AMDGPUDevice::getAddrRanges() const
{
    AddrRangeList ranges = PciDevice::getAddrRanges();
    AddrRangeList ret_ranges;
    ret_ranges.push_back(romRange);

    // If a range starts at zero, assume the OS hasn't assigned it yet. Do
    // not return ranges starting with zero, as they will surely overlap with
    // another range and cause the I/O crossbar to fatal.
    for (auto & r : ranges) {
        if (r.start() != 0) {
            ret_ranges.push_back(r);
        }
    }

    return ret_ranges;
}

Tick
AMDGPUDevice::readConfig(PacketPtr pkt)
{
    [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
    DPRINTF(AMDGPUDevice, "Read Config: from offset: %#x size: %#x "
            "data: %#x\n", offset, pkt->getSize(), config.data[offset]);

    Tick delay = PciDevice::readConfig(pkt);

    // Before sending MMIOs the driver sends three interrupts in a row.
    // Use this to trigger creating a checkpoint to restore in timing mode.
    // This is only necessary until we can create a "hole" in the KVM VM
    // around the VGA ROM region such that KVM exits and sends requests to
    // this device rather than the KVM VM.
    if (checkpoint_before_mmios) {
        if (offset == PCI0_INTERRUPT_PIN) {
            if (++init_interrupt_count == 3) {
                DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n");
                exitSimLoop("checkpoint", 0, curTick() + delay + 1);
            }
        } else {
            init_interrupt_count = 0;
        }
    }

    return delay;
}

Tick
AMDGPUDevice::writeConfig(PacketPtr pkt)
{
    [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
    DPRINTF(AMDGPUDevice, "Write Config: from offset: %#x size: %#x "
            "data: %#x\n", offset, pkt->getSize(),
            pkt->getUintX(ByteOrder::little));

    return PciDevice::writeConfig(pkt);
}

void
AMDGPUDevice::dispatchAccess(PacketPtr pkt, bool read)
{
    DPRINTF(AMDGPUDevice, "%s from addr %#x size: %#x data: %#x\n",
            read ? "Read" : "Write", pkt->getAddr(), pkt->getSize(),
            pkt->getUintX(ByteOrder::little));

    pkt->makeAtomicResponse();
}

void
AMDGPUDevice::readFrame(PacketPtr pkt, Addr offset)
{
    DPRINTF(AMDGPUDevice, "Read framebuffer address %#lx\n", offset);

    /*
     * Return data for frame reads in priority order: (1) Special addresses
     * first, ignoring any writes from driver. (2) Any other address from
     * device backing store / abstract memory class functionally.
     */
    if (nbio.readFrame(pkt, offset)) {
        return;
    }

    /*
     * Read the value from device memory. This must be done functionally
     * because this method is called by the PCIDevice::read method which
     * is a non-timing read.
     */
    RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
                                               vramRequestorId());
    PacketPtr readPkt = Packet::createRead(req);
    uint8_t *dataPtr = new uint8_t[pkt->getSize()];
    readPkt->dataDynamic(dataPtr);

    auto system = cp->shader()->gpuCmdProc.system();
    system->getDeviceMemory(readPkt)->access(readPkt);

    pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
}

void
AMDGPUDevice::readDoorbell(PacketPtr pkt, Addr offset)
{
    DPRINTF(AMDGPUDevice, "Read doorbell %#lx\n", offset);
    mmioReader.readFromTrace(pkt, DOORBELL_BAR, offset);
}

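// readMMIO() services reads in three steps: the MMIO trace supplies a
// default value, any value previously written by the kernel (stored in the
// regs map) then overrides it, and finally a few apertures (NBIO, GRBM,
// MMHUB) are handled dynamically by the corresponding block models.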
void
AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
{
    Addr aperture = gpuvm.getMmioAperture(offset);
    Addr aperture_offset = offset - aperture;

    // By default read from MMIO trace. Overwrite the packet for a select
    // few more dynamic MMIOs.
    DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
    mmioReader.readFromTrace(pkt, MMIO_BAR, offset);

    if (regs.find(offset) != regs.end()) {
        uint64_t value = regs[offset];
        DPRINTF(AMDGPUDevice, "Reading what kernel wrote before: %#x\n",
                value);
        pkt->setUintX(value, ByteOrder::little);
    }

    switch (aperture) {
      case NBIO_BASE:
        nbio.readMMIO(pkt, aperture_offset);
        break;
      case GRBM_BASE:
        gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
        break;
      case MMHUB_BASE:
        gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
        break;
      default:
        break;
    }
}

void
AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
{
    DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);

    Addr aperture = gpuvm.getFrameAperture(offset);
    Addr aperture_offset = offset - aperture;

    // Record the value
    if (aperture == gpuvm.gartBase()) {
        gpuvm.gartTable[aperture_offset] = pkt->getUintX(ByteOrder::little);
        DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", aperture_offset,
                gpuvm.gartTable[aperture_offset]);
    }

    nbio.writeFrame(pkt, offset);

    /*
     * Write the value to device memory. This must be done functionally
     * because this method is called by the PCIDevice::write method which
     * is a non-timing write.
     */
    RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
                                               vramRequestorId());
    PacketPtr writePkt = Packet::createWrite(req);
    uint8_t *dataPtr = new uint8_t[pkt->getSize()];
    std::memcpy(dataPtr, pkt->getPtr<uint8_t>(),
                pkt->getSize() * sizeof(uint8_t));
    writePkt->dataDynamic(dataPtr);

    auto system = cp->shader()->gpuCmdProc.system();
    system->getDeviceMemory(writePkt)->access(writePkt);
}

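// Doorbell writes are dispatched based on the QueueType registered for the
// doorbell offset via setDoorbellType() when the queue was created; unknown
// offsets only produce a warning so stray doorbell rings do not kill the
// simulation.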
void
AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
{
    DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset);

    if (doorbells.find(offset) != doorbells.end()) {
        QueueType q_type = doorbells[offset];
        DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
                offset, q_type);
        switch (q_type) {
          case Compute:
            pm4PktProc->process(pm4PktProc->getQueue(offset),
                                pkt->getLE<uint64_t>());
            break;
          case Gfx:
            pm4PktProc->process(pm4PktProc->getQueue(offset, true),
                                pkt->getLE<uint64_t>());
            break;
          case SDMAGfx: {
            SDMAEngine *sdmaEng = getSDMAEngine(offset);
            sdmaEng->processGfx(pkt->getLE<uint64_t>());
          } break;
          case SDMAPage: {
            SDMAEngine *sdmaEng = getSDMAEngine(offset);
            sdmaEng->processPage(pkt->getLE<uint64_t>());
          } break;
          case ComputeAQL: {
            cp->hsaPacketProc().hwScheduler()->write(offset,
                pkt->getLE<uint64_t>() + 1);
            pm4PktProc->updateReadIndex(offset, pkt->getLE<uint64_t>() + 1);
          } break;
          case InterruptHandler:
            deviceIH->updateRptr(pkt->getLE<uint32_t>());
            break;
          case RLC: {
            SDMAEngine *sdmaEng = getSDMAEngine(offset);
            sdmaEng->processRLC(offset, pkt->getLE<uint64_t>());
          } break;
          default:
            panic("Write to unknown queue type!");
        }
    } else {
        warn("Unknown doorbell offset: %lx\n", offset);
    }
}

void
AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
{
    Addr aperture = gpuvm.getMmioAperture(offset);
    Addr aperture_offset = offset - aperture;

    DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);

    // Check SDMA functions first, then fall back to the switch statement.
    for (int idx = 0; idx < sdmaIds.size(); ++idx) {
        if (sdmaMmios[idx].contains(offset)) {
            Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
            if (sdmaFunc.count(sdma_offset)) {
                DPRINTF(AMDGPUDevice, "Calling SDMA%d MMIO function %lx\n",
                        idx, sdma_offset);
                sdmaFuncPtr mptr = sdmaFunc[sdma_offset];
                (getSDMAById(idx)->*mptr)(pkt->getLE<uint32_t>());
            } else {
                DPRINTF(AMDGPUDevice, "Unknown SDMA%d MMIO: %#lx\n", idx,
                        sdma_offset);
            }

            return;
        }
    }

    switch (aperture) {
      /* Write a general register to the graphics register bus manager. */
      case GRBM_BASE:
        gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
        pm4PktProc->writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
        break;
      /* Write a register to the interrupt handler. */
      case IH_BASE:
        deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
        break;
      /* Write an IO space register */
      case NBIO_BASE:
        nbio.writeMMIO(pkt, aperture_offset);
        break;
      default:
        DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset);
        break;
    }
}

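// PCI accesses that do not target the ROM are decoded by BAR: the
// framebuffer BAR maps device memory, the doorbell BAR maps queue doorbells,
// and the MMIO BAR maps the register apertures handled above.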
Tick
AMDGPUDevice::read(PacketPtr pkt)
{
    if (isROM(pkt->getAddr())) {
        readROM(pkt);
    } else {
        int barnum = -1;
        Addr offset = 0;
        getBAR(pkt->getAddr(), barnum, offset);

        switch (barnum) {
          case FRAMEBUFFER_BAR:
            readFrame(pkt, offset);
            break;
          case DOORBELL_BAR:
            readDoorbell(pkt, offset);
            break;
          case MMIO_BAR:
            readMMIO(pkt, offset);
            break;
          default:
            panic("Request with address out of mapped range!");
        }
    }

    dispatchAccess(pkt, true);
    return pioDelay;
}

Tick
AMDGPUDevice::write(PacketPtr pkt)
{
    if (isROM(pkt->getAddr())) {
        writeROM(pkt);

        dispatchAccess(pkt, false);

        return pioDelay;
    }

    int barnum = -1;
    Addr offset = 0;
    getBAR(pkt->getAddr(), barnum, offset);

    switch (barnum) {
      case FRAMEBUFFER_BAR:
        writeFrame(pkt, offset);
        break;
      case DOORBELL_BAR:
        writeDoorbell(pkt, offset);
        break;
      case MMIO_BAR:
        writeMMIO(pkt, offset);
        break;
      default:
        panic("Request with address out of mapped range!");
    }

    // Record only if there is non-zero value, or a value to be overwritten.
    // Reads return 0 by default.
    uint64_t data = pkt->getUintX(ByteOrder::little);

    DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
            pkt->getAddr(), data);

    dispatchAccess(pkt, false);

    return pioDelay;
}

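// The regs map acts as a simple backing store for MMIO registers: values the
// kernel driver writes (or that are hardcoded in the constructor) are kept
// here so that later MMIO reads can return them.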
bool
AMDGPUDevice::haveRegVal(uint32_t addr)
{
    return regs.count(addr);
}

uint32_t
AMDGPUDevice::getRegVal(uint32_t addr)
{
    DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n",
            addr, regs[addr]);
    return regs[addr];
}

void
AMDGPUDevice::setRegVal(uint32_t addr, uint32_t value)
{
    DPRINTF(AMDGPUDevice, "Setting register 0x%lx to %x\n",
            addr, value);
    regs[addr] = value;
}

void
AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt)
{
    DPRINTF(AMDGPUDevice, "Setting doorbell type for %x\n", offset);
    doorbells[offset] = qt;
}

void
AMDGPUDevice::setSDMAEngine(Addr offset, SDMAEngine *eng)
{
    sdmaEngs[offset] = eng;
}

SDMAEngine*
AMDGPUDevice::getSDMAById(int id)
{
    /**
     * PM4 packets select SDMAs using an integer ID. This method simply maps
     * the integer ID to a pointer to the SDMA and checks for invalid IDs.
     */
    assert(sdmaIds.count(id));

    return sdmaIds[id];
}

SDMAEngine*
AMDGPUDevice::getSDMAEngine(Addr offset)
{
    return sdmaEngs[offset];
}

void
AMDGPUDevice::intrPost()
{
    PciDevice::intrPost();
}

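// Checkpointing: the regs, doorbells, and sdmaEngs maps are flattened into
// parallel arrays so they can be stored with SERIALIZE_ARRAY, while the
// framebuffer contents and GPU virtual memory state are written as their own
// checkpoint sections.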
void
AMDGPUDevice::serialize(CheckpointOut &cp) const
{
    // Serialize the PciDevice base class
    PciDevice::serialize(cp);

    uint64_t regs_size = regs.size();
    uint64_t doorbells_size = doorbells.size();
    uint64_t sdma_engs_size = sdmaEngs.size();

    SERIALIZE_SCALAR(regs_size);
    SERIALIZE_SCALAR(doorbells_size);
    SERIALIZE_SCALAR(sdma_engs_size);

    // Make a c-style array of the regs to serialize
    uint32_t reg_addrs[regs_size];
    uint64_t reg_values[regs_size];
    uint32_t doorbells_offset[doorbells_size];
    QueueType doorbells_queues[doorbells_size];
    uint32_t sdma_engs_offset[sdma_engs_size];
    int sdma_engs[sdma_engs_size];

    int idx = 0;
    for (auto & it : regs) {
        reg_addrs[idx] = it.first;
        reg_values[idx] = it.second;
        ++idx;
    }

    idx = 0;
    for (auto & it : doorbells) {
        doorbells_offset[idx] = it.first;
        doorbells_queues[idx] = it.second;
        ++idx;
    }

    idx = 0;
    for (auto & it : sdmaEngs) {
        sdma_engs_offset[idx] = it.first;
        sdma_engs[idx] = it.second->getId();
        ++idx;
    }

    SERIALIZE_ARRAY(reg_addrs, sizeof(reg_addrs)/sizeof(reg_addrs[0]));
    SERIALIZE_ARRAY(reg_values, sizeof(reg_values)/sizeof(reg_values[0]));
    SERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
                    sizeof(doorbells_offset[0]));
    SERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
                    sizeof(doorbells_queues[0]));
    SERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
                    sizeof(sdma_engs_offset[0]));
    SERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));

    // Serialize the device memory
    deviceMem.serializeSection(cp, "deviceMem");
    gpuvm.serializeSection(cp, "GPUVM");
}

void
AMDGPUDevice::unserialize(CheckpointIn &cp)
{
    // Unserialize the PciDevice base class
    PciDevice::unserialize(cp);

    uint64_t regs_size = 0;
    uint64_t doorbells_size = 0;
    uint64_t sdma_engs_size = 0;

    UNSERIALIZE_SCALAR(regs_size);
    UNSERIALIZE_SCALAR(doorbells_size);
    UNSERIALIZE_SCALAR(sdma_engs_size);

    if (regs_size > 0) {
        uint32_t reg_addrs[regs_size];
        uint64_t reg_values[regs_size];

        UNSERIALIZE_ARRAY(reg_addrs, sizeof(reg_addrs)/sizeof(reg_addrs[0]));
        UNSERIALIZE_ARRAY(reg_values,
                          sizeof(reg_values)/sizeof(reg_values[0]));

        for (int idx = 0; idx < regs_size; ++idx) {
            regs.insert(std::make_pair(reg_addrs[idx], reg_values[idx]));
        }
    }

    if (doorbells_size > 0) {
        uint32_t doorbells_offset[doorbells_size];
        QueueType doorbells_queues[doorbells_size];

        UNSERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
                          sizeof(doorbells_offset[0]));
        UNSERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
                          sizeof(doorbells_queues[0]));

        for (int idx = 0; idx < doorbells_size; ++idx) {
            regs.insert(std::make_pair(doorbells_offset[idx],
                                       doorbells_queues[idx]));
            doorbells[doorbells_offset[idx]] = doorbells_queues[idx];
        }
    }

    if (sdma_engs_size > 0) {
        uint32_t sdma_engs_offset[sdma_engs_size];
        int sdma_engs[sdma_engs_size];

        UNSERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
                          sizeof(sdma_engs_offset[0]));
        UNSERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));

        for (int idx = 0; idx < sdma_engs_size; ++idx) {
            int sdma_id = sdma_engs[idx];
            assert(sdmaIds.count(sdma_id));
            SDMAEngine *sdma = sdmaIds[sdma_id];
            sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
        }
    }

    // Unserialize the device memory
    deviceMem.unserializeSection(cp, "deviceMem");
    gpuvm.unserializeSection(cp, "GPUVM");
}

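// VMID allocation scans for the first free ID starting at 1, so VMID 0 is
// never handed out to a process (it is presumed reserved for the
// kernel/driver, as on real hardware).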
uint16_t
AMDGPUDevice::allocateVMID(uint16_t pasid)
{
    for (uint16_t vmid = 1; vmid < AMDGPU_VM_COUNT; vmid++) {
        auto result = usedVMIDs.find(vmid);
        if (result == usedVMIDs.end()) {
            idMap.insert(std::make_pair(pasid, vmid));
            usedVMIDs[vmid] = {};
            _lastVMID = vmid;
            return vmid;
        }
    }
    panic("All VMIDs have been assigned");
}

void
AMDGPUDevice::deallocateVmid(uint16_t vmid)
{
    usedVMIDs.erase(vmid);
}

void
AMDGPUDevice::deallocatePasid(uint16_t pasid)
{
    auto result = idMap.find(pasid);
    assert(result != idMap.end());
    if (result == idMap.end()) return;
    uint16_t vmid = result->second;

    idMap.erase(result);
    usedVMIDs.erase(vmid);
}

void
AMDGPUDevice::deallocateAllQueues()
{
    idMap.erase(idMap.begin(), idMap.end());
    usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());

    for (auto& it : sdmaEngs) {
        it.second->deallocateRLCQueues();
    }
}

void
AMDGPUDevice::mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
{
    doorbellVMIDMap[doorbell] = vmid;
}

std::unordered_map<uint16_t, std::set<int>>&
AMDGPUDevice::getUsedVMIDs()
{
    return usedVMIDs;
}

void
AMDGPUDevice::insertQId(uint16_t vmid, int id)
{
    usedVMIDs[vmid].insert(id);
}

} // namespace gem5