gem5 [DEVELOP-FOR-25.1]
Loading...
Searching...
No Matches
amdgpu_nbio.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2023 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34#include "debug/AMDGPUDevice.hh"
36#include "mem/packet_access.hh"
37
38namespace gem5
39{
40
42{
43 // All read-before-write MMIOs go here
45}
46
47void
49{
50 gpuDevice = gpu_device;
51}
52
53void
55{
56 // For Vega10 we rely on the golden values in an MMIO trace. Return
57 // immediately so as not to clobber those values.
58 if (gpuDevice->getGfxVersion() == GfxVersion::gfx900) {
60 return;
61 }
62 }
63
64 switch (offset) {
65 // PCIE_DATA, PCIE_DATA2, PCIE_INDEX, and PCIE_INDEX2 handle "indirect"
66 // register reads/writes from the driver. This provides a way to read
67 // any register by providing a 32-bit address to one of the two INDEX
68 // registers and then reading the corresponding DATA register. See:
69 // https://github.com/ROCm/ROCK-Kernel-Driver/blob/roc-6.0.x/drivers/
70 // gpu/drm/amd/amdgpu/amdgpu_device.c#L459
72 {
73 uint32_t value = gpuDevice->getRegVal(pcie_index_reg);
74 DPRINTF(AMDGPUDevice, "Read PCIe index %lx data %x\n",
75 pcie_index_reg, value);
76 pkt->setLE<uint32_t>(value);
77 }
78 break;
80 {
81 uint32_t value = gpuDevice->getRegVal(pcie_index2_reg);
82 DPRINTF(AMDGPUDevice, "Read PCIe index2 %lx data2 %x\n",
83 pcie_index2_reg, value);
84 pkt->setLE<uint32_t>(value);
85 }
86 break;
88 pkt->setLE<uint32_t>(pcie_index_reg);
89 break;
91 pkt->setLE<uint32_t>(pcie_index2_reg);
92 break;
93 case AMDGPU_MM_DATA:
94 pkt->setLE<uint32_t>(gpuDevice->getRegVal(mm_index_reg));
95 break;
112 pkt->setLE<uint32_t>(0x10001);
113 break;
119 pkt->setLE<uint32_t>(0x1);
120 break;
121 // PSP responds with bit 31 set when ready
123 pkt->setLE<uint32_t>(0x80000000);
124 break;
126 pkt->setLE<uint32_t>(0x1);
127 break;
129 // This contains a revision ID for the chip. It is required for MI300X
130 // to see the GFX target as gfx942 instead of gfx941.
131 if (gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
132 pkt->setLE<uint32_t>(2 << 24);
133 } else {
134 pkt->setLE<uint32_t>(0);
135 }
136 break;
138 pkt->setLE<uint32_t>(0x200); // ATOM_S7_ASIC_INIT_COMPLETE_MASK
139 break;
140 default:
141 if (triggered_reads.count(offset)) {
142 DPRINTF(AMDGPUDevice, "Found triggered read for %#x\n", offset);
143 pkt->setLE<uint32_t>(triggered_reads[offset]);
144 } else if (regs.count(offset)) {
145 DPRINTF(AMDGPUDevice, "Returning value of unknown MMIO offset "
146 "%x: %x\n", offset, regs[offset]);
147 pkt->setLE<uint32_t>(regs[offset]);
148 } else {
149 DPRINTF(AMDGPUDevice, "NBIO Unknown MMIO %#x (%#x)\n", offset,
150 pkt->getAddr());
151 }
152 break;
153 }
154}
155
156void
158{
159 if (offset == AMDGPU_MM_INDEX) {
160 assert(pkt->getSize() == 4);
162 pkt->getLE<uint32_t>());
163 } else if (offset == AMDGPU_MM_INDEX_HI) {
164 assert(pkt->getSize() == 4);
166 pkt->getLE<uint32_t>());
167 } else if (offset == AMDGPU_MM_DATA) {
168 DPRINTF(AMDGPUDevice, "MM write to reg %#lx data %#lx\n",
169 mm_index_reg, pkt->getLE<uint32_t>());
170 gpuDevice->setRegVal(AMDGPU_MM_DATA, pkt->getLE<uint32_t>());
171 // PCIE_DATA, PCIE_DATA2, PCIE_INDEX, and PCIE_INDEX2 handle "indirect"
172 // register reads/writes from the driver. This provides a way to access
173 // any register by providing a 32-bit address to one of the two INDEX
174 // registers and then accessing the corresponding DATA register. See:
175 // https://github.com/ROCm/ROCK-Kernel-Driver/blob/roc-6.0.x/drivers/
176 // gpu/drm/amd/amdgpu/amdgpu_device.c#L459
177 } else if (offset == AMDGPU_PCIE_INDEX) {
178 assert(pkt->getSize() == 4);
179 pcie_index_reg = pkt->getLE<uint32_t>();
180 } else if (offset == AMDGPU_PCIE_DATA) {
181 assert(pkt->getSize() == 4);
182 gpuDevice->setRegVal(pcie_index_reg, pkt->getLE<uint32_t>());
183 } else if (offset == AMDGPU_PCIE_INDEX2) {
184 assert(pkt->getSize() == 4);
185 pcie_index2_reg = pkt->getLE<uint32_t>();
186 } else if (offset == AMDGPU_PCIE_DATA2) {
187 assert(pkt->getSize() == 4);
188 gpuDevice->setRegVal(pcie_index2_reg, pkt->getLE<uint32_t>());
189 } else if (offset == AMDGPU_MP0_SMN_C2PMSG_35) {
190 // See psp_v3_1_bootloader_load_sos in amdgpu driver code.
191 if (pkt->getLE<uint32_t>() == 0x10000) {
193 }
194 } else if (offset == AMDGPU_MP0_SMN_C2PMSG_64) {
196 0x80000000 + pkt->getLE<uint32_t>();
197 } else if (offset == AMDGPU_MP0_SMN_C2PMSG_69) {
198 // PSP ring low addr
199 psp_ring = insertBits(psp_ring, 31, 0, pkt->getLE<uint32_t>());
201 - gpuDevice->getVM().getSysAddrRangeLow() + 0xc;
202 } else if (offset == AMDGPU_MP0_SMN_C2PMSG_70) {
203 // PSP ring high addr
204 psp_ring = insertBits(psp_ring, 63, 32, pkt->getLE<uint32_t>());
206 - gpuDevice->getVM().getSysAddrRangeLow() + 0xc;
207 } else if (offset == AMDGPU_MP0_SMN_C2PMSG_71) {
208 // PSP ring size
209 psp_ring_size = pkt->getLE<uint32_t>();
211 uint16_t context_id =
213 regs[offset] = pkt->getLE<uint32_t>();
214 if ((offset % 8) == 0) {
215 // The register write is to ptBaseH
216 gpuDevice->getVM().setPageTableBaseH(context_id,
217 pkt->getLE<uint32_t>());
218 } else {
219 // The register write is to ptBaseL
220 gpuDevice->getVM().setPageTableBaseL(context_id,
221 pkt->getLE<uint32_t>());
222 }
224 uint16_t context_id =
226 regs[offset] = pkt->getLE<uint32_t>();
227 if ((offset % 8) == 0) {
228 // The register write is to ptStartH
229 gpuDevice->getVM().setPageTableStartH(context_id,
230 pkt->getLE<uint32_t>());
231 } else {
232 // The register write is to ptStartL
233 gpuDevice->getVM().setPageTableStartL(context_id,
234 pkt->getLE<uint32_t>());
235 }
237 uint16_t context_id =
239 regs[offset] = pkt->getLE<uint32_t>();
240 // MI200 page table addresses are 64 bits long. There are
241 // separate registers to handle the lower 32 bits and upper 32
242 // bits. Use the MMIO offset to figure out which part of the
243 // address is being written to
244 if ((offset % 8) == 0) {
245 // The register write is to ptEndH
246 gpuDevice->getVM().setPageTableEndH(context_id,
247 pkt->getLE<uint32_t>());
248 } else {
249 // The register write is to ptEndL
250 gpuDevice->getVM().setPageTableEndL(context_id,
251 pkt->getLE<uint32_t>());
252 }
253 } else {
254 // Fall back to a map of register values. This was previously in the
255 // AMDGPUDevice, however that short-circuited some reads from other
256 // IP blocks. Since this is an end point IP block it is safer to use
257 // here.
258 regs[offset] = pkt->getLE<uint32_t>();
259 }
260}
261
262bool
264{
265 if (offset == psp_ring_dev_addr) {
267 pkt->setUintX(psp_ring_value, ByteOrder::little);
268
269 return true;
270 }
271
272 return false;
273}
274
275void
277{
279 DPRINTF(AMDGPUDevice, "Saw psp_ring_listen_addr with size %ld value "
280 "%ld\n", pkt->getSize(), pkt->getUintX(ByteOrder::little));
281
282 /*
283 * In ROCm versions 4.x this packet is a 4 byte value. In ROCm 5.x
284 * the packet is 8 bytes and mapped as a system address which needs
285 * to be subtracted out to get the framebuffer address.
286 */
287 if (pkt->getSize() == 4) {
288 psp_ring_dev_addr = pkt->getLE<uint32_t>();
289 } else if (pkt->getSize() == 8) {
290 psp_ring_dev_addr = pkt->getUintX(ByteOrder::little)
291 - gpuDevice->getVM().getSysAddrRangeLow();
292 } else {
293 panic("Invalid write size to psp_ring_listen_addr\n");
294 }
295
296 DPRINTF(AMDGPUDevice, "Setting PSP ring device address to %#lx\n",
298 }
299}
300
301} // namespace gem5
#define AMDGPU_PCIE_INDEX
#define AMDGPU_PCIE_DATA2
#define MI300X_INV_ENG17_ACK1
#define AMDGPU_MM_INDEX_HI
#define AMDGPU_PCIE_INDEX2
#define MI300X_INV_ENG17_ACK8
#define VEGA10_INV_ENG17_ACK2
#define MI300X_INV_ENG17_ACK10
#define VEGA10_INV_ENG17_ACK1
#define MI100_INV_ENG17_SEM2
#define AMDGPU_MP0_SMN_C2PMSG_81
#define AMDGPU_MP0_SMN_C2PMSG_71
#define MI300X_INV_ENG17_ACK3
#define MI300X_INV_ENG17_ACK9
#define AMDGPU_MP0_SMN_C2PMSG_64
#define MI300X_INV_ENG17_ACK7
#define MI100_INV_ENG17_ACK2
#define MI300X_INV_ENG17_ACK5
#define MI200_INV_ENG17_SEM2
#define MI300X_INV_ENG17_ACK2
#define MI300X_EPF0_STRAP0
#define AMDGPU_MP0_SMN_C2PMSG_35
#define MI300X_INV_ENG17_ACK6
#define AMDGPU_MM_INDEX
MMIO offsets for NBIO.
#define VEGA10_INV_ENG17_SEM1
#define MI300X_INV_ENG17_ACK4
#define MI100_INV_ENG17_ACK3
#define MI100_INV_ENG17_SEM3
#define MI200_INV_ENG17_ACK2
#define AMDGPU_MP0_SMN_C2PMSG_69
#define AMDGPU_MM_DATA
#define MI300X_INV_ENG17_ACK11
#define MI200_BIOS_SCRATCH_7
#define VEGA10_INV_ENG17_SEM2
#define AMDGPU_MP1_SMN_C2PMSG_90
#define AMDGPU_MP0_SMN_C2PMSG_70
#define AMDGPU_PCIE_DATA
#define DPRINTF(x,...)
Definition trace.hh:209
Device model for an AMD GPU.
uint16_t get_context_from_MI200_regBM_PAGE_TABLE_BASE_ADDR(Addr offset)
bool is_MI200_regBM_PAGE_TABLE_START_ADDR(Addr offset)
void readMMIO(PacketPtr pkt, Addr offset)
uint32_t pcie_index2_reg
std::unordered_map< uint32_t, uint32_t > triggered_reads
void writeMMIO(PacketPtr pkt, Addr offset)
uint16_t get_context_from_MI200_regBM_PAGE_TABLE_END_ADDR(Addr offset)
bool readFrame(PacketPtr pkt, Addr offset)
bool is_MI200_regBM_PAGE_TABLE_END_ADDR(Addr offset)
bool is_MI200_regBM_PAGE_TABLE_BASE_ADDR(Addr offset)
uint64_t mm_index_reg
AMDGPUDevice * gpuDevice
uint32_t pcie_index_reg
uint16_t get_context_from_MI200_regBM_PAGE_TABLE_START_ADDR(Addr offset)
void writeFrame(PacketPtr pkt, Addr offset)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getAddr() const
Definition packet.hh:807
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it ...
Definition packet.cc:361
void setLE(T v)
Set the value in the data pointer to v as little endian.
unsigned getSize() const
Definition packet.hh:817
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
Definition packet.cc:352
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
Definition bitfield.hh:185
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
Bitfield< 23, 0 > offset
Definition types.hh:144
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
Packet * PacketPtr

Generated on Mon Oct 27 2025 04:13:01 for gem5 by doxygen 1.14.0