gem5 v24.0.0.0
amdgpu_vm.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "dev/amdgpu/amdgpu_vm.hh"

#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "arch/amdgpu/vega/tlb.hh"
#include "arch/generic/mmu.hh"
#include "base/trace.hh"
#include "debug/AMDGPUDevice.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "mem/packet_access.hh"

namespace gem5
{

AMDGPUVM::AMDGPUVM()
{
    // Zero out contexts
    memset(&vmContext0, 0, sizeof(AMDGPUSysVMContext));

    vmContexts.resize(AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; ++i) {
        memset(&vmContexts[i], 0, sizeof(AMDGPUVMContext));
    }

    for (int i = 0; i < NUM_MMIO_RANGES; ++i) {
        mmioRanges[i] = AddrRange();
    }
}

void
AMDGPUVM::setMMIOAperture(mmio_range_t mmio_aperture, AddrRange range)
{
    mmioRanges[mmio_aperture] = range;
}

AddrRange
AMDGPUVM::getMMIORange(mmio_range_t mmio_aperture)
{
    return mmioRanges[mmio_aperture];
}

const AddrRange&
AMDGPUVM::getMMIOAperture(Addr offset)
{
    for (int i = 0; i < NUM_MMIO_RANGES; ++i) {
        if (mmioRanges[i].contains(offset)) {
            return mmioRanges[i];
        }
    }

    // Default to NBIO
    return mmioRanges[NBIO_MMIO_RANGE];
}

Addr
AMDGPUVM::gartBase()
{
    return vmContext0.ptBase;
}

Addr
AMDGPUVM::gartSize()
{
    // Each PTE maps one 4KiB GART page, so the PTE count is the table
    // aperture size shifted down by 12.
    return (vmContext0.ptEnd - vmContext0.ptStart) >> 12;
}

void
AMDGPUVM::readMMIO(PacketPtr pkt, Addr offset)
{
    uint32_t value = pkt->getLE<uint32_t>();

    switch (offset) {
      // MMHUB MMIOs
      case mmMMHUB_VM_INVALIDATE_ENG17_SEM:
        DPRINTF(AMDGPUDevice, "Marking invalidate ENG17 SEM acquired\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_INVALIDATE_ENG17_ACK:
        // This is only used by driver initialization and only expects an ACK
        // for VMID 0, which is the first bit in the response.
        DPRINTF(AMDGPUDevice, "Telling driver invalidate ENG17 is complete\n");
        pkt->setLE<uint32_t>(1);
        break;
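      // The FB location registers hold bits 47:24 of a physical address in
      // their low 24 bits, giving the framebuffer window 16MiB granularity.
      // TOP is an inclusive bound, so its low 24 address bits are ones.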
      case mmMMHUB_VM_FB_LOCATION_BASE:
        mmhubBase = ((Addr)bits(value, 23, 0) << 24);
        DPRINTF(AMDGPUDevice, "MMHUB FB base set to %#x\n", mmhubBase);
        break;
      case mmMMHUB_VM_FB_LOCATION_TOP:
        mmhubTop = ((Addr)bits(value, 23, 0) << 24) | 0xFFFFFFULL;
        DPRINTF(AMDGPUDevice, "MMHUB FB top set to %#x\n", mmhubTop);
        break;
      // GRBM MMIOs
      case mmVM_INVALIDATE_ENG17_ACK:
        DPRINTF(AMDGPUDevice, "Overwriting invalidation ENG17 ACK\n");
        pkt->setLE<uint32_t>(1);
        break;
      default:
        DPRINTF(AMDGPUDevice, "GPUVM read of unknown MMIO %#x\n", offset);
        break;
    }
}

void
AMDGPUVM::writeMMIO(PacketPtr pkt, Addr offset)
{
    switch (offset) {
      // VMID0 MMIOs
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32:
        vmContext0.ptBaseL = pkt->getLE<uint32_t>();
        // Clear extra bits not part of address
        vmContext0.ptBaseL = insertBits(vmContext0.ptBaseL, 0, 0, 0);
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32:
        vmContext0.ptBaseH = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32:
        vmContext0.ptStartL = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32:
        vmContext0.ptStartH = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32:
        vmContext0.ptEndL = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32:
        vmContext0.ptEndH = pkt->getLE<uint32_t>();
        break;
      case mmMC_VM_AGP_TOP: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
      } break;
      case mmMC_VM_AGP_BOT: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpBot = ((Addr)bits(val, 23, 0)) << 24;
      } break;
      case mmMC_VM_AGP_BASE: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpBase = ((Addr)bits(val, 23, 0)) << 24;
      } break;
      case mmMC_VM_FB_LOCATION_TOP: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
      } break;
      case mmMC_VM_FB_LOCATION_BASE: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbBase = ((Addr)bits(val, 23, 0)) << 24;
      } break;
      case mmMC_VM_FB_OFFSET: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbOffset = ((Addr)bits(val, 23, 0)) << 24;
      } break;
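      // The system aperture registers below hold bits 47:18 of an address in
      // their low 30 bits, so the aperture bounds have 256KiB granularity
      // (versus the 16MiB granularity of the 24-bit FB/AGP fields above).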
      case mmMC_VM_SYSTEM_APERTURE_LOW_ADDR: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.sysAddrL = ((Addr)bits(val, 29, 0)) << 18;
      } break;
      case mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.sysAddrH = ((Addr)bits(val, 29, 0)) << 18;
      } break;
      default:
        break;
    }
}
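
// Note: each LO32/HI32 pair written above forms the two halves of one 64-bit
// field (e.g., ptBaseL and ptBaseH back ptBase), which is why serialize()
// below can checkpoint ptBase, ptStart, and ptEnd as single values.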

void
AMDGPUVM::registerTLB(VegaISA::GpuTLB *tlb)
{
    DPRINTF(AMDGPUDevice, "Registered a TLB with device\n");
    gpu_tlbs.push_back(tlb);
}

void
AMDGPUVM::invalidateTLBs()
{
    DPRINTF(AMDGPUDevice, "Invalidating all TLBs\n");
    for (auto &tlb : gpu_tlbs) {
        tlb->invalidateAll();
        DPRINTF(AMDGPUDevice, " ... TLB invalidated\n");
    }
}

void
AMDGPUVM::serialize(CheckpointOut &cp) const
{
    Addr vm0PTBase = vmContext0.ptBase;
    Addr vm0PTStart = vmContext0.ptStart;
    Addr vm0PTEnd = vmContext0.ptEnd;
    uint64_t gartTableSize;
    SERIALIZE_SCALAR(vm0PTBase);
    SERIALIZE_SCALAR(vm0PTStart);
    SERIALIZE_SCALAR(vm0PTEnd);

    SERIALIZE_SCALAR(vmContext0.agpBase);
    SERIALIZE_SCALAR(vmContext0.agpTop);
    SERIALIZE_SCALAR(vmContext0.agpBot);
    SERIALIZE_SCALAR(vmContext0.fbBase);
    SERIALIZE_SCALAR(vmContext0.fbTop);
    SERIALIZE_SCALAR(vmContext0.fbOffset);
    SERIALIZE_SCALAR(vmContext0.sysAddrL);
    SERIALIZE_SCALAR(vmContext0.sysAddrH);

    SERIALIZE_SCALAR(mmhubBase);
    SERIALIZE_SCALAR(mmhubTop);

    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        ptBase[i] = vmContexts[i].ptBase;
        ptStart[i] = vmContexts[i].ptStart;
        ptEnd[i] = vmContexts[i].ptEnd;
    }
    SERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);

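    // gartTable is an unordered_map, which the checkpoint macros cannot
    // store directly, so it is flattened into parallel key/value arrays.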
    gartTableSize = gartTable.size();
    uint64_t* gartTableKey = new uint64_t[gartTableSize];
    uint64_t* gartTableValue = new uint64_t[gartTableSize];
    SERIALIZE_SCALAR(gartTableSize);
    int i = 0;
    for (auto it = gartTable.begin(); it != gartTable.end(); ++it) {
        gartTableKey[i] = it->first;
        gartTableValue[i] = it->second;
        i++;
    }
    SERIALIZE_ARRAY(gartTableKey, gartTableSize);
    SERIALIZE_ARRAY(gartTableValue, gartTableSize);
    delete[] gartTableKey;
    delete[] gartTableValue;
}

void
AMDGPUVM::unserialize(CheckpointIn &cp)
{
    // Unserialize requires that fields not be packed, so unpack into
    // temporaries first.
    Addr vm0PTBase;
    Addr vm0PTStart;
    Addr vm0PTEnd;
    uint64_t gartTableSize, *gartTableKey, *gartTableValue;
    UNSERIALIZE_SCALAR(vm0PTBase);
    UNSERIALIZE_SCALAR(vm0PTStart);
    UNSERIALIZE_SCALAR(vm0PTEnd);
    vmContext0.ptBase = vm0PTBase;
    vmContext0.ptStart = vm0PTStart;
    vmContext0.ptEnd = vm0PTEnd;

    UNSERIALIZE_SCALAR(vmContext0.agpBase);
    UNSERIALIZE_SCALAR(vmContext0.agpTop);
    UNSERIALIZE_SCALAR(vmContext0.agpBot);
    UNSERIALIZE_SCALAR(vmContext0.fbBase);
    UNSERIALIZE_SCALAR(vmContext0.fbTop);
    UNSERIALIZE_SCALAR(vmContext0.fbOffset);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrL);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrH);

    UNSERIALIZE_SCALAR(mmhubBase);
    UNSERIALIZE_SCALAR(mmhubTop);

    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    UNSERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        vmContexts[i].ptBase = ptBase[i];
        vmContexts[i].ptStart = ptStart[i];
        vmContexts[i].ptEnd = ptEnd[i];
    }
    UNSERIALIZE_SCALAR(gartTableSize);
    gartTableKey = new uint64_t[gartTableSize];
    gartTableValue = new uint64_t[gartTableSize];
    UNSERIALIZE_ARRAY(gartTableKey, gartTableSize);
    UNSERIALIZE_ARRAY(gartTableValue, gartTableSize);
    for (uint64_t i = 0; i < gartTableSize; i++) {
        gartTable[gartTableKey[i]] = gartTableValue[i];
    }
    delete[] gartTableKey;
    delete[] gartTableValue;
}
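
// Each translate() implementation below translates the start of the range
// [range.vaddr, range.vaddr + range.size) and clamps range.size so that a
// single chunk never crosses a page boundary of its aperture; the caller
// then continues the generator to translate the remainder.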

void
AMDGPUVM::AGPTranslationGen::translate(Range &range) const
{
    assert(vm->inAGP(range.vaddr));

    Addr next = roundUp(range.vaddr, AMDGPU_AGP_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_AGP_PAGE_SIZE;

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = range.vaddr - vm->getAGPBot() + vm->getAGPBase();

    DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}

void
AMDGPUVM::GARTTranslationGen::translate(Range &range) const
{
    Addr next = roundUp(range.vaddr, AMDGPU_GART_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_GART_PAGE_SIZE;
    range.size = std::min(range.size, next - range.vaddr);

    Addr gart_addr = bits(range.vaddr, 63, 12);

    // This table is a bit hard to iterate over. If we cross a page, the next
    // PTE is not necessarily the next entry but actually 7 entries away.
    Addr lsb = bits(gart_addr, 2, 0);
    gart_addr += lsb * 7;
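    // Worked example of the stride computed above: entry = page + 7 *
    // (page % 8), so within an eight-page-aligned group, page 0 reads
    // entry 0, page 1 reads entry 8, page 2 reads entry 16, and so on.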

    // GART is a single level translation, so the value at the "virtual" addr
    // is the PTE containing the physical address.
    auto result = vm->gartTable.find(gart_addr);
    if (result == vm->gartTable.end()) {
        // There is no reason to fault as there is no recovery mechanism for
        // invalid GART entries. Simply warn in this case.
        warn("GART translation for %p not found", range.vaddr);

        // Some PM4 packets have register addresses which we ignore. In that
        // case just return the vaddr rather than faulting.
        range.paddr = range.vaddr;
    } else {
        Addr pte = result->second;
        Addr lower_bits = bits(range.vaddr, 11, 0);
        range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
    }

    DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}

void
AMDGPUVM::MMHUBTranslationGen::translate(Range &range) const
{
    assert(vm->inMMHUB(range.vaddr));

    Addr next = roundUp(range.vaddr, AMDGPU_MMHUB_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_MMHUB_PAGE_SIZE;

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = range.vaddr - vm->getMMHUBBase();

    DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}

void
AMDGPUVM::UserTranslationGen::translate(Range &range) const
{
    // Get base address of the page table for this vmid
    Addr base = vm->getPageTableBase(vmid);
    Addr start = vm->getPageTableStart(vmid);
    DPRINTF(AMDGPUDevice, "User translation base %#lx start %#lx walker %p\n",
            base, start, walker);

    bool system_bit;
    unsigned logBytes;
    Addr paddr = range.vaddr;
    Fault fault = walker->startFunctional(base, paddr, logBytes,
                                          BaseMMU::Mode::Read, system_bit);
    if (fault != NoFault) {
        fatal("User translation fault");
    }

    // GPU page size is variable. Use logBytes to determine size.
    const Addr page_size = 1 << logBytes;
    Addr next = roundUp(range.vaddr, page_size);
    if (next == range.vaddr) {
        // We don't know the size of the next page, use default.
        next += AMDGPU_USER_PAGE_SIZE;
    }

    // If we are not in system/host memory, change the address to the MMHUB
    // aperture. This is mapped to the same backing memory as device memory.
    if (!system_bit) {
        paddr += vm->getMMHUBBase();
        assert(vm->inMMHUB(paddr));
    }

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = paddr;
}
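
// Illustrative sketch (not part of the original file): a TranslationGen is
// consumed as a sequence of Range chunks, with each translate() call above
// filling in paddr and possibly shrinking size to the containing page. A
// hypothetical caller holding a TranslationGenPtr `gen` might do:
//
//     for (const auto &range : *gen) {
//         // range.vaddr, range.paddr, range.size describe one contiguous
//         // translated chunk; e.g., issue DMA for [range.paddr,
//         // range.paddr + range.size).
//     }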

} // namespace gem5