gem5 v23.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
amdgpu_vm.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
36#include "arch/generic/mmu.hh"
37#include "base/trace.hh"
38#include "debug/AMDGPUDevice.hh"
40#include "mem/packet_access.hh"
41
42namespace gem5
43{
44
// Zeroes the system VM context (vmContext0) and every per-VMID context.
// NOTE(review): the signature for this body (listing line 45) is not visible
// in this rendering; it is presumably the AMDGPUVM constructor -- confirm
// against the original file.
46{
47 // Zero out contexts
48 memset(&vmContext0, 0, sizeof(AMDGPUSysVMContext));
49
// Clear each per-VMID context in turn. The loop previously indexed element 0
// on every iteration, so entries 1..AMDGPU_VM_COUNT-1 were never touched by
// this memset.
// NOTE(review): listing line 50 (not visible here) presumably sizes
// vmContexts before this loop -- confirm it still precedes the loop.
51 for (int i = 0; i < AMDGPU_VM_COUNT; ++i) {
52 memset(&vmContexts[i], 0, sizeof(AMDGPUVMContext));
53 }
54}
55
/**
 * Returns vmContext0.ptBase.
 *
 * NOTE(review): the signature line (listing line 57) is missing from this
 * rendering. The index at the end of this page lists "Addr gartBase()" at
 * amdgpu_vm.cc:57, described as the base address of the GART table in the
 * framebuffer.
 */
56Addr
58{
59 return vmContext0.ptBase;
60}
61
/**
 * NOTE(review): both the signature (listing line 63) and the only body
 * statement (listing line 65) are missing from this rendering. The index at
 * the end of this page lists "Addr gartSize()" at amdgpu_vm.cc:63, documented
 * as returning the size of the GART in number of PTEs. Restore the missing
 * lines from the original file before relying on this listing.
 */
62Addr
64{
66}
67
/**
 * MMIO read handler. Reads the 32-bit little-endian value already carried by
 * pkt and, depending on offset, either overwrites the packet data with a
 * fixed response or records framebuffer aperture bounds in mmhubBase /
 * mmhubTop.
 *
 * NOTE(review): the signature (listing line 69) and five `case` labels
 * (listing lines 75, 79, 85, 89, 94) are missing from this rendering. The
 * index at the end of this page lists readMMIO(PacketPtr pkt, Addr offset) at
 * amdgpu_vm.cc:69; which register each arm handles can only be inferred from
 * the log messages -- confirm against the original file.
 */
68void
70{
71 uint32_t value = pkt->getLE<uint32_t>();
72
73 switch (offset) {
74 // MMHUB MMIOs
// (case label on listing line 75 not visible) Respond with 1.
76 DPRINTF(AMDGPUDevice, "Marking invalidate ENG17 SEM acquired\n");
77 pkt->setLE<uint32_t>(1);
78 break;
// (case label on listing line 79 not visible)
80 // This is only used by driver initialization and only expects an ACK
81 // for VMID 0 which is the first bit in the response.
82 DPRINTF(AMDGPUDevice, "Telling driver invalidate ENG17 is complete\n");
83 pkt->setLE<uint32_t>(1);
84 break;
// (case label on listing line 85 not visible) Bits 23:0 of the value,
// shifted left by 24, become the aperture base.
86 mmhubBase = ((Addr)bits(value, 23, 0) << 24);
87 DPRINTF(AMDGPUDevice, "MMHUB FB base set to %#x\n", mmhubBase);
88 break;
// (case label on listing line 89 not visible) Same field as above, with the
// low 24 bits filled with ones to form the aperture top.
90 mmhubTop = ((Addr)bits(value, 23, 0) << 24) | 0xFFFFFFULL;
91 DPRINTF(AMDGPUDevice, "MMHUB FB top set to %#x\n", mmhubTop);
92 break;
93 // GRBM MMIOs
// (case label on listing line 94 not visible) Respond with 1.
95 DPRINTF(AMDGPUDevice, "Overwritting invalidation ENG17 ACK\n");
96 pkt->setLE<uint32_t>(1);
97 break;
98 default:
// Unknown offsets are only logged; the packet data is left untouched.
99 DPRINTF(AMDGPUDevice, "GPUVM read of unknown MMIO %#x\n", offset);
100 break;
101 }
102}
103
/**
 * MMIO write handler. Depending on offset, stores the 32-bit little-endian
 * value carried by pkt into a field of vmContext0 (page table base/start/end
 * halves, AGP and framebuffer apertures, system aperture bounds). Unknown
 * offsets are silently ignored.
 *
 * NOTE(review): the signature (listing line 105), ten `case` labels (listing
 * lines 109, 114, 117, 120, 123, 126, 141, 145, 153, 157) and one statement
 * (listing line 112) are missing from this rendering. The index at the end of
 * this page lists writeMMIO(PacketPtr pkt, Addr offset) at amdgpu_vm.cc:105.
 * Arms without a visible label are identified here only by the field they
 * write -- confirm the register mapping against the original file.
 */
104void
106{
107 switch (offset) {
108 // VMID0 MMIOs
// (case label on listing line 109 not visible)
110 vmContext0.ptBaseL = pkt->getLE<uint32_t>();
111 // Clear extra bits not part of address
// NOTE(review): the statement this comment describes (listing line 112) is
// not visible in this rendering.
113 break;
// (case label on listing line 114 not visible)
115 vmContext0.ptBaseH = pkt->getLE<uint32_t>();
116 break;
// (case label on listing line 117 not visible)
118 vmContext0.ptStartL = pkt->getLE<uint32_t>();
119 break;
// (case label on listing line 120 not visible)
121 vmContext0.ptStartH = pkt->getLE<uint32_t>();
122 break;
// (case label on listing line 123 not visible)
124 vmContext0.ptEndL = pkt->getLE<uint32_t>();
125 break;
// (case label on listing line 126 not visible)
127 vmContext0.ptEndH = pkt->getLE<uint32_t>();
128 break;
// The aperture registers below take bits 23:0 of the written value and shift
// them left by 24; the "top" variants also set the low 24 bits to ones.
129 case mmMC_VM_AGP_TOP: {
130 uint32_t val = pkt->getLE<uint32_t>();
131 vmContext0.agpTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
132 } break;
133 case mmMC_VM_AGP_BOT: {
134 uint32_t val = pkt->getLE<uint32_t>();
135 vmContext0.agpBot = ((Addr)bits(val, 23, 0)) << 24;
136 } break;
137 case mmMC_VM_AGP_BASE: {
138 uint32_t val = pkt->getLE<uint32_t>();
139 vmContext0.agpBase = ((Addr)bits(val, 23, 0)) << 24;
140 } break;
// (case label on listing line 141 not visible)
142 uint32_t val = pkt->getLE<uint32_t>();
143 vmContext0.fbTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
144 } break;
// (case label on listing line 145 not visible)
146 uint32_t val = pkt->getLE<uint32_t>();
147 vmContext0.fbBase = ((Addr)bits(val, 23, 0)) << 24;
148 } break;
149 case mmMC_VM_FB_OFFSET: {
150 uint32_t val = pkt->getLE<uint32_t>();
151 vmContext0.fbOffset = ((Addr)bits(val, 23, 0)) << 24;
152 } break;
// The two system-aperture arms take bits 29:0 and shift them left by 18.
// (case label on listing line 153 not visible)
154 uint32_t val = pkt->getLE<uint32_t>();
155 vmContext0.sysAddrL = ((Addr)bits(val, 29, 0)) << 18;
156 } break;
// (case label on listing line 157 not visible)
158 uint32_t val = pkt->getLE<uint32_t>();
159 vmContext0.sysAddrH = ((Addr)bits(val, 29, 0)) << 18;
160 } break;
161 default:
162 break;
163 }
164}
165
/**
 * Appends tlb to gpu_tlbs so that it is visited by later loops over that
 * list.
 *
 * NOTE(review): the signature line (listing line 167) is missing from this
 * rendering; the index at the end of this page lists
 * registerTLB(VegaISA::GpuTLB *tlb) at amdgpu_vm.cc:167.
 */
166void
168{
169 DPRINTF(AMDGPUDevice, "Registered a TLB with device\n");
170 gpu_tlbs.push_back(tlb);
171}
172
/**
 * Calls invalidateAll() on every TLB stored in gpu_tlbs, logging once per
 * TLB.
 *
 * NOTE(review): the signature line (listing line 174) is missing from this
 * rendering; the index at the end of this page lists invalidateTLBs() at
 * amdgpu_vm.cc:174.
 */
173void
175{
176 DPRINTF(AMDGPUDevice, "Invalidating all TLBs\n");
177 for (auto &tlb : gpu_tlbs) {
178 tlb->invalidateAll();
179 DPRINTF(AMDGPUDevice, " ... TLB invalidated\n");
180 }
181}
182
/**
 * Writes VM state to a checkpoint: the vmContext0 page table base/start/end,
 * per-VMID page table fields copied out of vmContexts, and the contents of
 * gartTable flattened into parallel key/value arrays.
 *
 * NOTE(review): the signature (listing line 184) and several statements
 * (listing lines 194-201, 203-204 and 214-216) are missing from this
 * rendering. The index at the end of this page lists
 * serialize(CheckpointOut &cp) const at amdgpu_vm.cc:184. Whatever else is
 * serialized on the missing lines cannot be seen here.
 */
183void
185{
// Fields are copied into plain locals before being handed to the macros.
186 Addr vm0PTBase = vmContext0.ptBase;
187 Addr vm0PTStart = vmContext0.ptStart;
188 Addr vm0PTEnd = vmContext0.ptEnd;
189 uint64_t gartTableSize;
190 SERIALIZE_SCALAR(vm0PTBase);
191 SERIALIZE_SCALAR(vm0PTStart);
192 SERIALIZE_SCALAR(vm0PTEnd);
193
202
205
// Gather the per-VMID page table fields into flat arrays.
// NOTE(review): the statements that write these arrays to the checkpoint
// are presumably on the missing listing lines 214-216 -- confirm.
206 Addr ptBase[AMDGPU_VM_COUNT];
207 Addr ptStart[AMDGPU_VM_COUNT];
208 Addr ptEnd[AMDGPU_VM_COUNT];
209 for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
210 ptBase[i] = vmContexts[i].ptBase;
211 ptStart[i] = vmContexts[i].ptStart;
212 ptEnd[i] = vmContexts[i].ptEnd;
213 }
217
// Flatten gartTable into two heap arrays of equal length; entry i of the key
// array pairs with entry i of the value array. The element count is written
// first so the arrays can be sized when reading back.
218 gartTableSize = gartTable.size();
219 uint64_t* gartTableKey = new uint64_t[gartTableSize];
220 uint64_t* gartTableValue = new uint64_t[gartTableSize];
221 SERIALIZE_SCALAR(gartTableSize);
222 int i = 0;
223 for (auto it = gartTable.begin(); it != gartTable.end(); ++it) {
224 gartTableKey[i] = it->first;
225 gartTableValue[i] = it->second;
226 i++;
227 }
228 SERIALIZE_ARRAY(gartTableKey, gartTableSize);
229 SERIALIZE_ARRAY(gartTableValue, gartTableSize);
// The temporary arrays are released once they have been written out.
230 delete[] gartTableKey;
231 delete[] gartTableValue;
232}
233
/**
 * Restores VM state from a checkpoint: the vmContext0 page table
 * base/start/end, per-VMID page table fields in vmContexts, and the entries
 * of gartTable from parallel key/value arrays.
 *
 * NOTE(review): the signature (listing line 235) and several statements
 * (listing lines 249-256, 258-259 and 264-266) are missing from this
 * rendering. The index at the end of this page lists
 * unserialize(CheckpointIn &cp) at amdgpu_vm.cc:235.
 */
234void
236{
237 // Unserialize requires fields not be packed
238 Addr vm0PTBase;
239 Addr vm0PTStart;
240 Addr vm0PTEnd;
241 uint64_t gartTableSize, *gartTableKey, *gartTableValue;
242 UNSERIALIZE_SCALAR(vm0PTBase);
243 UNSERIALIZE_SCALAR(vm0PTStart);
244 UNSERIALIZE_SCALAR(vm0PTEnd);
245 vmContext0.ptBase = vm0PTBase;
246 vmContext0.ptStart = vm0PTStart;
247 vmContext0.ptEnd = vm0PTEnd;
248
257
260
261 Addr ptBase[AMDGPU_VM_COUNT];
262 Addr ptStart[AMDGPU_VM_COUNT];
263 Addr ptEnd[AMDGPU_VM_COUNT];
// NOTE(review): as rendered, the three arrays above are read below without
// being filled. The statements that populate them are presumably on the
// missing listing lines 264-266 -- confirm against the original file.
267 for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
268 vmContexts[i].ptBase = ptBase[i];
269 vmContexts[i].ptStart = ptStart[i];
270 vmContexts[i].ptEnd = ptEnd[i];
271 }
// The element count is read first and used to size the temporary key/value
// arrays before they are read.
272 UNSERIALIZE_SCALAR(gartTableSize);
273 gartTableKey = new uint64_t[gartTableSize];
274 gartTableValue = new uint64_t[gartTableSize];
275 UNSERIALIZE_ARRAY(gartTableKey, gartTableSize);
276 UNSERIALIZE_ARRAY(gartTableValue, gartTableSize);
// Rebuild the map: entry i of the key array pairs with entry i of the value
// array. Existing entries with the same key are overwritten.
277 for (uint64_t i = 0; i < gartTableSize; i++) {
278 gartTable[gartTableKey[i]] = gartTableValue[i];
279 }
280 delete[] gartTableKey;
281 delete[] gartTableValue;
282}
283
/**
 * Translates one AGP-aperture range: clamps range.size so the range does not
 * extend past the next AMDGPU_AGP_PAGE_SIZE boundary and sets range.paddr to
 * vaddr - getAGPBot() + getAGPBase().
 *
 * NOTE(review): the signature line (listing line 285) is missing from this
 * rendering; the index at the end of this page lists
 * "void translate(Range &range) const override" at amdgpu_vm.cc:285. The
 * enclosing class name is not visible here.
 */
284void
286{
// The address must already lie in the AGP aperture.
287 assert(vm->inAGP(range.vaddr));
288
// If vaddr is already aligned, rounding up returns vaddr itself, so advance
// by one full page to get the end of the current page.
289 Addr next = roundUp(range.vaddr, AMDGPU_AGP_PAGE_SIZE);
290 if (next == range.vaddr)
291 next += AMDGPU_AGP_PAGE_SIZE;
292
293 range.size = std::min(range.size, next - range.vaddr);
294 range.paddr = range.vaddr - vm->getAGPBot() + vm->getAGPBase();
295
296 DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
297 range.vaddr, range.paddr);
298}
299
/**
 * Translates one range through the GART table copy (vm->gartTable): clamps
 * range.size at the next AMDGPU_GART_PAGE_SIZE boundary, looks up the PTE for
 * the page, and builds range.paddr from PTE bits 47:12 plus the low 12 bits
 * of vaddr. When no PTE is found a warning is emitted and paddr is set to
 * vaddr.
 *
 * NOTE(review): the signature (listing line 301) and the declaration of
 * `next` (listing line 303) are missing from this rendering; the index at the
 * end of this page lists "void translate(Range &range) const override" at
 * amdgpu_vm.cc:301. `next` is presumably vaddr rounded up to
 * AMDGPU_GART_PAGE_SIZE, by analogy with the visible AGP variant -- confirm.
 */
300void
302{
304 if (next == range.vaddr)
305 next += AMDGPU_GART_PAGE_SIZE;
306 range.size = std::min(range.size, next - range.vaddr);
307
// Drop the low 12 bits of vaddr to get the lookup index.
308 Addr gart_addr = bits(range.vaddr, 63, 12);
309
310 // This table is a bit hard to iterate over. If we cross a page, the next
311 // PTE is not necessarily the next entry but actually 7 entries away.
312 Addr lsb = bits(gart_addr, 2, 0);
313 gart_addr += lsb * 7;
314
315 // GART is a single level translation, so the value at the "virtual" addr
316 // is the PTE containing the physical address.
317 auto result = vm->gartTable.find(gart_addr);
318 if (result == vm->gartTable.end()) {
319 // There is no reason to fault as there is no recovery mechanism for
320 // invalid GART entries. Emit a warning and continue in this case.
321 warn("GART translation for %p not found", range.vaddr);
322
323 // Some PM4 packets have register addresses which we ignore. In that
324 // case just return the vaddr rather than faulting.
325 range.paddr = range.vaddr;
326 } else {
// Combine PTE bits 47:12 with the low 12 bits of vaddr.
327 Addr pte = result->second;
328 Addr lower_bits = bits(range.vaddr, 11, 0);
329 range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
330 }
331
332 DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
333 range.vaddr, range.paddr);
334}
335
/**
 * Translates one MMHUB-aperture range: clamps range.size at `next` and sets
 * range.paddr to vaddr - getMMHUBBase().
 *
 * NOTE(review): the signature (listing line 337), the declaration of `next`
 * (listing line 341) and the body of the `if` below (listing line 343) are
 * missing from this rendering; the index at the end of this page lists
 * "void translate(Range &range) const override" at amdgpu_vm.cc:337. By
 * analogy with the visible AGP variant the missing lines presumably round
 * vaddr up to AMDGPU_MMHUB_PAGE_SIZE and advance by one page when already
 * aligned -- confirm against the original file.
 */
336void
338{
// The address must already lie in the MMHUB aperture.
339 assert(vm->inMMHUB(range.vaddr));
340
342 if (next == range.vaddr)
// NOTE(review): the statement guarded by this `if` is not visible here; as
// rendered, the `if` would wrongly appear to guard the size clamp below.
344
345 range.size = std::min(range.size, next - range.vaddr);
346 range.paddr = range.vaddr - vm->getMMHUBBase();
347
348 DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
349 range.vaddr, range.paddr);
350}
351
/**
 * Translates one range through the page table of `vmid` using a functional
 * page table walk, clamps range.size at the end of the current page, and
 * stores the resulting address in range.paddr. A walk that returns a fault
 * is fatal.
 *
 * NOTE(review): the signature line (listing line 353) is missing from this
 * rendering; the index at the end of this page lists
 * "void translate(Range &range) const override" at amdgpu_vm.cc:353. The
 * enclosing class name is not visible here.
 */
352void
354{
355 // Get base address of the page table for this vmid
356 Addr base = vm->getPageTableBase(vmid);
357 Addr start = vm->getPageTableStart(vmid);
358 DPRINTF(AMDGPUDevice, "User tl base %#lx start %#lx walker %p\n",
359 base, start, walker);
360
// system_bit and logBytes are uninitialized before the walk, so they are
// presumably outputs of startFunctional, as is the translated paddr.
361 bool system_bit;
362 unsigned logBytes;
363 Addr paddr = range.vaddr;
364 Fault fault = walker->startFunctional(base, paddr, logBytes,
365 BaseMMU::Mode::Read, system_bit);
366 if (fault != NoFault) {
367 fatal("User translation fault");
368 }
369
370 // GPU page size is variable. Use logBytes to determine size.
// Shift a 64-bit one: a plain `1 << logBytes` is evaluated as int and
// overflows once logBytes reaches 31, before the value is widened to Addr.
371 const Addr page_size = Addr(1) << logBytes;
372 Addr next = roundUp(range.vaddr, page_size);
373 if (next == range.vaddr) {
374 // We don't know the size of the next page, use default.
375 next += AMDGPU_USER_PAGE_SIZE;
376 }
377
378 // If we are not in system/host memory, change the address to the MMHUB
379 // aperture. This is mapped to the same backing memory as device memory.
380 if (!system_bit) {
381 paddr += vm->getMMHUBBase();
382 assert(vm->inMMHUB(paddr));
383 }
384
385 range.size = std::min(range.size, next - range.vaddr);
386 range.paddr = paddr;
387}
388
389} // namespace gem5
#define mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32
Definition amdgpu_vm.hh:58
#define mmMMHUB_VM_FB_LOCATION_BASE
Definition amdgpu_vm.hh:74
#define mmMMHUB_VM_FB_LOCATION_TOP
Definition amdgpu_vm.hh:75
#define mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32
Definition amdgpu_vm.hh:59
#define mmMC_VM_SYSTEM_APERTURE_LOW_ADDR
Definition amdgpu_vm.hh:68
#define mmMC_VM_FB_LOCATION_BASE
Definition amdgpu_vm.hh:63
#define mmMC_VM_AGP_BASE
Definition amdgpu_vm.hh:67
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Definition amdgpu_vm.hh:94
#define mmMC_VM_AGP_BOT
Definition amdgpu_vm.hh:66
#define mmMMHUB_VM_INVALIDATE_ENG17_SEM
Definition amdgpu_vm.hh:71
#define mmMC_VM_AGP_TOP
Definition amdgpu_vm.hh:65
#define mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR
Definition amdgpu_vm.hh:69
#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32
Definition amdgpu_vm.hh:55
#define mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32
Definition amdgpu_vm.hh:60
#define mmMC_VM_FB_OFFSET
Definition amdgpu_vm.hh:62
#define mmMC_VM_FB_LOCATION_TOP
Definition amdgpu_vm.hh:64
static constexpr int AMDGPU_GART_PAGE_SIZE
Definition amdgpu_vm.hh:93
static constexpr int AMDGPU_USER_PAGE_SIZE
Definition amdgpu_vm.hh:97
#define mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32
Definition amdgpu_vm.hh:57
#define mmVM_INVALIDATE_ENG17_ACK
MMIO offsets for graphics register bus manager (GRBM).
Definition amdgpu_vm.hh:54
#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32
Definition amdgpu_vm.hh:56
static constexpr int AMDGPU_AGP_PAGE_SIZE
Definition amdgpu_vm.hh:92
#define mmMMHUB_VM_INVALIDATE_ENG17_ACK
Definition amdgpu_vm.hh:73
#define DPRINTF(x,...)
Definition trace.hh:210
Device model for an AMD GPU.
void translate(Range &range) const override
Subclasses implement this function to complete TranslationGen.
Definition amdgpu_vm.cc:285
void translate(Range &range) const override
Subclasses implement this function to complete TranslationGen.
Definition amdgpu_vm.cc:301
void translate(Range &range) const override
Subclasses implement this function to complete TranslationGen.
Definition amdgpu_vm.cc:337
void translate(Range &range) const override
Subclasses implement this function to complete TranslationGen.
Definition amdgpu_vm.cc:353
uint64_t mmhubBase
Definition amdgpu_vm.hh:154
std::vector< AMDGPUVMContext > vmContexts
Definition amdgpu_vm.hh:150
void invalidateTLBs()
Definition amdgpu_vm.cc:174
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
Definition amdgpu_vm.hh:179
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Definition amdgpu_vm.hh:188
std::vector< VegaISA::GpuTLB * > gpu_tlbs
List of TLBs associated with the GPU device.
Definition amdgpu_vm.hh:161
void readMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_vm.cc:69
void writeMMIO(PacketPtr pkt, Addr offset)
Definition amdgpu_vm.cc:105
uint64_t mmhubTop
Definition amdgpu_vm.hh:155
AMDGPUSysVMContext vmContext0
Definition amdgpu_vm.hh:149
Addr gartBase()
Return base address of GART table in framebuffer.
Definition amdgpu_vm.cc:57
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition amdgpu_vm.cc:235
Addr gartSize()
Return size of GART in number of PTEs.
Definition amdgpu_vm.cc:63
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition amdgpu_vm.cc:184
Addr getAGPBase()
Definition amdgpu_vm.hh:195
void registerTLB(VegaISA::GpuTLB *tlb)
Control methods for TLBs associated with the GPU device.
Definition amdgpu_vm.cc:167
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
void setLE(T v)
Set the value in the data pointer to v as little endian.
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
static constexpr T roundUp(const T &val, const U &align)
This function is used to align addresses in memory.
Definition intmath.hh:260
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:76
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
Definition bitfield.hh:182
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
#define UNSERIALIZE_ARRAY(member, size)
Definition serialize.hh:618
#define SERIALIZE_ARRAY(member, size)
Definition serialize.hh:610
#define warn(...)
Definition logging.hh:256
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 0 > vm
Bitfield< 59, 56 > tlb
Bitfield< 51, 12 > base
Definition pagetable.hh:141
Bitfield< 63 > val
Definition misc.hh:776
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< FaultBase > Fault
Definition types.hh:249
std::ostream CheckpointOut
Definition serialize.hh:66
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
static constexpr int AMDGPU_VM_COUNT
constexpr decltype(nullptr) NoFault
Definition types.hh:253
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
This structure represents a single, contiguous translation, or carries information about whatever fau...

Generated on Mon Jul 10 2023 14:24:30 for gem5 by doxygen 1.9.7