gem5  v22.1.0.0
amdgpu_vm.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "dev/amdgpu/amdgpu_vm.hh"

#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "arch/amdgpu/vega/tlb.hh"
#include "arch/generic/mmu.hh"
#include "base/trace.hh"
#include "debug/AMDGPUDevice.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "mem/packet_access.hh"

namespace gem5
{

AMDGPUVM::AMDGPUVM()
{
    // Zero out contexts
    memset(&vmContext0, 0, sizeof(AMDGPUSysVMContext));

    vmContexts.resize(AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; ++i) {
        memset(&vmContexts[i], 0, sizeof(AMDGPUVMContext));
    }
}

Addr
AMDGPUVM::gartBase()
{
    return vmContext0.ptBase;
}

Addr
AMDGPUVM::gartSize()
{
    return vmContext0.ptEnd - vmContext0.ptStart;
}

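// Note: gartBase() and gartSize() above simply re-expose the VM context 0
// page table registers (mmVM_CONTEXT0_PAGE_TABLE_{BASE,START,END}_ADDR)
// that the driver programs through writeMMIO() below; the GART entries
// themselves live in the vm->gartTable map consulted by
// GARTTranslationGen::translate() further down.
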
void
AMDGPUVM::readMMIO(PacketPtr pkt, Addr offset)
{
    uint32_t value = pkt->getLE<uint32_t>();

    switch (offset) {
      // MMHUB MMIOs
      case mmMMHUB_VM_INVALIDATE_ENG17_SEM:
        DPRINTF(AMDGPUDevice, "Marking invalidate ENG17 SEM acquired\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_INVALIDATE_ENG17_ACK:
        // This is only used by driver initialization and only expects an ACK
        // for VMID 0 which is the first bit in the response.
        DPRINTF(AMDGPUDevice, "Telling driver invalidate ENG17 is complete\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_FB_LOCATION_BASE:
        mmhubBase = ((Addr)bits(value, 23, 0) << 24);
        DPRINTF(AMDGPUDevice, "MMHUB FB base set to %#x\n", mmhubBase);
        break;
      case mmMMHUB_VM_FB_LOCATION_TOP:
        mmhubTop = ((Addr)bits(value, 23, 0) << 24) | 0xFFFFFFULL;
        DPRINTF(AMDGPUDevice, "MMHUB FB top set to %#x\n", mmhubTop);
        break;
      // GRBM MMIOs
      case mmVM_INVALIDATE_ENG17_ACK:
        DPRINTF(AMDGPUDevice, "Overwriting invalidation ENG17 ACK\n");
        pkt->setLE<uint32_t>(1);
        break;
      default:
        DPRINTF(AMDGPUDevice, "GPUVM read of unknown MMIO %#x\n", offset);
        break;
    }
}

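// Worked example of the FB location encoding above (illustrative values,
// not part of the original source): the registers hold a 16 MiB-aligned
// address in bits [23:0], so a register value of 0xF4 for
// mmMMHUB_VM_FB_LOCATION_BASE gives mmhubBase = 0xF4 << 24 = 0xF4000000,
// and a value of 0xF7 for mmMMHUB_VM_FB_LOCATION_TOP gives
// mmhubTop = (0xF7 << 24) | 0xFFFFFF = 0xF7FFFFFF, i.e. a 64 MiB aperture.
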
void
AMDGPUVM::writeMMIO(PacketPtr pkt, Addr offset)
{
    switch (offset) {
      // VMID0 MMIOs
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32:
        vmContext0.ptBaseL = pkt->getLE<uint32_t>();
        // Clear extra bits not part of address
        vmContext0.ptBaseL = insertBits(vmContext0.ptBaseL, 0, 0, 0);
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32:
        vmContext0.ptBaseH = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32:
        vmContext0.ptStartL = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32:
        vmContext0.ptStartH = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32:
        vmContext0.ptEndL = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32:
        vmContext0.ptEndH = pkt->getLE<uint32_t>();
        break;
      case mmMC_VM_AGP_TOP: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
        } break;
      case mmMC_VM_AGP_BOT: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpBot = ((Addr)bits(val, 23, 0)) << 24;
        } break;
      case mmMC_VM_AGP_BASE: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpBase = ((Addr)bits(val, 23, 0)) << 24;
        } break;
      case mmMC_VM_FB_LOCATION_TOP: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
        } break;
      case mmMC_VM_FB_LOCATION_BASE: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbBase = ((Addr)bits(val, 23, 0)) << 24;
        } break;
      case mmMC_VM_FB_OFFSET: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbOffset = ((Addr)bits(val, 23, 0)) << 24;
        } break;
      case mmMC_VM_SYSTEM_APERTURE_LOW_ADDR: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.sysAddrL = ((Addr)bits(val, 29, 0)) << 18;
        } break;
      case mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.sysAddrH = ((Addr)bits(val, 29, 0)) << 18;
        } break;
      default:
        break;
    }
}

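// Worked example of the aperture encodings above (illustrative values, not
// part of the original source): the AGP and FB registers keep a 16 MiB-
// aligned address in bits [23:0], so a value of 0x10 gives an AGP base of
// 0x10 << 24 = 0x10000000; the system aperture registers keep a 256 KiB-
// aligned address in bits [29:0], so a value of 0x3C00 gives
// sysAddrL = 0x3C00 << 18 = 0xF0000000.
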
void
AMDGPUVM::registerTLB(VegaISA::GpuTLB *tlb)
{
    DPRINTF(AMDGPUDevice, "Registered a TLB with device\n");
    gpu_tlbs.push_back(tlb);
}

void
AMDGPUVM::invalidateTLBs()
{
    DPRINTF(AMDGPUDevice, "Invalidating all TLBs\n");
    for (auto &tlb : gpu_tlbs) {
        tlb->invalidateAll();
        DPRINTF(AMDGPUDevice, " ... TLB invalidated\n");
    }
}

void
AMDGPUVM::serialize(CheckpointOut &cp) const
{
    Addr vm0PTBase = vmContext0.ptBase;
    Addr vm0PTStart = vmContext0.ptStart;
    Addr vm0PTEnd = vmContext0.ptEnd;
    SERIALIZE_SCALAR(vm0PTBase);
    SERIALIZE_SCALAR(vm0PTStart);
    SERIALIZE_SCALAR(vm0PTEnd);

    SERIALIZE_SCALAR(vmContext0.agpBase);
    SERIALIZE_SCALAR(vmContext0.agpTop);
    SERIALIZE_SCALAR(vmContext0.agpBot);
    SERIALIZE_SCALAR(vmContext0.fbBase);
    SERIALIZE_SCALAR(vmContext0.fbTop);
    SERIALIZE_SCALAR(vmContext0.fbOffset);
    SERIALIZE_SCALAR(vmContext0.sysAddrL);
    SERIALIZE_SCALAR(vmContext0.sysAddrH);

    SERIALIZE_SCALAR(mmhubBase);
    SERIALIZE_SCALAR(mmhubTop);

    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        ptBase[i] = vmContexts[i].ptBase;
        ptStart[i] = vmContexts[i].ptStart;
        ptEnd[i] = vmContexts[i].ptEnd;
    }
    SERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
}

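// For reference (illustration only, not part of the original source):
// SERIALIZE_SCALAR(x) records the value under the key "x" in this object's
// checkpoint section, so the local copies above produce entries named
// "vm0PTBase", "vm0PTStart" and "vm0PTEnd", and
// SERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT) records all per-VMID bases under
// a single "ptBase" entry. unserialize() below must read them back under
// exactly the same names.
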
void
AMDGPUVM::unserialize(CheckpointIn &cp)
{
    // Unserialize requires fields not be packed
    Addr vm0PTBase;
    Addr vm0PTStart;
    Addr vm0PTEnd;
    UNSERIALIZE_SCALAR(vm0PTBase);
    UNSERIALIZE_SCALAR(vm0PTStart);
    UNSERIALIZE_SCALAR(vm0PTEnd);
    vmContext0.ptBase = vm0PTBase;
    vmContext0.ptStart = vm0PTStart;
    vmContext0.ptEnd = vm0PTEnd;

    UNSERIALIZE_SCALAR(vmContext0.agpBase);
    UNSERIALIZE_SCALAR(vmContext0.agpTop);
    UNSERIALIZE_SCALAR(vmContext0.agpBot);
    UNSERIALIZE_SCALAR(vmContext0.fbBase);
    UNSERIALIZE_SCALAR(vmContext0.fbTop);
    UNSERIALIZE_SCALAR(vmContext0.fbOffset);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrL);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrH);

    UNSERIALIZE_SCALAR(mmhubBase);
    UNSERIALIZE_SCALAR(mmhubTop);

    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    UNSERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        vmContexts[i].ptBase = ptBase[i];
        vmContexts[i].ptStart = ptStart[i];
        vmContexts[i].ptEnd = ptEnd[i];
    }
}

void
AMDGPUVM::AGPTranslationGen::translate(Range &range) const
{
    assert(vm->inAGP(range.vaddr));

    Addr next = roundUp(range.vaddr, AMDGPU_AGP_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_AGP_PAGE_SIZE;

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = range.vaddr - vm->getAGPBot() + vm->getAGPBase();

    DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}
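
// Worked example (illustrative values, and assuming AMDGPU_AGP_PAGE_SIZE is
// 4 KiB): with getAGPBot() == 0x0 and getAGPBase() == 0xF0000000, a request
// at vaddr 0x1234 is clamped to the end of its AGP page (next == 0x2000, so
// size <= 0xDCC bytes) and translates to
// paddr = 0x1234 - 0x0 + 0xF0000000 = 0xF0001234.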

void
AMDGPUVM::GARTTranslationGen::translate(Range &range) const
{
    Addr next = roundUp(range.vaddr, AMDGPU_GART_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_GART_PAGE_SIZE;
    range.size = std::min(range.size, next - range.vaddr);

    Addr gart_addr = bits(range.vaddr, 63, 12);

    // This table is a bit hard to iterate over. If we cross a page, the next
    // PTE is not necessarily the next entry but actually 7 entries away.
    Addr lsb = bits(gart_addr, 2, 0);
    gart_addr += lsb * 7;

    // GART is a single level translation, so the value at the "virtual" addr
    // is the PTE containing the physical address.
    auto result = vm->gartTable.find(gart_addr);
    if (result == vm->gartTable.end()) {
        // There is no reason to fault as there is no recovery mechanism for
        // invalid GART entries. Simply warn in this case.
        warn("GART translation for %p not found", range.vaddr);

        // Some PM4 packets have register addresses which we ignore. In that
        // case just return the vaddr rather than faulting.
        range.paddr = range.vaddr;
    } else {
        Addr pte = result->second;
        Addr lower_bits = bits(range.vaddr, 11, 0);
        range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
    }

    DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}
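
// Worked example of the index adjustment above (illustration only): for
// vaddr 0x3000, gart_addr = bits(0x3000, 63, 12) = 0x3 and
// lsb = bits(0x3, 2, 0) = 0x3, so the table is probed at
// 0x3 + 0x3 * 7 = 0x18 rather than at 0x3; the next page (vaddr 0x4000)
// probes 0x4 + 0x4 * 7 = 0x20, eight entries further on.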

void
AMDGPUVM::MMHUBTranslationGen::translate(Range &range) const
{
    assert(vm->inMMHUB(range.vaddr));

    Addr next = roundUp(range.vaddr, AMDGPU_MMHUB_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_MMHUB_PAGE_SIZE;

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = range.vaddr - vm->getMMHUBBase();

    DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}
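
// Example (illustrative values): with getMMHUBBase() == 0xF4000000, an
// MMHUB address of 0xF4001000 maps to device-memory offset 0x1000; the
// MMHUB aperture is a plain linear offset with no page table lookup.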

void
AMDGPUVM::UserTranslationGen::translate(Range &range) const
{
    // Get base address of the page table for this vmid
    Addr base = vm->getPageTableBase(vmid);
    Addr start = vm->getPageTableStart(vmid);
    DPRINTF(AMDGPUDevice, "User tl base %#lx start %#lx walker %p\n",
            base, start, walker);

    bool system_bit;
    unsigned logBytes;
    Addr paddr = range.vaddr;
    Fault fault = walker->startFunctional(base, paddr, logBytes,
                                          BaseMMU::Mode::Read, system_bit);
    if (fault != NoFault) {
        fatal("User translation fault");
    }

    // GPU page size is variable. Use logBytes to determine size.
    const Addr page_size = 1 << logBytes;
    Addr next = roundUp(range.vaddr, page_size);
    if (next == range.vaddr) {
        // We don't know the size of the next page, use default.
        next += AMDGPU_USER_PAGE_SIZE;
    }

    // If we are not in system/host memory, change the address to the MMHUB
    // aperture. This is mapped to the same backing memory as device memory.
    if (!system_bit) {
        paddr += vm->getMMHUBBase();
        assert(vm->inMMHUB(paddr));
    }

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = paddr;
}
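
// Note on the variable page size handling above (illustration only): the
// walker reports the mapping size through logBytes, e.g. logBytes == 12
// means a 4 KiB page and logBytes == 21 a 2 MiB page, and the range is
// clamped so a single translation never crosses the page the walker
// reported; the system bit then selects between host memory and the MMHUB
// aperture for the resulting paddr.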

} // namespace gem5