tlb.cc (gem5 v22.0.0.0)
1 /*
2  * Copyright (c) 2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "arch/amdgpu/vega/tlb.hh"
33 
34 #include <cmath>
35 #include <cstring>
36 
 37 #include "arch/amdgpu/common/gpu_translation_state.hh"
 38 #include "arch/amdgpu/vega/faults.hh"
 39 #include "arch/amdgpu/vega/pagetable_walker.hh"
 40 #include "debug/GPUPrefetch.hh"
 41 #include "debug/GPUTLB.hh"
 42 #include "dev/amdgpu/amdgpu_device.hh"
 43 
44 namespace gem5
45 {
46 namespace VegaISA
47 {
48 
 49 // We have no limit on the number of translations we send
 50 // downstream, as we rely on the limit of the coalescer
 51 // above us.
52 GpuTLB::GpuTLB(const VegaGPUTLBParams &p)
53  : ClockedObject(p), walker(p.walker),
54  gpuDevice(p.gpu_device), size(p.size), stats(this),
55  cleanupEvent([this]{ cleanup(); }, name(), false,
 56  Event::Maximum_Pri)
 57 {
58  assoc = p.assoc;
59  assert(assoc <= size);
60  numSets = size/assoc;
61  allocationPolicy = p.allocationPolicy;
62  hasMemSidePort = false;
63 
64  tlb.assign(size, VegaTlbEntry());
65 
66  freeList.resize(numSets);
67  entryList.resize(numSets);
68 
69  for (int set = 0; set < numSets; ++set) {
70  for (int way = 0; way < assoc; ++way) {
71  int x = set * assoc + way;
72  freeList[set].push_back(&tlb.at(x));
73  }
74  }
75 
76  FA = (size == assoc);
77  setMask = numSets - 1;
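 // A worked set-indexing example (illustrative values, not from any
 // particular config): with size = 64 entries and assoc = 8, numSets = 8
 // and setMask = 0x7, so for 4KiB native pages (VegaISA::PageShift = 12)
 // the set of a virtual address is (vaddr >> 12) & 0x7, i.e. bits 14:12.
 // When size == assoc the TLB is fully associative (FA) and every lookup
 // probes the single set 0.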
78 
79  maxCoalescedReqs = p.maxOutstandingReqs;
80 
81 
82  outstandingReqs = 0;
83  hitLatency = p.hitLatency;
84  missLatency1 = p.missLatency1;
85  missLatency2 = p.missLatency2;
86 
87  // create the response ports based on the number of connected ports
88  for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
89  cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
90  name(), i), this, i));
91  }
92 
93  // create the requestor ports based on the number of connected ports
94  for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
95  memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
96  name(), i), this, i));
97  }
98 
99  // assuming one walker per TLB, set our walker's TLB to this TLB.
100  walker->setTLB(this);
101 
 102  // gpuDevice is non-null only in full-system mode; it is set by the
 103  // GpuTLB params from the config file.
104  if (gpuDevice) {
105  gpuDevice->getVM().registerTLB(this);
106  }
107 }
108 
 109 GpuTLB::~GpuTLB()
 110 {
111 }
112 
113 Port &
114 GpuTLB::getPort(const std::string &if_name, PortID idx)
115 {
116  if (if_name == "cpu_side_ports") {
117  if (idx >= static_cast<PortID>(cpuSidePort.size())) {
118  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
119  }
120 
121  return *cpuSidePort[idx];
122  } else if (if_name == "mem_side_ports") {
123  if (idx >= static_cast<PortID>(memSidePort.size())) {
124  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
125  }
126 
127  hasMemSidePort = true;
128 
129  return *memSidePort[idx];
130  } else {
131  panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
132  }
133 }
134 
135 Fault
 136 GpuTLB::createPagefault(Addr vaddr, Mode mode)
 137 {
 138  DPRINTF(GPUTLB, "GPUTLB: Raising page fault.\n");
 139  ExceptionCode code;
 140  if (mode == BaseMMU::Read)
 141  code = ExceptionCode::LOAD_PAGE;
 142  else if (mode == BaseMMU::Write)
 143  code = ExceptionCode::STORE_PAGE;
 144  else
 145  code = ExceptionCode::INST_PAGE;
 146  return std::make_shared<PageFault>(vaddr, code, true, mode, true);
147 }
148 
149 Addr
 150 GpuTLB::pageAlign(Addr vaddr)
 151 {
152  Addr pageMask = mask(VegaISA::PageShift);
153  return (vaddr & ~pageMask);
154 }
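 // For instance, with VegaISA::PageShift = 12 the pageMask is 0xfff, so
 // pageAlign(0x12345678) returns 0x12345000: the 12-bit page offset is
 // cleared and the virtual page number is preserved.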
155 
156 VegaTlbEntry*
157 GpuTLB::insert(Addr vpn, VegaTlbEntry &entry)
158 {
159  VegaTlbEntry *newEntry = nullptr;
160 
168  Addr real_vpn = vpn & ~(entry.size() - 1);
169 
173  Addr real_ppn = entry.paddr & ~(entry.size() - 1);
174 
175  int set = (real_vpn >> VegaISA::PageShift) & setMask;
176 
177  DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
178  real_vpn, real_ppn, entry.size(), set);
179 
180  if (!freeList[set].empty()) {
181  newEntry = freeList[set].front();
182  freeList[set].pop_front();
183  } else {
184  newEntry = entryList[set].back();
185  entryList[set].pop_back();
186  }
187 
188  *newEntry = entry;
189  newEntry->vaddr = real_vpn;
190  newEntry->paddr = real_ppn;
191  entryList[set].push_front(newEntry);
192 
193  return newEntry;
194 }
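 // Replacement sketch for the code above: each set keeps a free list and
 // an LRU-ordered entryList. insert() takes a free slot if one exists,
 // otherwise it recycles the least-recently-used entry (the back of
 // entryList), and the new entry is pushed to the MRU position (the
 // front). Both vpn and paddr are re-aligned to the entry's actual page
 // size, so a 2MiB mapping is stored with its low 21 bits cleared even
 // though set selection uses the native 4KiB page shift.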
195 
196 GpuTLB::EntryList::iterator
197 GpuTLB::lookupIt(Addr va, bool update_lru)
198 {
199  int set = (va >> VegaISA::PageShift) & setMask;
200 
201  if (FA) {
202  assert(!set);
203  }
204 
205  auto entry = entryList[set].begin();
206  for (; entry != entryList[set].end(); ++entry) {
207  int page_size = (*entry)->size();
208 
209  if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
210  DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
211  "with size %#x.\n", va, (*entry)->vaddr, page_size);
212 
213  if (update_lru) {
214  entryList[set].push_front(*entry);
215  entryList[set].erase(entry);
216  entry = entryList[set].begin();
217  }
218 
219  break;
220  }
221  }
222 
223  return entry;
224 }
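 // The range check above lets one entry cover a large page: an entry
 // matches any va in [vaddr, vaddr + size). For example, a 2MiB entry
 // starting at 0x200000 can match a lookup of 0x2a3000. Note, though,
 // that the set index is always computed with the native 4KiB PageShift,
 // so such an entry is only found when the lookup address maps to the
 // set it was inserted into (trivially true for a fully-associative TLB).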
225 
226 VegaTlbEntry*
227 GpuTLB::lookup(Addr va, bool update_lru)
228 {
229  int set = (va >> VegaISA::PageShift) & setMask;
230 
231  auto entry = lookupIt(va, update_lru);
232 
233  if (entry == entryList[set].end())
234  return nullptr;
235  else
236  return *entry;
237 }
238 
239 void
 240 GpuTLB::invalidateAll()
 241 {
242  DPRINTF(GPUTLB, "Invalidating all entries.\n");
243 
244  for (int i = 0; i < numSets; ++i) {
245  while (!entryList[i].empty()) {
246  VegaTlbEntry *entry = entryList[i].front();
247  entryList[i].pop_front();
248  freeList[i].push_back(entry);
249  }
250  }
251 }
252 
253 void
254 GpuTLB::demapPage(Addr va, uint64_t asn)
255 {
256 
257  int set = (va >> VegaISA::PageShift) & setMask;
258  auto entry = lookupIt(va, false);
259 
260  if (entry != entryList[set].end()) {
261  freeList[set].push_back(*entry);
262  entryList[set].erase(entry);
263  }
264 }
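 // Note that the address-space number (asn) argument is unused here: this
 // TLB invalidates purely by virtual address, moving at most one matching
 // entry from the set's LRU list back onto the free list.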
265 
266 
267 
275 VegaTlbEntry *
276 GpuTLB::tlbLookup(const RequestPtr &req, bool update_stats)
277 {
278  Addr vaddr = req->getVaddr();
279  Addr alignedVaddr = pageAlign(vaddr);
280  DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
281 
282  //update LRU stack on a hit
283  VegaTlbEntry *entry = lookup(alignedVaddr, true);
284 
285  if (!update_stats) {
286  // functional tlb access for memory initialization
287  // i.e., memory seeding or instr. seeding -> don't update
288  // TLB and stats
289  return entry;
290  }
 291 
 292  stats.localNumTLBAccesses++;
 293 
 294  if (!entry) {
 295  stats.localNumTLBMisses++;
 296  } else {
 297  stats.localNumTLBHits++;
 298  }
299 
300  return entry;
301 }
302 
303 Walker*
 304 GpuTLB::getWalker()
 305 {
306  return walker;
307 }
308 
309 
310 void
 311 GpuTLB::serialize(CheckpointOut &cp) const
 312 {
313 }
314 
315 void
 316 GpuTLB::unserialize(CheckpointIn &cp)
 317 {
318 }
319 
325 void
 326 GpuTLB::issueTLBLookup(PacketPtr pkt)
 327 {
328  assert(pkt);
329  assert(pkt->senderState);
330 
337  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
 338  VegaISA::PageBytes);
 339 
340  GpuTranslationState *sender_state =
341  safe_cast<GpuTranslationState*>(pkt->senderState);
342 
343  bool update_stats = !sender_state->isPrefetch;
344 
345  DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
346  virt_page_addr);
347 
348  int req_cnt = sender_state->reqCnt.back();
349 
350  if (update_stats) {
 351  stats.accessCycles -= (curCycle() * req_cnt);
 352  stats.localCycles -= curCycle();
353  stats.globalNumTLBAccesses += req_cnt;
354  }
355 
356  tlbOutcome lookup_outcome = TLB_MISS;
357  const RequestPtr &tmp_req = pkt->req;
358 
359  // Access the TLB and figure out if it's a hit or a miss.
360  auto entry = tlbLookup(tmp_req, update_stats);
361 
362  if (entry) {
363  lookup_outcome = TLB_HIT;
364  // Put the entry in SenderState
365  VegaTlbEntry *entry = lookup(virt_page_addr, false);
366  assert(entry);
367 
368  // Set if this is a system request
369  pkt->req->setSystemReq(entry->pte.s);
370 
371  Addr alignedPaddr = pageAlign(entry->paddr);
372  sender_state->tlbEntry =
373  new VegaTlbEntry(1 /* VMID */, virt_page_addr, alignedPaddr,
374  entry->logBytes, entry->pte);
375 
376  if (update_stats) {
377  // the reqCnt has an entry per level, so its size tells us
378  // which level we are in
379  sender_state->hitLevel = sender_state->reqCnt.size();
380  stats.globalNumTLBHits += req_cnt;
381  }
382  } else {
383  if (update_stats)
384  stats.globalNumTLBMisses += req_cnt;
385  }
386 
387  /*
388  * We now know the TLB lookup outcome (if it's a hit or a miss), as
389  * well as the TLB access latency.
390  *
391  * We create and schedule a new TLBEvent which will help us take the
392  * appropriate actions (e.g., update TLB on a hit, send request to
393  * lower level TLB on a miss, or start a page walk if this was the
394  * last-level TLB)
395  */
396  TLBEvent *tlb_event =
397  new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
398 
399  if (translationReturnEvent.count(virt_page_addr)) {
400  panic("Virtual Page Address %#x already has a return event\n",
401  virt_page_addr);
402  }
403 
404  translationReturnEvent[virt_page_addr] = tlb_event;
405  assert(tlb_event);
406 
407  DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
 408  curTick() + cyclesToTicks(Cycles(hitLatency)));
 409 
410  schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency)));
411 }
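 // Timing sketch of the path above: the lookup itself happens immediately,
 // but its outcome (TLB_HIT or TLB_MISS) is acted on hitLatency cycles
 // later, when the TLBEvent scheduled here fires and calls
 // translationReturn(). Only one event may be outstanding per virtual page
 // address; a second request to the same page before the first completes
 // would hit the panic above, which is why the coalescer upstream merges
 // same-page requests.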
 412 
 413 GpuTLB::TLBEvent::TLBEvent(GpuTLB *_tlb, Addr _addr,
 414  tlbOutcome tlb_outcome, PacketPtr _pkt)
415  : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
416  outcome(tlb_outcome), pkt(_pkt)
417 {
418 }
419 
424 void
425 GpuTLB::pagingProtectionChecks(PacketPtr pkt, VegaTlbEntry * tlb_entry,
426  Mode mode)
427 {
428  // Do paging protection checks.
429  bool badWrite = (!tlb_entry->writable());
430 
431  if (mode == BaseMMU::Write && badWrite) {
432  // The page must have been present to get into the TLB in
433  // the first place. We'll assume the reserved bits are
434  // fine even though we're not checking them.
435  fatal("Page fault on addr %lx PTE=%#lx", pkt->req->getVaddr(),
436  (uint64_t)tlb_entry->pte);
437  }
438 }
439 
440 void
441 GpuTLB::walkerResponse(VegaTlbEntry& entry, PacketPtr pkt)
442 {
443  DPRINTF(GPUTLB, "WalkerResponse for %#lx. Entry: (%#lx, %#lx, %#lx)\n",
444  pkt->req->getVaddr(), entry.vaddr, entry.paddr, entry.size());
445 
446  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
 447  VegaISA::PageBytes);
 448 
449  Addr page_addr = entry.pte.ppn << VegaISA::PageShift;
450  Addr paddr = insertBits(page_addr, entry.logBytes - 1, 0, entry.vaddr);
451  pkt->req->setPaddr(paddr);
452  pkt->req->setSystemReq(entry.pte.s);
453 
454  GpuTranslationState *sender_state =
455  safe_cast<GpuTranslationState*>(pkt->senderState);
456  sender_state->tlbEntry = new VegaTlbEntry(entry);
457 
458  handleTranslationReturn(virt_page_addr, TLB_MISS, pkt);
459 }
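 // Example of the paddr composition above (hypothetical values): for a
 // 2MiB translation logBytes is 21, so insertBits(page_addr, 20, 0, vaddr)
 // keeps the walker-provided page frame in the high bits and splices in
 // the low 21 bits of the translated vaddr. With page_addr 0x100000000 and
 // vaddr 0x40123456 the result is paddr 0x100123456, since 0x40123456
 // modulo 2MiB is 0x123456.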
460 
466 void
 467 GpuTLB::handleTranslationReturn(Addr virt_page_addr,
 468  tlbOutcome tlb_outcome, PacketPtr pkt)
469 {
470  assert(pkt);
471  Addr vaddr = pkt->req->getVaddr();
472 
473  GpuTranslationState *sender_state =
474  safe_cast<GpuTranslationState*>(pkt->senderState);
475 
476  Mode mode = sender_state->tlbMode;
477 
478  VegaTlbEntry *local_entry, *new_entry;
479 
480  int req_cnt = sender_state->reqCnt.back();
481  bool update_stats = !sender_state->isPrefetch;
482 
483  if (update_stats) {
 484  stats.accessCycles += (req_cnt * curCycle());
 485  stats.localCycles += curCycle();
486  }
487 
488  if (tlb_outcome == TLB_HIT) {
489  DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
490  vaddr);
491  local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
492  } else {
493  DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
494  vaddr);
495 
501  new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
502  assert(new_entry);
503  local_entry = new_entry;
504 
505  if (allocationPolicy) {
506  assert(new_entry->pte);
507  DPRINTF(GPUTLB, "allocating entry w/ addr %#lx of size %#lx\n",
508  virt_page_addr, new_entry->size());
509 
510  local_entry = insert(virt_page_addr, *new_entry);
511  }
512 
513  assert(local_entry);
514  }
515 
521  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
522  "while paddr was %#x.\n", local_entry->vaddr,
523  local_entry->paddr);
524 
525  pagingProtectionChecks(pkt, local_entry, mode);
526  int page_size = local_entry->size();
527  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
528  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
529 
530  // Since this packet will be sent through the cpu side port, it must be
531  // converted to a response pkt if it is not one already
532  if (pkt->isRequest()) {
533  pkt->makeTimingResponse();
534  }
535 
536  pkt->req->setPaddr(paddr);
537 
538  if (local_entry->uncacheable()) {
539  pkt->req->setFlags(Request::UNCACHEABLE);
540  }
541 
542  //send packet back to coalescer
543  cpuSidePort[0]->sendTimingResp(pkt);
544  //schedule cleanup event
545  cleanupQueue.push(virt_page_addr);
546 
547  DPRINTF(GPUTLB, "Scheduled %#lx for cleanup\n", virt_page_addr);
548 
549  // schedule this only once per cycle.
550  // The check is required because we might have multiple translations
551  // returning the same cycle
552  // this is a maximum priority event and must be on the same cycle
553  // as the cleanup event in TLBCoalescer to avoid a race with
554  // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
 555  if (!cleanupEvent.scheduled())
 556  schedule(&cleanupEvent, curTick());
557 }
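 // The offset arithmetic above mirrors walkerResponse(): paddr is the
 // entry's page-aligned physical address OR'ed with the page offset,
 // vaddr & (page_size - 1). For example, a 4KiB entry with paddr
 // 0xabcd000 and vaddr 0x7fff123 yields the translation 0xabcd123.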
558 
563 void
 564 GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
 565  PacketPtr pkt)
566 {
567  DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
568 
569  assert(translationReturnEvent[virtPageAddr]);
570  assert(pkt);
571 
572  GpuTranslationState *tmp_sender_state =
573  safe_cast<GpuTranslationState*>(pkt->senderState);
574 
575  int req_cnt = tmp_sender_state->reqCnt.back();
576  bool update_stats = !tmp_sender_state->isPrefetch;
577 
578 
579  if (outcome == TLB_HIT) {
580  handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
581 
582  } else if (outcome == TLB_MISS) {
583 
584  DPRINTF(GPUTLB, "This is a TLB miss\n");
585  if (hasMemSidePort) {
 586  // the one cycle added here represents the delay from when we get
 587  // the reply back until we propagate it to the coalescer
 588  // above.
589 
595  tmp_sender_state->deviceId = 1;
596  tmp_sender_state->pasId = 0;
597 
598  if (!memSidePort[0]->sendTimingReq(pkt)) {
599  DPRINTF(GPUTLB, "Failed sending translation request to "
600  "lower level TLB for addr %#x\n", virtPageAddr);
601 
602  memSidePort[0]->retries.push_back(pkt);
603  } else {
604  DPRINTF(GPUTLB, "Sent translation request to lower level "
605  "TLB for addr %#x\n", virtPageAddr);
606  }
607  } else {
608  //this is the last level TLB. Start a page walk
609  DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
610  "addr %#x\n", virtPageAddr);
611 
612  if (update_stats)
613  stats.pageTableCycles -= (req_cnt*curCycle());
614 
615  TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
616  assert(tlb_event);
617  tlb_event->updateOutcome(PAGE_WALK);
 618  schedule(tlb_event,
 619  curTick() + cyclesToTicks(Cycles(missLatency2)));
620  }
621  } else if (outcome == PAGE_WALK) {
622  if (update_stats)
623  stats.pageTableCycles += (req_cnt*curCycle());
624 
625  // Need to access the page table and update the TLB
626  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
627  virtPageAddr);
628 
 629  Addr base = gpuDevice->getVM().getPageTableBase(1);
 630  Addr vaddr = pkt->req->getVaddr();
 631  walker->setDevRequestor(gpuDevice->vramRequestorId());
 632 
633  // Do page table walk
634  walker->startTiming(pkt, base, vaddr, BaseMMU::Mode::Read);
635  } else if (outcome == MISS_RETURN) {
639  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
640  } else {
641  panic("Unexpected TLB outcome %d", outcome);
642  }
643 }
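 // Outcome state machine, as coded above: TLB_HIT completes immediately
 // via handleTranslationReturn(); TLB_MISS either forwards the packet to
 // a lower-level TLB (hasMemSidePort) or re-schedules this same event as
 // PAGE_WALK; PAGE_WALK starts the timing page walk, whose
 // walkerResponse() callback finishes the translation; MISS_RETURN is the
 // lower level's reply arriving back at this TLB and is handled like a
 // miss that has just been filled.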
644 
645 void
 646 GpuTLB::TLBEvent::process()
 647 {
648  tlb->translationReturn(virtPageAddr, outcome, pkt);
649 }
650 
651 const char*
 652 GpuTLB::TLBEvent::description() const
 653 {
654  return "trigger translationDoneEvent";
655 }
656 
657 void
 658 GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
 659 {
660  outcome = _outcome;
661 }
662 
663 Addr
 664 GpuTLB::TLBEvent::getTLBEventVaddr()
 665 {
666  return virtPageAddr;
667 }
668 
675 bool
 676 GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
 677 {
678  bool ret = false;
679  [[maybe_unused]] Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
 680  VegaISA::PageBytes);
 681 
682  if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
683  assert(!tlb->translationReturnEvent.count(virt_page_addr));
684  tlb->issueTLBLookup(pkt);
685  // update number of outstanding translation requests
686  tlb->outstandingReqs++;
687  ret = true;
688  } else {
689  DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
690  tlb->outstandingReqs);
691  tlb->stats.maxDownstreamReached++;
692  ret = false;
693 
694  }
695 
696  if (tlb->outstandingReqs > tlb->stats.outstandingReqsMax.value())
697  tlb->stats.outstandingReqsMax = tlb->outstandingReqs;
698 
699  return ret;
700 }
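 // Back-pressure sketch: at most maxCoalescedReqs translations may be in
 // flight below this TLB. Once the limit is reached the request is
 // refused (ret is false) and the coalescer must hold it; cleanup() later
 // calls sendRetryReq() on the CPU-side ports as outstandingReqs drains.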
701 
710 void
 711 GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
 712 {
713  GpuTranslationState *sender_state =
714  safe_cast<GpuTranslationState*>(pkt->senderState);
715 
716  Mode mode = sender_state->tlbMode;
717  Addr vaddr = pkt->req->getVaddr();
718 
719  VegaTlbEntry *local_entry, *new_entry;
720 
721  if (tlb_outcome == TLB_HIT) {
722  DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
723  "%#x\n", vaddr);
724 
725  local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
726  } else {
727  DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
728  "%#x\n", vaddr);
729 
735  new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
736  assert(new_entry);
737  local_entry = new_entry;
738 
739  if (allocationPolicy) {
740  Addr virt_page_addr = roundDown(vaddr, VegaISA::PageBytes);
741 
742  DPRINTF(GPUTLB, "allocating entry w/ addr %#lx\n",
743  virt_page_addr);
744 
745  local_entry = insert(virt_page_addr, *new_entry);
746  }
747 
748  assert(local_entry);
749  }
750 
751  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
752  "while paddr was %#x.\n", local_entry->vaddr,
753  local_entry->paddr);
754 
766  if (!sender_state->isPrefetch && sender_state->tlbEntry)
767  pagingProtectionChecks(pkt, local_entry, mode);
768 
769  int page_size = local_entry->size();
770  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
771  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
772 
773  pkt->req->setPaddr(paddr);
774 
775  if (local_entry->uncacheable())
776  pkt->req->setFlags(Request::UNCACHEABLE);
777 }
778 
779 // This is used for atomic translations. Need to
780 // make it all happen during the same cycle.
781 void
 782 GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
 783 {
784  GpuTranslationState *sender_state =
785  safe_cast<GpuTranslationState*>(pkt->senderState);
786 
787  bool update_stats = !sender_state->isPrefetch;
788 
789  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
 790  VegaISA::PageBytes);
 791 
 792  // do the TLB lookup; stats are updated only if this is not a prefetch
793  bool success = tlb->tlbLookup(pkt->req, update_stats);
794  tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
795 
796  // functional mode means no coalescing
797  // global metrics are the same as the local metrics
798  if (update_stats) {
799  tlb->stats.globalNumTLBAccesses++;
800 
801  if (success) {
802  sender_state->hitLevel = sender_state->reqCnt.size();
803  tlb->stats.globalNumTLBHits++;
804  } else {
805  tlb->stats.globalNumTLBMisses++;
806  }
807  }
808 
809  if (!success) {
810  if (tlb->hasMemSidePort) {
811  // there is a TLB below -> propagate down the TLB hierarchy
812  tlb->memSidePort[0]->sendFunctional(pkt);
813  // If no valid translation from a prefetch, then just return
814  if (sender_state->isPrefetch && !pkt->req->hasPaddr())
815  return;
816  } else {
817  // Need to access the page table and update the TLB
818  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
819  virt_page_addr);
820 
821  Addr vaddr = pkt->req->getVaddr();
822  [[maybe_unused]] Addr alignedVaddr =
823  tlb->pageAlign(virt_page_addr);
824  assert(alignedVaddr == virt_page_addr);
825 
826  unsigned logBytes;
827  PageTableEntry pte;
828 
829  // Initialize walker state for VMID
830  Addr base = tlb->gpuDevice->getVM().getPageTableBase(1);
831  tlb->walker->setDevRequestor(tlb->gpuDevice->vramRequestorId());
832 
833  // Do page table walk
834  Fault fault = tlb->walker->startFunctional(base, vaddr, pte,
835  logBytes,
836  BaseMMU::Mode::Read);
837  if (fault != NoFault) {
838  fatal("Translation fault in TLB at %d!", __LINE__);
839  }
840 
841  // PPN is already shifted by fragment so we only shift by native
842  // page size. Fragment is still used via logBytes to select lower
843  // bits from vaddr.
844  Addr page_addr = pte.ppn << PageShift;
845  Addr paddr = insertBits(page_addr, logBytes - 1, 0, vaddr);
846  Addr alignedPaddr = tlb->pageAlign(paddr);
847  pkt->req->setPaddr(paddr);
848  pkt->req->setSystemReq(pte.s);
849 
850  if (!sender_state->isPrefetch) {
851  assert(paddr);
852 
853  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);
854 
855  sender_state->tlbEntry =
856  new VegaTlbEntry(1 /* VMID */, virt_page_addr,
857  alignedPaddr, logBytes, pte);
858  } else {
859  // If this was a prefetch, then do the normal thing if it
860  // was a successful translation. Otherwise, send an empty
861  // TLB entry back so that it can be figured out as empty
862  // and handled accordingly.
863  if (paddr) {
864  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);
865 
866  sender_state->tlbEntry =
867  new VegaTlbEntry(1 /* VMID */, virt_page_addr,
868  alignedPaddr, logBytes, pte);
869  } else {
870  DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", vaddr);
871 
872  sender_state->tlbEntry = nullptr;
873 
874  return;
875  }
876  }
877  }
878  } else {
879  VegaTlbEntry *entry = tlb->lookup(virt_page_addr, update_stats);
880  assert(entry);
881 
882  if (sender_state->isPrefetch) {
883  DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
884  entry->vaddr);
885  }
886 
887  sender_state->tlbEntry = new VegaTlbEntry(1 /* VMID */, entry->vaddr,
888  entry->paddr, entry->logBytes,
889  entry->pte);
890  }
891 
 892  // This is the function that would populate pkt->req with the paddr of
 893  // the translation. But if no translation happens (i.e., the prefetch
 894  // fails), then the early returns in the above code will keep this
 895  // function from executing.
896  tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
897 }
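 // The functional path makes the same hit/miss decision as the timing
 // path but without events: misses are either forwarded with
 // sendFunctional() or resolved on the spot via the walker's
 // startFunctional(), and VMID 1 is assumed throughout, matching the
 // timing path above.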
898 
899 void
 900 GpuTLB::CpuSidePort::recvReqRetry()
 901 {
902  // The CPUSidePort never sends anything but replies. No retries
903  // expected.
904  panic("recvReqRetry called");
905 }
906 
 907 AddrRangeList
 908 GpuTLB::CpuSidePort::getAddrRanges() const
 909 {
910  // currently not checked by the requestor
911  AddrRangeList ranges;
912 
913  return ranges;
914 }
915 
921 bool
 922 GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
 923 {
924  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
 925  VegaISA::PageBytes);
 926 
927  DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
928  virt_page_addr);
929 
930  TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
931  assert(tlb_event);
932  assert(virt_page_addr == tlb_event->getTLBEventVaddr());
933 
934  tlb_event->updateOutcome(MISS_RETURN);
935  tlb->schedule(tlb_event, curTick()+tlb->clockPeriod());
936 
937  return true;
938 }
939 
940 void
 941 GpuTLB::MemSidePort::recvReqRetry()
 942 {
943  // No retries should reach the TLB. The retries
944  // should only reach the TLBCoalescer.
945  panic("recvReqRetry called");
946 }
947 
948 void
 949 GpuTLB::cleanup()
 950 {
951  while (!cleanupQueue.empty()) {
952  Addr cleanup_addr = cleanupQueue.front();
953  cleanupQueue.pop();
954 
955  DPRINTF(GPUTLB, "Deleting return event for %#lx\n", cleanup_addr);
956 
957  // delete TLBEvent
958  TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
959  delete old_tlb_event;
960  translationReturnEvent.erase(cleanup_addr);
961 
962  // update number of outstanding requests
963  outstandingReqs--;
964  }
965 
969  for (int i = 0; i < cpuSidePort.size(); ++i) {
970  cpuSidePort[i]->sendRetryReq();
971  }
972 }
973 
 974 GpuTLB::VegaTLBStats::VegaTLBStats(statistics::Group *parent)
 975  : statistics::Group(parent),
976  ADD_STAT(maxDownstreamReached, "Number of refused translation requests"),
977  ADD_STAT(outstandingReqsMax, "Maximum count in coalesced request queue"),
978  ADD_STAT(localNumTLBAccesses, "Number of TLB accesses"),
979  ADD_STAT(localNumTLBHits, "Number of TLB hits"),
980  ADD_STAT(localNumTLBMisses, "Number of TLB misses"),
981  ADD_STAT(localTLBMissRate, "TLB miss rate"),
982  ADD_STAT(globalNumTLBAccesses, "Number of TLB accesses"),
983  ADD_STAT(globalNumTLBHits, "Number of TLB hits"),
984  ADD_STAT(globalNumTLBMisses, "Number of TLB misses"),
985  ADD_STAT(globalTLBMissRate, "TLB miss rate"),
986  ADD_STAT(accessCycles, "Cycles spent accessing this TLB level"),
987  ADD_STAT(pageTableCycles, "Cycles spent accessing the page table"),
988  ADD_STAT(localCycles, "Number of cycles spent in queue for all "
989  "incoming reqs"),
990  ADD_STAT(localLatency, "Avg. latency over incoming coalesced reqs")
991 {
 992  localLatency = localCycles / localNumTLBAccesses;
 993  localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
 994 
 995  globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
 996 }
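 // The derived stats are formulas over the scalar counters. As a worked
 // example, 50 local misses out of 1000 local accesses gives
 // localTLBMissRate = 100 * 50 / 1000 = 5, i.e. a 5% miss rate.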
997 
998 } // namespace VegaISA
999 } // namespace gem5