gem5  v22.1.0.0
tlb.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

31 
32 #include "arch/amdgpu/vega/tlb.hh"
33 
34 #include <cmath>
35 #include <cstring>
36 
40 #include "debug/GPUPrefetch.hh"
41 #include "debug/GPUTLB.hh"
43 
namespace gem5
{
namespace VegaISA
{

// We have no limit for the number of translations we send
// downstream, as we depend on the limit of the coalescer
// above us.
GpuTLB::GpuTLB(const VegaGPUTLBParams &p)
    : ClockedObject(p), walker(p.walker),
      gpuDevice(p.gpu_device), size(p.size), stats(this),
      cleanupEvent([this]{ cleanup(); }, name(), false,
                   Event::Maximum_Pri)
{
    assoc = p.assoc;
    assert(assoc <= size);
    numSets = size/assoc;
    allocationPolicy = p.allocationPolicy;
    hasMemSidePort = false;

    tlb.assign(size, VegaTlbEntry());

    freeList.resize(numSets);
    entryList.resize(numSets);

    for (int set = 0; set < numSets; ++set) {
        for (int way = 0; way < assoc; ++way) {
            int x = set * assoc + way;
            freeList[set].push_back(&tlb.at(x));
        }
    }

    FA = (size == assoc);
    setMask = numSets - 1;
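    // Worked example (illustrative values, not defaults): with size = 64
    // entries and assoc = 8, numSets = 8 and setMask = 0x7, so a 4KiB page
    // (PageShift = 12) at vaddr 0x7fffe000 indexes set
    // (0x7fffe000 >> 12) & 0x7 = 6.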

    maxCoalescedReqs = p.maxOutstandingReqs;

    outstandingReqs = 0;
    hitLatency = p.hitLatency;
    missLatency1 = p.missLatency1;
    missLatency2 = p.missLatency2;

    // create the response ports based on the number of connected ports
    for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }

    // create the requestor ports based on the number of connected ports
    for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }

    // assuming one walker per TLB, set our walker's TLB to this TLB.
    walker->setTLB(this);

    // gpuDevice should be non-null in full system only and is set by GpuTLB
    // params from the config file.
    if (gpuDevice) {
        gpuDevice->getVM().registerTLB(this);
    }
}

GpuTLB::~GpuTLB()
{
}

Port &
GpuTLB::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "cpu_side_ports") {
        if (idx >= static_cast<PortID>(cpuSidePort.size())) {
            panic("GpuTLB::getPort: unknown index %d\n", idx);
        }

        return *cpuSidePort[idx];
    } else if (if_name == "mem_side_ports") {
        if (idx >= static_cast<PortID>(memSidePort.size())) {
            panic("GpuTLB::getPort: unknown index %d\n", idx);
        }

        hasMemSidePort = true;

        return *memSidePort[idx];
    } else {
        panic("GpuTLB::getPort: unknown port %s\n", if_name);
    }
}

Fault
GpuTLB::createPagefault(Addr vaddr, Mode mode)
{
    DPRINTF(GPUTLB, "GPUTLB: Raising page fault.\n");
    ExceptionCode code;
    if (mode == BaseMMU::Read)
        code = ExceptionCode::LOAD_PAGE;
    else if (mode == BaseMMU::Write)
        code = ExceptionCode::STORE_PAGE;
    else
        code = ExceptionCode::INST_PAGE;
    return std::make_shared<PageFault>(vaddr, code, true, mode, true);
}

Addr
GpuTLB::pageAlign(Addr vaddr)
{
    Addr pageMask = mask(VegaISA::PageShift);
    return (vaddr & ~pageMask);
}

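// insert() allocates a TLB entry in the set selected by the entry's vaddr:
// it takes a slot from the set's free list if one is available and
// otherwise evicts the LRU entry (the back of the set's entryList). The
// new entry is pushed to the front, i.e., the MRU position.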
VegaTlbEntry*
GpuTLB::insert(Addr vpn, VegaTlbEntry &entry)
{
    VegaTlbEntry *newEntry = nullptr;

    int set = (entry.vaddr >> VegaISA::PageShift) & setMask;

    if (!freeList[set].empty()) {
        newEntry = freeList[set].front();
        freeList[set].pop_front();
    } else {
        newEntry = entryList[set].back();
        entryList[set].pop_back();
    }

    *newEntry = entry;
    entryList[set].push_front(newEntry);

    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
            newEntry->vaddr, newEntry->paddr, entry.size(), set);

    return newEntry;
}

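// lookupIt() walks the set's entryList, which doubles as an LRU stack
// (front = MRU, back = LRU). A hit is any entry whose
// [vaddr, vaddr + size) range covers va; with update_lru the matching
// entry is moved to the front.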
GpuTLB::EntryList::iterator
GpuTLB::lookupIt(Addr va, bool update_lru)
{
    int set = (va >> VegaISA::PageShift) & setMask;

    if (FA) {
        assert(!set);
    }

    auto entry = entryList[set].begin();
    for (; entry != entryList[set].end(); ++entry) {
        int page_size = (*entry)->size();

        if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
            DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                    "with size %#x.\n", va, (*entry)->vaddr, page_size);

            if (update_lru) {
                entryList[set].push_front(*entry);
                entryList[set].erase(entry);
                entry = entryList[set].begin();
            }

            break;
        }
    }

    return entry;
}

VegaTlbEntry*
GpuTLB::lookup(Addr va, bool update_lru)
{
    int set = (va >> VegaISA::PageShift) & setMask;

    auto entry = lookupIt(va, update_lru);

    if (entry == entryList[set].end())
        return nullptr;
    else
        return *entry;
}

void
GpuTLB::invalidateAll()
{
    DPRINTF(GPUTLB, "Invalidating all entries.\n");

    for (int i = 0; i < numSets; ++i) {
        while (!entryList[i].empty()) {
            VegaTlbEntry *entry = entryList[i].front();
            entryList[i].pop_front();
            freeList[i].push_back(entry);
        }
    }
}

void
GpuTLB::demapPage(Addr va, uint64_t asn)
{
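    // Note: the address space number (asn) is not used here; the entry
    // to drop is selected by virtual address alone.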
    int set = (va >> VegaISA::PageShift) & setMask;
    auto entry = lookupIt(va, false);

    if (entry != entryList[set].end()) {
        freeList[set].push_back(*entry);
        entryList[set].erase(entry);
    }
}

/**
 * TLB_lookup will only perform a TLB lookup, returning the TLB entry
 * on a TLB hit and nullptr on a TLB miss.
 */
VegaTlbEntry *
GpuTLB::tlbLookup(const RequestPtr &req, bool update_stats)
{
    Addr vaddr = req->getVaddr();
    Addr alignedVaddr = pageAlign(vaddr);
    DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);

    // update LRU stack on a hit
    VegaTlbEntry *entry = lookup(alignedVaddr, true);

    if (!update_stats) {
        // functional tlb access for memory initialization
        // i.e., memory seeding or instr. seeding -> don't update
        // TLB and stats
        return entry;
    }

    stats.localNumTLBAccesses++;

    if (!entry) {
        stats.localNumTLBMisses++;
    } else {
        stats.localNumTLBHits++;
    }

    return entry;
}

Walker*
GpuTLB::getWalker()
{
    return walker;
}

void
GpuTLB::serialize(CheckpointOut &cp) const
{
}

void
GpuTLB::unserialize(CheckpointIn &cp)
{
}

/**
 * Do the TLB lookup for this coalesced request and schedule
 * another event <TLB access latency> cycles later.
 */
void
GpuTLB::issueTLBLookup(PacketPtr pkt)
{
    assert(pkt);
    assert(pkt->senderState);

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->isPrefetch;

    DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
            virt_page_addr);

    int req_cnt = sender_state->reqCnt.back();

    if (update_stats) {
        stats.accessCycles -= (curCycle() * req_cnt);
        stats.localCycles -= curCycle();
        stats.globalNumTLBAccesses += req_cnt;
    }

    tlbOutcome lookup_outcome = TLB_MISS;
    const RequestPtr &tmp_req = pkt->req;

    // Access the TLB and figure out if it's a hit or a miss.
    auto entry = tlbLookup(tmp_req, update_stats);

    if (entry) {
        lookup_outcome = TLB_HIT;
        // Put the entry in SenderState
        VegaTlbEntry *entry = lookup(virt_page_addr, false);
        assert(entry);

        // Set if this is a system request
        pkt->req->setSystemReq(entry->pte.s);

        Addr alignedPaddr = pageAlign(entry->paddr);
        sender_state->tlbEntry =
            new VegaTlbEntry(1 /* VMID */, virt_page_addr, alignedPaddr,
                             entry->logBytes, entry->pte);

        if (update_stats) {
            // the reqCnt has an entry per level, so its size tells us
            // which level we are in
            sender_state->hitLevel = sender_state->reqCnt.size();
            stats.globalNumTLBHits += req_cnt;
        }
    } else {
        if (update_stats)
            stats.globalNumTLBMisses += req_cnt;
    }

    /*
     * We now know the TLB lookup outcome (if it's a hit or a miss), as
     * well as the TLB access latency.
     *
     * We create and schedule a new TLBEvent which will help us take the
     * appropriate actions (e.g., update TLB on a hit, send request to
     * lower level TLB on a miss, or start a page walk if this was the
     * last-level TLB).
     */
    TLBEvent *tlb_event =
        new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

    if (translationReturnEvent.count(virt_page_addr)) {
        panic("Virtual Page Address %#x already has a return event\n",
              virt_page_addr);
    }

    translationReturnEvent[virt_page_addr] = tlb_event;
    assert(tlb_event);

    DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
            curTick() + cyclesToTicks(Cycles(hitLatency)));

    schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency)));
}

GpuTLB::TLBEvent::TLBEvent(GpuTLB *_tlb, Addr _addr,
                           tlbOutcome tlb_outcome, PacketPtr _pkt)
    : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
      outcome(tlb_outcome), pkt(_pkt)
{
}

/**
 * Do the paging protection checks.
 */
void
GpuTLB::pagingProtectionChecks(PacketPtr pkt, VegaTlbEntry * tlb_entry,
                               Mode mode)
{
    // Do paging protection checks.
    bool badWrite = (!tlb_entry->writable());

    if (mode == BaseMMU::Write && badWrite) {
        // The page must have been present to get into the TLB in
        // the first place. We'll assume the reserved bits are
        // fine even though we're not checking them.
        fatal("Page fault on addr %lx PTE=%#lx", pkt->req->getVaddr(),
              (uint64_t)tlb_entry->pte);
    }
}

void
GpuTLB::walkerResponse(VegaTlbEntry& entry, PacketPtr pkt)
{
    DPRINTF(GPUTLB, "WalkerResponse for %#lx. Entry: (%#lx, %#lx, %#lx)\n",
            pkt->req->getVaddr(), entry.vaddr, entry.paddr, entry.size());

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    Addr page_addr = entry.pte.ppn << VegaISA::PageShift;
    Addr paddr = page_addr + (entry.vaddr & mask(entry.logBytes));
    pkt->req->setPaddr(paddr);
    pkt->req->setSystemReq(entry.pte.s);

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);
    sender_state->tlbEntry = new VegaTlbEntry(entry);

    handleTranslationReturn(virt_page_addr, TLB_MISS, pkt);
}

/**
 * handleTranslationReturn is called on a TLB hit,
 * when a TLB miss returns or when a page fault returns.
 */
void
GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                tlbOutcome tlb_outcome, PacketPtr pkt)
{
    assert(pkt);
    Addr vaddr = pkt->req->getVaddr();

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    Mode mode = sender_state->tlbMode;

    VegaTlbEntry *local_entry, *new_entry;

    int req_cnt = sender_state->reqCnt.back();
    bool update_stats = !sender_state->isPrefetch;

    if (update_stats) {
        stats.accessCycles += (req_cnt * curCycle());
        stats.localCycles += curCycle();
    }

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                vaddr);
        local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
    } else {
        DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                vaddr);

        /**
         * We are returning either from a page walk or from a hit at a
         * lower TLB level. The senderState should be "carrying" a
         * pointer to the correct TLBEntry.
         */
        new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
        assert(new_entry);
        local_entry = new_entry;

        if (allocationPolicy) {
            assert(new_entry->pte);
            DPRINTF(GPUTLB, "allocating entry w/ addr %#lx of size %#lx\n",
                    virt_page_addr, new_entry->size());

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    /**
     * At this point the packet carries an up-to-date tlbEntry pointer
     * in its senderState. Next step is to do the paging protection
     * checks.
     */
    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    pagingProtectionChecks(pkt, local_entry, mode);
    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    // Since this packet will be sent through the cpu side port, it must be
    // converted to a response pkt if it is not one already
    if (pkt->isRequest()) {
        pkt->makeTimingResponse();
    }

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable()) {
        pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // send packet back to coalescer
    cpuSidePort[0]->sendTimingResp(pkt);
    // schedule cleanup event
    cleanupQueue.push(virt_page_addr);

    DPRINTF(GPUTLB, "Scheduled %#lx for cleanup\n", virt_page_addr);

    // Schedule this only once per cycle; the check is required because
    // multiple translations may return in the same cycle. This is a
    // maximum-priority event and must be on the same cycle as the
    // cleanup event in TLBCoalescer to avoid a race with IssueProbeEvent
    // caused by TLBCoalescer::MemSidePort::recvReqRetry.
    if (!cleanupEvent.scheduled())
        schedule(cleanupEvent, curTick() + clockPeriod());
}

/**
 * A TLBEvent is scheduled after the TLB lookup and helps us take the
 * appropriate actions: (e.g., send back the translation on a TLB hit,
 * send the request to the lower-level TLB on a miss, or start a page
 * walk if this was the last-level TLB).
 */
void
GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                          PacketPtr pkt)
{
    DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

    assert(translationReturnEvent[virtPageAddr]);
    assert(pkt);

    GpuTranslationState *tmp_sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    int req_cnt = tmp_sender_state->reqCnt.back();
    bool update_stats = !tmp_sender_state->isPrefetch;

    if (outcome == TLB_HIT) {
        handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

    } else if (outcome == TLB_MISS) {

        DPRINTF(GPUTLB, "This is a TLB miss\n");
        if (hasMemSidePort) {
            // the one cycle added here represents the delay from when we
            // get the reply back until when we propagate it to the
            // coalescer above.

            // set the device ID and the process address space ID before
            // forwarding the request to the lower-level TLB
            tmp_sender_state->deviceId = 1;
            tmp_sender_state->pasId = 0;

            if (!memSidePort[0]->sendTimingReq(pkt)) {
                DPRINTF(GPUTLB, "Failed sending translation request to "
                        "lower level TLB for addr %#x\n", virtPageAddr);

                memSidePort[0]->retries.push_back(pkt);
            } else {
                DPRINTF(GPUTLB, "Sent translation request to lower level "
                        "TLB for addr %#x\n", virtPageAddr);
            }
        } else {
            // this is the last level TLB. Start a page walk
            DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                    "addr %#x\n", virtPageAddr);

            if (update_stats)
                stats.pageTableCycles -= (req_cnt*curCycle());

            TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
            assert(tlb_event);
            tlb_event->updateOutcome(PAGE_WALK);
            schedule(tlb_event,
                     curTick() + cyclesToTicks(Cycles(missLatency2)));
        }
    } else if (outcome == PAGE_WALK) {
        if (update_stats)
            stats.pageTableCycles += (req_cnt*curCycle());

        // Need to access the page table and update the TLB
        DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                virtPageAddr);

        Addr base = gpuDevice->getVM().getPageTableBase(1);
        Addr vaddr = pkt->req->getVaddr();
        walker->setDevRequestor(gpuDevice->vramRequestorId());

        // Do page table walk
        walker->startTiming(pkt, base, vaddr, BaseMMU::Mode::Read);
    } else if (outcome == MISS_RETURN) {
        /** we add an extra cycle in the return path of the translation
         * requests in between the various TLB levels.
         */
        handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
    } else {
        panic("Unexpected TLB outcome %d", outcome);
    }
}

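// Summary of the outcome handling above: TLB_HIT and MISS_RETURN finish
// the translation via handleTranslationReturn(); TLB_MISS either forwards
// the request to a lower-level TLB or, at the last level, re-schedules
// this event as PAGE_WALK; PAGE_WALK hands the request to the walker,
// which replies through walkerResponse().
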
void
GpuTLB::TLBEvent::process()
{
    tlb->translationReturn(virtPageAddr, outcome, pkt);
}

const char*
GpuTLB::TLBEvent::description() const
{
    return "trigger translationDoneEvent";
}

void
GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
{
    outcome = _outcome;
}

Addr
GpuTLB::TLBEvent::getTLBEventVaddr()
{
    return virtPageAddr;
}

/**
 * recvTiming receives a coalesced timing request from a TLBCoalescer
 * and it calls issueTLBLookup(). It only rejects the packet if we have
 * exceeded the max outstanding number of requests.
 */
bool
GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
    bool ret = false;
    [[maybe_unused]] Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                                     VegaISA::PageBytes);

    if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
        assert(!tlb->translationReturnEvent.count(virt_page_addr));
        tlb->issueTLBLookup(pkt);
        // update number of outstanding translation requests
        tlb->outstandingReqs++;
        ret = true;
    } else {
        DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                tlb->outstandingReqs);
        tlb->stats.maxDownstreamReached++;
        ret = false;
    }

    if (tlb->outstandingReqs > tlb->stats.outstandingReqsMax.value())
        tlb->stats.outstandingReqsMax = tlb->outstandingReqs;

    return ret;
}
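
// When a request is refused (ret == false), the TLBCoalescer above is
// responsible for retrying; cleanup() calls sendRetryReq() on the
// cpu-side ports once outstanding requests have drained.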

/**
 * handleFuncTranslationReturn is called on a TLB hit, when a TLB miss
 * returns or when a page fault returns.
 */
void
GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
{
    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    Mode mode = sender_state->tlbMode;
    Addr vaddr = pkt->req->getVaddr();

    VegaTlbEntry *local_entry, *new_entry;

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                "%#x\n", vaddr);

        local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
    } else {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                "%#x\n", vaddr);

        /**
         * We are returning either from a page walk or from a hit at a
         * lower TLB level. The senderState should be "carrying" a
         * pointer to the correct TLBEntry.
         */
        new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
        assert(new_entry);
        local_entry = new_entry;

        if (allocationPolicy) {
            Addr virt_page_addr = roundDown(vaddr, VegaISA::PageBytes);

            DPRINTF(GPUTLB, "allocating entry w/ addr %#lx\n",
                    virt_page_addr);

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    /*
     * Do paging checks if it's a normal functional access. If it's for
     * a prefetch, then sometimes you can try to prefetch something that
     * won't pass protection. We don't actually want to fault because
     * there is no demand access to deem this a violation. Just put it
     * in the TLB and it will fault if a future demand access touches it
     * in violation.
     */
    if (!sender_state->isPrefetch && sender_state->tlbEntry)
        pagingProtectionChecks(pkt, local_entry, mode);

    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable())
        pkt->req->setFlags(Request::UNCACHEABLE);
}

// This is used for atomic translations. Need to
// make it all happen during the same cycle.
void
GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
{
    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->isPrefetch;

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    // do the TLB lookup; stats are only updated for non-prefetch accesses
    bool success = tlb->tlbLookup(pkt->req, update_stats);
    tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

    // functional mode means no coalescing
    // global metrics are the same as the local metrics
    if (update_stats) {
        tlb->stats.globalNumTLBAccesses++;

        if (success) {
            sender_state->hitLevel = sender_state->reqCnt.size();
            tlb->stats.globalNumTLBHits++;
        } else {
            tlb->stats.globalNumTLBMisses++;
        }
    }

    if (!success) {
        if (tlb->hasMemSidePort) {
            // there is a TLB below -> propagate down the TLB hierarchy
            tlb->memSidePort[0]->sendFunctional(pkt);
            // If no valid translation from a prefetch, then just return
            if (sender_state->isPrefetch && !pkt->req->hasPaddr())
                return;
        } else {
            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virt_page_addr);

            Addr vaddr = pkt->req->getVaddr();
            [[maybe_unused]] Addr alignedVaddr =
                tlb->pageAlign(virt_page_addr);
            assert(alignedVaddr == virt_page_addr);

            unsigned logBytes;
            PageTableEntry pte;

            // Initialize walker state for VMID
            Addr base = tlb->gpuDevice->getVM().getPageTableBase(1);
            tlb->walker->setDevRequestor(tlb->gpuDevice->vramRequestorId());

            // Do page table walk
            Fault fault = tlb->walker->startFunctional(base, vaddr, pte,
                                                       logBytes,
                                                       BaseMMU::Mode::Read);
            if (fault != NoFault) {
                fatal("Translation fault in TLB at %d!", __LINE__);
            }

            // PPN is already shifted by fragment so we only shift by native
            // page size. Fragment is still used via logBytes to select lower
            // bits from vaddr.
            Addr page_addr = pte.ppn << PageShift;
            Addr paddr = page_addr + (vaddr & mask(logBytes));
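            // Worked example (hypothetical values): for a 2MiB fragment,
            // logBytes = 21, so paddr = (pte.ppn << 12) + (vaddr &
            // 0x1fffff); the PPN already carries the fragment alignment.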
            Addr alignedPaddr = tlb->pageAlign(paddr);
            pkt->req->setPaddr(paddr);
            pkt->req->setSystemReq(pte.s);

            if (!sender_state->isPrefetch) {
                assert(paddr);

                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);

                sender_state->tlbEntry =
                    new VegaTlbEntry(1 /* VMID */, virt_page_addr,
                                     alignedPaddr, logBytes, pte);
            } else {
                // If this was a prefetch, then do the normal thing if it
                // was a successful translation. Otherwise, send an empty
                // TLB entry back so that it can be figured out as empty
                // and handled accordingly.
                if (paddr) {
                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);

                    sender_state->tlbEntry =
                        new VegaTlbEntry(1 /* VMID */, virt_page_addr,
                                         alignedPaddr, logBytes, pte);
                } else {
                    DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", vaddr);

                    sender_state->tlbEntry = nullptr;

                    return;
                }
            }
        }
    } else {
        VegaTlbEntry *entry = tlb->lookup(virt_page_addr, update_stats);
        assert(entry);

        if (sender_state->isPrefetch) {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    entry->vaddr);
        }

        sender_state->tlbEntry = new VegaTlbEntry(1 /* VMID */, entry->vaddr,
                                                  entry->paddr, entry->logBytes,
                                                  entry->pte);
    }

    // This is the function that would populate pkt->req with the paddr of
    // the translation. But if no translation happens (i.e., prefetch fails)
    // then the early returns in the above code will keep this function
    // from executing.
    tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
}

void
GpuTLB::CpuSidePort::recvReqRetry()
{
    // The CpuSidePort never sends anything but replies. No retries
    // expected.
    panic("recvReqRetry called");
}

AddrRangeList
GpuTLB::CpuSidePort::getAddrRanges() const
{
    // currently not checked by the requestor
    AddrRangeList ranges;

    return ranges;
}

/**
 * MemSidePort receives the packet back; the corresponding TLBEvent is
 * re-scheduled as a MISS_RETURN so the translation is propagated back
 * up one cycle later.
 */
bool
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
{
    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
            virt_page_addr);

    TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
    assert(tlb_event);
    assert(virt_page_addr == tlb_event->getTLBEventVaddr());

    tlb_event->updateOutcome(MISS_RETURN);
    tlb->schedule(tlb_event, curTick()+tlb->clockPeriod());

    return true;
}

void
GpuTLB::MemSidePort::recvReqRetry()
{
    // No retries should reach the TLB. The retries
    // should only reach the TLBCoalescer.
    panic("recvReqRetry called");
}

void
GpuTLB::cleanup()
{
    while (!cleanupQueue.empty()) {
        Addr cleanup_addr = cleanupQueue.front();
        cleanupQueue.pop();

        DPRINTF(GPUTLB, "Deleting return event for %#lx\n", cleanup_addr);

        // delete TLBEvent
        TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
        delete old_tlb_event;
        translationReturnEvent.erase(cleanup_addr);

        // update number of outstanding requests
        outstandingReqs--;
    }

    /** the higher level coalescer should retry if it
     * has any pending requests.
     */
    for (int i = 0; i < cpuSidePort.size(); ++i) {
        cpuSidePort[i]->sendRetryReq();
    }
}

GpuTLB::VegaTLBStats::VegaTLBStats(statistics::Group *parent)
    : statistics::Group(parent),
      ADD_STAT(maxDownstreamReached, "Number of refused translation requests"),
      ADD_STAT(outstandingReqsMax, "Maximum count in coalesced request queue"),
      ADD_STAT(localNumTLBAccesses, "Number of TLB accesses"),
      ADD_STAT(localNumTLBHits, "Number of TLB hits"),
      ADD_STAT(localNumTLBMisses, "Number of TLB misses"),
      ADD_STAT(localTLBMissRate, "TLB miss rate"),
      ADD_STAT(globalNumTLBAccesses, "Number of TLB accesses"),
      ADD_STAT(globalNumTLBHits, "Number of TLB hits"),
      ADD_STAT(globalNumTLBMisses, "Number of TLB misses"),
      ADD_STAT(globalTLBMissRate, "TLB miss rate"),
      ADD_STAT(accessCycles, "Cycles spent accessing this TLB level"),
      ADD_STAT(pageTableCycles, "Cycles spent accessing the page table"),
      ADD_STAT(localCycles, "Number of cycles spent in queue for all "
               "incoming reqs"),
      ADD_STAT(localLatency, "Avg. latency over incoming coalesced reqs")
{
    localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
    globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

    localLatency = localCycles / localNumTLBAccesses;
}

} // namespace VegaISA
} // namespace gem5