gem5  v22.0.0.2
tlb.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
34 
35 #include <cmath>
36 #include <cstring>
37 
39 #include "arch/x86/faults.hh"
41 #include "arch/x86/page_size.hh"
42 #include "arch/x86/pagetable.hh"
44 #include "arch/x86/regs/misc.hh"
45 #include "arch/x86/regs/msr.hh"
46 #include "arch/x86/regs/segment.hh"
47 #include "arch/x86/x86_traits.hh"
48 #include "base/bitfield.hh"
49 #include "base/logging.hh"
50 #include "base/output.hh"
51 #include "base/trace.hh"
52 #include "cpu/base.hh"
53 #include "cpu/thread_context.hh"
54 #include "debug/GPUPrefetch.hh"
55 #include "debug/GPUTLB.hh"
56 #include "mem/packet_access.hh"
57 #include "mem/page_table.hh"
58 #include "mem/request.hh"
59 #include "sim/process.hh"
60 #include "sim/pseudo_inst.hh"
61 
62 namespace gem5
63 {
64 namespace X86ISA
65 {
66 
68  : ClockedObject(p), configAddress(0), size(p.size),
69  cleanupEvent([this]{ cleanup(); }, name(), false,
71  exitEvent([this]{ exitCallback(); }, name()), stats(this)
72  {
73  assoc = p.assoc;
74  assert(assoc <= size);
75  numSets = size/assoc;
76  allocationPolicy = p.allocationPolicy;
77  hasMemSidePort = false;
78  accessDistance = p.accessDistance;
79 
80  tlb.assign(size, TlbEntry());
81 
82  freeList.resize(numSets);
83  entryList.resize(numSets);
84 
85  for (int set = 0; set < numSets; ++set) {
86  for (int way = 0; way < assoc; ++way) {
87  int x = set * assoc + way;
88  freeList[set].push_back(&tlb.at(x));
89  }
90  }
91 
92  FA = (size == assoc);
93 
102  setMask = numSets - 1;
103 
104  maxCoalescedReqs = p.maxOutstandingReqs;
105 
106  // Do not allow maxCoalescedReqs to be more than the TLB associativity
107  if (maxCoalescedReqs > assoc) {
109  cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
110  }
111 
112  outstandingReqs = 0;
113  hitLatency = p.hitLatency;
114  missLatency1 = p.missLatency1;
115  missLatency2 = p.missLatency2;
116 
117  // create the response ports based on the number of connected ports
118  for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
119  cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
120  name(), i), this, i));
121  }
122 
123  // create the request ports based on the number of connected ports
124  for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
125  memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
126  name(), i), this, i));
127  }
128  }
129 
130  // fixme: this is never called?
132  {
133  // make sure all the hash-maps are empty
134  assert(translationReturnEvent.empty());
135  }
136 
137  Port &
138  GpuTLB::getPort(const std::string &if_name, PortID idx)
139  {
140  if (if_name == "cpu_side_ports") {
141  if (idx >= static_cast<PortID>(cpuSidePort.size())) {
142  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
143  }
144 
145  return *cpuSidePort[idx];
146  } else if (if_name == "mem_side_ports") {
147  if (idx >= static_cast<PortID>(memSidePort.size())) {
148  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
149  }
150 
151  hasMemSidePort = true;
152 
153  return *memSidePort[idx];
154  } else {
155  panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
156  }
157  }
158 
// Store a copy of `entry` in the TLB set selected by `vpn` and return a
// pointer to the stored copy.
// NOTE(review): the signature line is missing from this listing; the body
// uses the names `vpn` and `entry` as its inputs.
159  TlbEntry*
161  {
162  TlbEntry *newEntry = nullptr;
163 
// Set index: page number bits of vpn masked with setMask.
168  int set = (vpn >> PageShift) & setMask;
169 
// Prefer an unused slot; otherwise recycle the entry at the back of the
// set's list. lookupIt() moves hits to the front when asked to update the
// LRU order, so the back is the least recently used entry.
170  if (!freeList[set].empty()) {
171  newEntry = freeList[set].front();
172  freeList[set].pop_front();
173  } else {
174  newEntry = entryList[set].back();
175  entryList[set].pop_back();
176  }
177 
// Overwrite the slot with the new translation, force its vaddr to vpn, and
// make it the most recently used entry of the set.
178  *newEntry = entry;
179  newEntry->vaddr = vpn;
180  entryList[set].push_front(newEntry);
181 
182  return newEntry;
183  }
184 
185  GpuTLB::EntryList::iterator
186  GpuTLB::lookupIt(Addr va, bool update_lru)
187  {
188  int set = (va >> PageShift) & setMask;
189 
190  if (FA) {
191  assert(!set);
192  }
193 
194  auto entry = entryList[set].begin();
195  for (; entry != entryList[set].end(); ++entry) {
196  int page_size = (*entry)->size();
197 
198  if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
199  DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
200  "with size %#x.\n", va, (*entry)->vaddr, page_size);
201 
202  if (update_lru) {
203  entryList[set].push_front(*entry);
204  entryList[set].erase(entry);
205  entry = entryList[set].begin();
206  }
207 
208  break;
209  }
210  }
211 
212  return entry;
213  }
214 
215  TlbEntry*
216  GpuTLB::lookup(Addr va, bool update_lru)
217  {
218  int set = (va >> PageShift) & setMask;
219 
220  auto entry = lookupIt(va, update_lru);
221 
222  if (entry == entryList[set].end())
223  return nullptr;
224  else
225  return *entry;
226  }
227 
// Drop every valid translation: for each set, move all entries from the
// in-use list back onto that set's free list.
// NOTE(review): the signature line is missing from this listing.
228  void
230  {
231  DPRINTF(GPUTLB, "Invalidating all entries.\n");
232 
233  for (int i = 0; i < numSets; ++i) {
// Drain the set front to back; the entry storage itself is kept and only
// changes list membership.
234  while (!entryList[i].empty()) {
235  TlbEntry *entry = entryList[i].front();
236  entryList[i].pop_front();
237  freeList[i].push_back(entry);
238  }
239  }
240  }
241 
242  void
244  {
246  }
247 
// Drop every translation whose `global` flag is clear; entries marked
// global stay in place.
// NOTE(review): the signature line is missing from this listing.
248  void
250  {
251  DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
252 
253  for (int i = 0; i < numSets; ++i) {
254  for (auto entryIt = entryList[i].begin();
255  entryIt != entryList[i].end();) {
256  if (!(*entryIt)->global) {
// Return the slot to the free list. The post-increment advances the
// iterator before the erased position is removed from the list.
257  freeList[i].push_back(*entryIt);
258  entryList[i].erase(entryIt++);
259  } else {
260  ++entryIt;
261  }
262  }
263  }
264  }
265 
266  void
267  GpuTLB::demapPage(Addr va, uint64_t asn)
268  {
269 
270  int set = (va >> PageShift) & setMask;
271  auto entry = lookupIt(va, false);
272 
273  if (entry != entryList[set].end()) {
274  freeList[set].push_back(*entry);
275  entryList[set].erase(entry);
276  }
277  }
278 
279 
280 
281  namespace
282  {
283 
284  Cycles
285  localMiscRegAccess(bool read, RegIndex regNum,
286  ThreadContext *tc, PacketPtr pkt)
287  {
288  if (read) {
289  RegVal data = htole(tc->readMiscReg(regNum));
290  // Make sure we don't trot off the end of data.
291  pkt->setData((uint8_t *)&data);
292  } else {
293  RegVal data = htole(tc->readMiscRegNoEffect(regNum));
294  tc->setMiscReg(regNum, letoh(data));
295  }
296  return Cycles(1);
297  }
298 
299  } // anonymous namespace
300 
// Handle a request that targets the internal (non-memory) address space.
//
// The prefix decoded from the virtual address selects the sub-space:
//  - CPUID: not implemented, panics.
//  - MSR:   installs a local accessor for the matching misc register, or
//           returns a GeneralProtection fault when msrAddrToIndex() fails.
//  - IO:    installs a local accessor for the PCI config address register
//           (port 0xCF8, 4-byte access) or sets a physical address for the
//           I/O or PCI config space.
// Any other prefix panics.
//
// `read` selects the direction handed to localMiscRegAccess(). Returns
// NoFault on success.
301  Fault
302  GpuTLB::translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
303  {
304  DPRINTF(GPUTLB, "Addresses references internal memory.\n");
305  Addr vaddr = req->getVaddr();
306  Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
307 
308  if (prefix == IntAddrPrefixCPUID) {
309  panic("CPUID memory space not yet implemented!\n");
310  } else if (prefix == IntAddrPrefixMSR) {
// Strip the prefix; what remains is handed to msrAddrToIndex().
311  vaddr = (vaddr >> 3) & ~IntAddrPrefixMask;
312 
313  RegIndex regNum;
314  if (!msrAddrToIndex(regNum, vaddr))
315  return std::make_shared<GeneralProtection>(0);
316 
// The register access itself is deferred to the request's local accessor.
317  req->setLocalAccessor(
318  [read, regNum](ThreadContext *tc, PacketPtr pkt)
319  {
320  return localMiscRegAccess(read, regNum, tc, pkt);
321  }
322  );
323 
324  return NoFault;
325  } else if (prefix == IntAddrPrefixIO) {
326  // TODO If CPL > IOPL or in virtual mode, check the I/O permission
327  // bitmap in the TSS.
328 
329  Addr IOPort = vaddr & ~IntAddrPrefixMask;
330  // Make sure the address fits in the expected 16 bit IO address
331  // space.
332  assert(!(IOPort & ~0xFFFF));
// A 4-byte access to port 0xCF8 goes to the PCI config address register.
333  if (IOPort == 0xCF8 && req->getSize() == 4) {
334  req->setLocalAccessor(
335  [read](ThreadContext *tc, PacketPtr pkt)
336  {
337  return localMiscRegAccess(
338  read, misc_reg::PciConfigAddress, tc, pkt);
339  }
340  );
// Ports 0xCFC-0xCFF: when bit 31 of configAddress is set, the access is
// redirected into the PCI config space; otherwise it is a plain I/O access.
// NOTE(review): listing lines 342-344 are absent from this view, so no
// local declaration of configAddress is visible here. As shown the name
// would resolve to the member the constructor initialises to 0 — confirm
// against the full source.
341  } else if ((IOPort & ~mask(2)) == 0xCFC) {
345  if (bits(configAddress, 31, 31)) {
346  req->setPaddr(PhysAddrPrefixPciConfig |
347  mbits(configAddress, 30, 2) |
348  (IOPort & mask(2)));
349  } else {
350  req->setPaddr(PhysAddrPrefixIO | IOPort);
351  }
352  } else {
// NOTE(review): listing line 353 is absent from this view.
354  req->setPaddr(PhysAddrPrefixIO | IOPort);
355  }
356  return NoFault;
357  } else {
358  panic("Access to unrecognized internal address space %#x.\n",
359  prefix);
360  }
361  }
362 
370  bool
372  ThreadContext *tc, bool update_stats)
373  {
374  bool tlb_hit = false;
375  #ifndef NDEBUG
376  uint32_t flags = req->getFlags();
377  int seg = flags & SegmentFlagMask;
378  #endif
379 
380  assert(seg != segment_idx::Ms);
381  Addr vaddr = req->getVaddr();
382  DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
383  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(misc_reg::M5Reg);
384 
385  if (m5Reg.prot) {
386  DPRINTF(GPUTLB, "In protected mode.\n");
387  // make sure we are in 64-bit mode
388  assert(m5Reg.mode == LongMode);
389 
390  // If paging is enabled, do the translation.
391  if (m5Reg.paging) {
392  DPRINTF(GPUTLB, "Paging enabled.\n");
393  //update LRU stack on a hit
394  TlbEntry *entry = lookup(vaddr, true);
395 
396  if (entry)
397  tlb_hit = true;
398 
399  if (!update_stats) {
400  // functional tlb access for memory initialization
401  // i.e., memory seeding or instr. seeding -> don't update
402  // TLB and stats
403  return tlb_hit;
404  }
405 
407 
408  if (!entry) {
410  } else {
412  }
413  }
414  }
415 
416  return tlb_hit;
417  }
418 
419  Fault
421  Translation *translation, Mode mode,
422  bool &delayedResponse, bool timing, int &latency)
423  {
424  uint32_t flags = req->getFlags();
425  int seg = flags & SegmentFlagMask;
426  bool storeCheck = flags & Request::READ_MODIFY_WRITE;
427 
428  // If this is true, we're dealing with a request
429  // to a non-memory address space.
430  if (seg == segment_idx::Ms) {
431  return translateInt(mode == Mode::Read, req, tc);
432  }
433 
434  delayedResponse = false;
435  Addr vaddr = req->getVaddr();
436  DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
437 
438  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(misc_reg::M5Reg);
439 
440  // If protected mode has been enabled...
441  if (m5Reg.prot) {
442  DPRINTF(GPUTLB, "In protected mode.\n");
443  // If we're not in 64-bit mode, do protection/limit checks
444  if (m5Reg.mode != LongMode) {
445  DPRINTF(GPUTLB, "Not in long mode. Checking segment "
446  "protection.\n");
447 
448  // Check for a null segment selector.
449  if (!(seg == segment_idx::Tsg || seg == segment_idx::Idtr ||
452  return std::make_shared<GeneralProtection>(0);
453  }
454 
455  bool expandDown = false;
457 
458  if (seg >= segment_idx::Es && seg <= segment_idx::Hs) {
459  if (!attr.writable && (mode == BaseMMU::Write ||
460  storeCheck))
461  return std::make_shared<GeneralProtection>(0);
462 
463  if (!attr.readable && mode == BaseMMU::Read)
464  return std::make_shared<GeneralProtection>(0);
465 
466  expandDown = attr.expandDown;
467 
468  }
469 
472  Addr logSize = (flags >> AddrSizeFlagShift) & AddrSizeFlagMask;
473  int size = 8 << logSize;
474 
475  Addr offset = (vaddr - base) & mask(size);
476  Addr endOffset = offset + req->getSize() - 1;
477 
478  if (expandDown) {
479  DPRINTF(GPUTLB, "Checking an expand down segment.\n");
480  warn_once("Expand down segments are untested.\n");
481 
482  if (offset <= limit || endOffset <= limit)
483  return std::make_shared<GeneralProtection>(0);
484  } else {
485  if (offset > limit || endOffset > limit)
486  return std::make_shared<GeneralProtection>(0);
487  }
488  }
489 
490  // If paging is enabled, do the translation.
491  if (m5Reg.paging) {
492  DPRINTF(GPUTLB, "Paging enabled.\n");
493  // The vaddr already has the segment base applied.
494  TlbEntry *entry = lookup(vaddr);
496 
497  if (!entry) {
499  if (timing) {
500  latency = missLatency1;
501  }
502 
503  if (FullSystem) {
504  fatal("GpuTLB doesn't support full-system mode\n");
505  } else {
506  DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
507  "at pc %#x.\n", vaddr,
508  tc->pcState().instAddr());
509 
510  Process *p = tc->getProcessPtr();
511  const EmulationPageTable::Entry *pte =
512  p->pTable->lookup(vaddr);
513 
514  if (!pte && mode != BaseMMU::Execute) {
515  // penalize a "page fault" more
516  if (timing)
517  latency += missLatency2;
518 
519  if (p->fixupFault(vaddr))
520  pte = p->pTable->lookup(vaddr);
521  }
522 
523  if (!pte) {
524  return std::make_shared<PageFault>(vaddr, true,
525  mode, true,
526  false);
527  } else {
528  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
529 
530  DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
531  alignedVaddr, pte->paddr);
532 
533  TlbEntry gpuEntry(p->pid(), alignedVaddr,
534  pte->paddr, false, false);
535  entry = insert(alignedVaddr, gpuEntry);
536  }
537 
538  DPRINTF(GPUTLB, "Miss was serviced.\n");
539  }
540  } else {
542 
543  if (timing) {
544  latency = hitLatency;
545  }
546  }
547 
548  // Do paging protection checks.
549  bool inUser = m5Reg.cpl == 3 && !(flags & CPL0FlagBit);
550 
551  CR0 cr0 = tc->readMiscRegNoEffect(misc_reg::Cr0);
552  bool badWrite = (!entry->writable && (inUser || cr0.wp));
553 
554  if ((inUser && !entry->user) || (mode == BaseMMU::Write &&
555  badWrite)) {
556  // The page must have been present to get into the TLB in
557  // the first place. We'll assume the reserved bits are
558  // fine even though we're not checking them.
559  return std::make_shared<PageFault>(vaddr, true, mode,
560  inUser, false);
561  }
562 
563  if (storeCheck && badWrite) {
564  // This would fault if this were a write, so return a page
565  // fault that reflects that happening.
566  return std::make_shared<PageFault>(vaddr, true,
568  inUser, false);
569  }
570 
571 
572  DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
573  "checks.\n", entry->paddr);
574 
575  int page_size = entry->size();
576  Addr paddr = entry->paddr | (vaddr & (page_size - 1));
577  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
578  req->setPaddr(paddr);
579 
580  if (entry->uncacheable)
581  req->setFlags(Request::UNCACHEABLE);
582  } else {
583  //Use the address which already has segmentation applied.
584  DPRINTF(GPUTLB, "Paging disabled.\n");
585  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
586  req->setPaddr(vaddr);
587  }
588  } else {
589  // Real mode
590  DPRINTF(GPUTLB, "In real mode.\n");
591  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
592  req->setPaddr(vaddr);
593  }
594 
595  // Check for an access to the local APIC
596  if (FullSystem) {
597  LocalApicBase localApicBase =
599 
600  Addr baseAddr = localApicBase.base * PageBytes;
601  Addr paddr = req->getPaddr();
602 
603  if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
604  // Force the access to be uncacheable.
605  req->setFlags(Request::UNCACHEABLE);
606  req->setPaddr(x86LocalAPICAddress(tc->contextId(),
607  paddr - baseAddr));
608  }
609  }
610 
611  return NoFault;
612  };
613 
// Non-timing translation wrapper: forwards to GpuTLB::translate() with no
// Translation object and timing == false, and returns its Fault.
// NOTE(review): the first signature line is missing from this listing; the
// body uses `req` and `tc` in addition to the parameters shown.
614  Fault
616  Mode mode, int &latency)
617  {
// Required by translate()'s interface; the value is not examined here.
618  bool delayedResponse;
619 
620  return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
621  false, latency);
622  }
623 
// Timing translation wrapper: runs GpuTLB::translate() with timing == true
// and, unless the response is delayed, completes the translation at once
// through translation->finish().
// NOTE(review): the first signature line is missing from this listing; the
// body uses `req` and `tc` in addition to the parameters shown.
624  void
626  Translation *translation, Mode mode, int &latency)
627  {
// translate() returns before assigning delayedResponse when the request
// targets the internal address space (segment_idx::Ms), so start from
// false. Otherwise the check below reads an indeterminate value and may
// skip finish().
628  bool delayedResponse = false;
629  assert(translation);
630 
631  Fault fault = GpuTLB::translate(req, tc, translation, mode,
632  delayedResponse, true, latency);
633 
634  if (!delayedResponse)
635  translation->finish(fault, req, tc, mode);
636  }
637 
638  Walker*
640  {
641  return walker;
642  }
643 
644 
645  void
647  {
648  }
649 
650  void
652  {
653  }
654 
660  void
662  {
663  assert(pkt);
664  assert(pkt->senderState);
665 
666  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
668 
669  GpuTranslationState *sender_state =
670  safe_cast<GpuTranslationState*>(pkt->senderState);
671 
672  bool update_stats = !sender_state->isPrefetch;
673  ThreadContext * tmp_tc = sender_state->tc;
674 
675  DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
676  virt_page_addr);
677 
678  int req_cnt = sender_state->reqCnt.back();
679 
680  if (update_stats) {
681  stats.accessCycles -= (curTick() * req_cnt);
683  updatePageFootprint(virt_page_addr);
684  stats.globalNumTLBAccesses += req_cnt;
685  }
686 
687  tlbOutcome lookup_outcome = TLB_MISS;
688  const RequestPtr &tmp_req = pkt->req;
689 
690  // Access the TLB and figure out if it's a hit or a miss.
691  bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
692 
693  if (success) {
694  lookup_outcome = TLB_HIT;
695  // Put the entry in SenderState
696  TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
697  assert(entry);
698 
699  auto p = sender_state->tc->getProcessPtr();
700  sender_state->tlbEntry =
701  new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
702  false, false);
703 
704  if (update_stats) {
705  // the reqCnt has an entry per level, so its size tells us
706  // which level we are in
707  sender_state->hitLevel = sender_state->reqCnt.size();
708  stats.globalNumTLBHits += req_cnt;
709  }
710  } else {
711  if (update_stats)
712  stats.globalNumTLBMisses += req_cnt;
713  }
714 
715  /*
716  * We now know the TLB lookup outcome (if it's a hit or a miss), as
717  * well as the TLB access latency.
718  *
719  * We create and schedule a new TLBEvent which will help us take the
720  * appropriate actions (e.g., update TLB on a hit, send request to
721  * lower level TLB on a miss, or start a page walk if this was the
722  * last-level TLB)
723  */
724  TLBEvent *tlb_event =
725  new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
726 
727  if (translationReturnEvent.count(virt_page_addr)) {
728  panic("Virtual Page Address %#x already has a return event\n",
729  virt_page_addr);
730  }
731 
732  translationReturnEvent[virt_page_addr] = tlb_event;
733  assert(tlb_event);
734 
735  DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
737 
738  schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency)));
739  }
740 
742  tlbOutcome tlb_outcome, PacketPtr _pkt)
743  : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
744  outcome(tlb_outcome), pkt(_pkt)
745  {
746  }
747 
// Check whether the access described by `pkt` and `mode` is permitted by
// `tlb_entry`. A violation is not reported as a fault: the function panics.
// It returns normally only when the access is allowed.
// NOTE(review): the first signature line is missing from this listing; the
// body uses `tc` and `pkt` in addition to the parameters shown.
752  void
754  TlbEntry * tlb_entry, Mode mode)
755  {
756  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(misc_reg::M5Reg);
757  uint32_t flags = pkt->req->getFlags();
758  bool storeCheck = flags & Request::READ_MODIFY_WRITE;
759 
760  // Do paging protection checks.
// User-level access: CPL is 3 and the request does not carry CPL0FlagBit.
761  bool inUser = m5Reg.cpl == 3 && !(flags & CPL0FlagBit);
762  CR0 cr0 = tc->readMiscRegNoEffect(misc_reg::Cr0);
763 
// A write would be illegal when the page is not writable and either the
// access is user-level or CR0.WP is set.
764  bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
765 
766  if ((inUser && !tlb_entry->user) ||
767  (mode == BaseMMU::Write && badWrite)) {
768  // The page must have been present to get into the TLB in
769  // the first place. We'll assume the reserved bits are
770  // fine even though we're not checking them.
771  panic("Page fault detected");
772  }
773 
774  if (storeCheck && badWrite) {
775  // This would fault if this were a write, so return a page
776  // fault that reflects that happening.
// NOTE(review): despite the wording above, no fault object is returned
// here; the function is void and this path panics.
777  panic("Page fault detected");
778  }
779  }
780 
786  void
788  tlbOutcome tlb_outcome, PacketPtr pkt)
789  {
790  assert(pkt);
791  Addr vaddr = pkt->req->getVaddr();
792 
793  GpuTranslationState *sender_state =
794  safe_cast<GpuTranslationState*>(pkt->senderState);
795 
796  ThreadContext *tc = sender_state->tc;
797  Mode mode = sender_state->tlbMode;
798 
799  TlbEntry *local_entry, *new_entry;
800 
801  if (tlb_outcome == TLB_HIT) {
802  DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
803  vaddr);
804  local_entry = safe_cast<TlbEntry *>(sender_state->tlbEntry);
805  } else {
806  DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
807  vaddr);
808 
814  new_entry = safe_cast<TlbEntry *>(sender_state->tlbEntry);
815  assert(new_entry);
816  local_entry = new_entry;
817 
818  if (allocationPolicy) {
819  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
820  virt_page_addr);
821 
822  local_entry = insert(virt_page_addr, *new_entry);
823  }
824 
825  assert(local_entry);
826  }
827 
833  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
834  "while paddr was %#x.\n", local_entry->vaddr,
835  local_entry->paddr);
836 
837  pagingProtectionChecks(tc, pkt, local_entry, mode);
838  int page_size = local_entry->size();
839  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
840  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
841 
842  // Since this packet will be sent through the cpu side port,
843  // it must be converted to a response pkt if it is not one already
844  if (pkt->isRequest()) {
845  pkt->makeTimingResponse();
846  }
847 
848  pkt->req->setPaddr(paddr);
849 
850  if (local_entry->uncacheable) {
851  pkt->req->setFlags(Request::UNCACHEABLE);
852  }
853 
854  //send packet back to coalescer
855  cpuSidePort[0]->sendTimingResp(pkt);
856  //schedule cleanup event
857  cleanupQueue.push(virt_page_addr);
858 
859  // schedule this only once per cycle.
860  // The check is required because we might have multiple translations
861  // returning the same cycle
862  // this is a maximum priority event and must be on the same cycle
863  // as the cleanup event in TLBCoalescer to avoid a race with
864  // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
865  if (!cleanupEvent.scheduled())
867  }
868 
873  void
875  PacketPtr pkt)
876  {
877  DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
878 
879  assert(translationReturnEvent[virtPageAddr]);
880  assert(pkt);
881 
882  GpuTranslationState *tmp_sender_state =
883  safe_cast<GpuTranslationState*>(pkt->senderState);
884 
885  int req_cnt = tmp_sender_state->reqCnt.back();
886  bool update_stats = !tmp_sender_state->isPrefetch;
887 
888 
889  if (outcome == TLB_HIT) {
890  handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
891 
892  if (update_stats) {
893  stats.accessCycles += (req_cnt * curTick());
895  }
896 
897  } else if (outcome == TLB_MISS) {
898 
899  DPRINTF(GPUTLB, "This is a TLB miss\n");
900  if (update_stats) {
901  stats.accessCycles += (req_cnt*curTick());
903  }
904 
905  if (hasMemSidePort) {
906  // the one cycle added here represents the delay from when we get
907  // the reply back till when we propagate it to the coalescer
908  // above.
909  if (update_stats) {
910  stats.accessCycles += (req_cnt * 1);
911  stats.localCycles += 1;
912  }
913 
919  if (!memSidePort[0]->sendTimingReq(pkt)) {
920  DPRINTF(GPUTLB, "Failed sending translation request to "
921  "lower level TLB for addr %#x\n", virtPageAddr);
922 
923  memSidePort[0]->retries.push_back(pkt);
924  } else {
925  DPRINTF(GPUTLB, "Sent translation request to lower level "
926  "TLB for addr %#x\n", virtPageAddr);
927  }
928  } else {
929  //this is the last level TLB. Start a page walk
930  DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
931  "addr %#x\n", virtPageAddr);
932 
933  if (update_stats)
934  stats.pageTableCycles -= (req_cnt*curTick());
935 
936  TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
937  assert(tlb_event);
938  tlb_event->updateOutcome(PAGE_WALK);
939  schedule(tlb_event,
941  }
942  } else if (outcome == PAGE_WALK) {
943  if (update_stats)
944  stats.pageTableCycles += (req_cnt*curTick());
945 
946  // Need to access the page table and update the TLB
947  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
948  virtPageAddr);
949 
950  GpuTranslationState *sender_state =
951  safe_cast<GpuTranslationState*>(pkt->senderState);
952 
953  Process *p = sender_state->tc->getProcessPtr();
954  Addr vaddr = pkt->req->getVaddr();
955 
956  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
957  assert(alignedVaddr == virtPageAddr);
958 
959  const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
960  if (!pte && sender_state->tlbMode != BaseMMU::Execute &&
961  p->fixupFault(vaddr)) {
962  pte = p->pTable->lookup(vaddr);
963  }
964 
965  if (pte) {
966  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
967  pte->paddr);
968 
969  sender_state->tlbEntry =
970  new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
971  false);
972  } else {
973  sender_state->tlbEntry = nullptr;
974  }
975 
976  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
977  } else if (outcome == MISS_RETURN) {
981  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
982  } else {
983  panic("Unexpected TLB outcome %d", outcome);
984  }
985  }
986 
987  void
989  {
990  tlb->translationReturn(virtPageAddr, outcome, pkt);
991  }
992 
993  const char*
995  {
996  return "trigger translationDoneEvent";
997  }
998 
999  void
1001  {
1002  outcome = _outcome;
1003  }
1004 
1005  Addr
1007  {
1008  return virtPageAddr;
1009  }
1010 
// Accept a translation request packet if the TLB still has room for another
// outstanding request.
// Returns true after handing `pkt` to tlb->issueTLBLookup(), or false when
// tlb->outstandingReqs has reached tlb->maxCoalescedReqs.
// NOTE(review): the signature line is missing from this listing; the body
// uses `pkt` as its input and reaches the TLB through `tlb`.
1017  bool
1019  {
1020  if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1021  tlb->issueTLBLookup(pkt);
1022  // update number of outstanding translation requests
1023  tlb->outstandingReqs++;
1024  return true;
1025  } else {
1026  DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1027  tlb->outstandingReqs);
1028  return false;
1029  }
1030  }
1031 
// Finish a functional translation: pick the TLB entry carried in the
// packet's sender state, optionally install it in this TLB on a miss, run
// the protection checks, and write the physical address (plus the
// UNCACHEABLE flag when needed) into pkt->req.
// NOTE(review): the signature line is missing from this listing; the body
// uses `pkt` and `tlb_outcome` as its inputs.
1040  void
1042  {
1043  GpuTranslationState *sender_state =
1044  safe_cast<GpuTranslationState*>(pkt->senderState);
1045 
1046  ThreadContext *tc = sender_state->tc;
1047  Mode mode = sender_state->tlbMode;
1048  Addr vaddr = pkt->req->getVaddr();
1049 
1050  TlbEntry *local_entry, *new_entry;
1051 
1052  if (tlb_outcome == TLB_HIT) {
1053  DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1054  "%#x\n", vaddr);
1055 
// On a hit the entry in the sender state is used as is.
1056  local_entry = safe_cast<TlbEntry *>(sender_state->tlbEntry);
1057  } else {
1058  DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1059  "%#x\n", vaddr);
1060 
// On a miss the sender state must already hold the new translation.
// NOTE(review): this is enforced only by assert(); if assertions are
// compiled out, a null tlbEntry would be dereferenced below.
1066  new_entry = safe_cast<TlbEntry *>(sender_state->tlbEntry);
1067  assert(new_entry);
1068  local_entry = new_entry;
1069 
// When allocationPolicy is set, copy the translation into this TLB and
// continue with the stored copy.
1070  if (allocationPolicy) {
1071  Addr virt_page_addr = roundDown(vaddr, X86ISA::PageBytes);
1072 
1073  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1074  virt_page_addr);
1075 
1076  local_entry = insert(virt_page_addr, *new_entry);
1077  }
1078 
1079  assert(local_entry);
1080  }
1081 
1082  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1083  "while paddr was %#x.\n", local_entry->vaddr,
1084  local_entry->paddr);
1085 
// Protection checks are skipped for prefetches and when the sender state
// carries no entry.
1097  if (!sender_state->isPrefetch && sender_state->tlbEntry)
1098  pagingProtectionChecks(tc, pkt, local_entry, mode);
1099 
// Physical address = entry's paddr combined with the offset of vaddr
// inside the page.
1100  int page_size = local_entry->size();
1101  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1102  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1103 
1104  pkt->req->setPaddr(paddr);
1105 
1106  if (local_entry->uncacheable)
1107  pkt->req->setFlags(Request::UNCACHEABLE);
1108  }
1109 
1110  // This is used for atomic translations. Need to
1111  // make it all happen during the same cycle.
1112  void
1114  {
1115  GpuTranslationState *sender_state =
1116  safe_cast<GpuTranslationState*>(pkt->senderState);
1117 
1118  ThreadContext *tc = sender_state->tc;
1119  bool update_stats = !sender_state->isPrefetch;
1120 
1121  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1123 
1124  if (update_stats)
1125  tlb->updatePageFootprint(virt_page_addr);
1126 
1127  // do the TLB lookup without updating the stats
1128  bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1129  tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1130 
1131  // functional mode means no coalescing
1132  // global metrics are the same as the local metrics
1133  if (update_stats) {
1134  tlb->stats.globalNumTLBAccesses++;
1135 
1136  if (success) {
1137  sender_state->hitLevel = sender_state->reqCnt.size();
1138  tlb->stats.globalNumTLBHits++;
1139  }
1140  }
1141 
1142  if (!success) {
1143  if (update_stats)
1144  tlb->stats.globalNumTLBMisses++;
1145  if (tlb->hasMemSidePort) {
1146  // there is a TLB below -> propagate down the TLB hierarchy
1147  tlb->memSidePort[0]->sendFunctional(pkt);
1148  // If no valid translation from a prefetch, then just return
1149  if (sender_state->isPrefetch && !pkt->req->hasPaddr())
1150  return;
1151  } else {
1152  // Need to access the page table and update the TLB
1153  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1154  virt_page_addr);
1155 
1156  Process *p = tc->getProcessPtr();
1157 
1158  Addr vaddr = pkt->req->getVaddr();
1159 
1160  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1161  assert(alignedVaddr == virt_page_addr);
1162 
1163  const EmulationPageTable::Entry *pte =
1164  p->pTable->lookup(vaddr);
1165  if (!pte && sender_state->tlbMode != BaseMMU::Execute &&
1166  p->fixupFault(vaddr)) {
1167  pte = p->pTable->lookup(vaddr);
1168  }
1169 
1170  if (!sender_state->isPrefetch) {
1171  // no PageFaults are permitted after
1172  // the second page table lookup
1173  assert(pte);
1174 
1175  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1176  pte->paddr);
1177 
1178  sender_state->tlbEntry =
1179  new TlbEntry(p->pid(), virt_page_addr,
1180  pte->paddr, false, false);
1181  } else {
1182  // If this was a prefetch, then do the normal thing if it
1183  // was a successful translation. Otherwise, send an empty
1184  // TLB entry back so that it can be figured out as empty
1185  // and handled accordingly.
1186  if (pte) {
1187  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1188  pte->paddr);
1189 
1190  sender_state->tlbEntry =
1191  new TlbEntry(p->pid(), virt_page_addr,
1192  pte->paddr, false, false);
1193  } else {
1194  DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1195  alignedVaddr);
1196 
1197  sender_state->tlbEntry = nullptr;
1198 
1199  return;
1200  }
1201  }
1202  }
1203  } else {
1204  DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1205  tlb->lookup(pkt->req->getVaddr()));
1206 
1207  TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1208  update_stats);
1209 
1210  assert(entry);
1211 
1212  auto p = sender_state->tc->getProcessPtr();
1213  sender_state->tlbEntry =
1214  new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1215  false, false);
1216  }
1217  // This is the function that would populate pkt->req with the paddr of
1218  // the translation. But if no translation happens (i.e Prefetch fails)
1219  // then the early returns in the above code will keep this function
1220  // from executing.
1221  tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1222  }
1223 
1224  void
1226  {
1227  // The CPUSidePort never sends anything but replies. No retries
1228  // expected.
1229  panic("recvReqRetry called");
1230  }
1231 
1234  {
1235  // currently not checked by the requestor
1236  AddrRangeList ranges;
1237 
1238  return ranges;
1239  }
1240 
// Timing-response handler of the memory-side port: a translation that
// missed in this TLB has come back from the lower level. The pending
// TLBEvent for the page is looked up, its outcome is switched to
// MISS_RETURN and it is scheduled one clock period from the current tick.
// Always returns true.
// NOTE(review): listing line 1247 (the declarator; the index gives
// "bool GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)") and listing
// line 1250 (the second argument of roundDown, presumably the page size)
// are absent from this extract -- confirm against the real file.
1246  bool
1248  {
// Page-align the request's virtual address; it is the key into
// translationReturnEvent below.
1249  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1251 
1252  DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1253  virt_page_addr);
1254 
// translationReturnEvent is an unordered_map<Addr, TLBEvent *> (per the
// index), so operator[] inserts a null entry when the key is absent; the
// asserts below are the only protection against that case.
1255  TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1256  assert(tlb_event);
1257  assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1258 
// Re-purpose the already-allocated event and fire it on the next cycle.
1259  tlb_event->updateOutcome(MISS_RETURN);
1260  tlb->schedule(tlb_event, curTick()+tlb->clockPeriod());
1261 
1262  return true;
1263  }
1264 
// Retry hook of the memory-side port. The body does nothing except call
// panic(), i.e. reaching this function is treated as a fatal logic error.
// NOTE(review): listing line 1266 is absent from this extract; the
// cross-reference index places GpuTLB::MemSidePort::recvReqRetry() at
// tlb.cc:1266, so that line presumably held the declarator -- confirm
// against the real file.
1265  void
1267  {
1268  // No retries should reach the TLB. The retries
1269  // should only reach the TLBCoalescer.
1270  panic("recvReqRetry called");
1271  }
1272 
// Releases the bookkeeping of finished translations: for every address
// queued in cleanupQueue the associated TLBEvent is deleted, its map entry
// is erased and outstandingReqs is decremented. Afterwards sendRetryReq()
// is called on every CPU-side port.
// NOTE(review): listing line 1274 (the declarator; the index gives
// "void GpuTLB::cleanup()") and listing lines 1289-1291 are absent from
// this extract; the content of 1289-1291 cannot be determined from here.
1273  void
1275  {
1276  while (!cleanupQueue.empty()) {
1277  Addr cleanup_addr = cleanupQueue.front();
1278  cleanupQueue.pop();
1279 
1280  // delete TLBEvent
// operator[] would insert a null entry for an unknown address; deleting a
// null pointer is harmless and the entry is erased right after.
1281  TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1282  delete old_tlb_event;
1283  translationReturnEvent.erase(cleanup_addr);
1284 
1285  // update number of outstanding requests
1286  outstandingReqs--;
1287  }
1288 
// Ask every CPU-side port to retry now that resources have been freed.
1292  for (int i = 0; i < cpuSidePort.size(); ++i) {
1293  cpuSidePort[i]->sendRetryReq();
1294  }
1295  }
1296 
// Records one access to the page virt_page_addr in TLBFootprint: the
// page's entry is created (zero-initialised) on first touch, its reuse
// distance is accumulated on later touches, and its access count and
// last-access tick are refreshed. When accessDistance is set, the current
// value of stats.localNumTLBAccesses is also appended to the page's
// localTLBAccesses history.
// NOTE(review): three listing lines are absent from this extract --
// 1298 (the declarator; the index gives
// "void GpuTLB::updatePageFootprint(Addr virt_page_addr)"), 1301
// (presumably the declaration of `ret`) and 1316 (the whole body of the
// first-access branch). Confirm all three against the real file.
1297  void
1299  {
1300 
1302 
// Template entry used only if the page has not been seen before.
1303  AccessInfo tmp_access_info;
1304  tmp_access_info.lastTimeAccessed = 0;
1305  tmp_access_info.accessesPerPage = 0;
1306  tmp_access_info.totalReuseDistance = 0;
1307  tmp_access_info.sumDistance = 0;
1308  tmp_access_info.meanDistance = 0;
1309 
// insert() leaves an existing entry untouched; ret.second tells whether a
// new entry was created, ret.first points at the entry either way.
1310  ret = TLBFootprint.insert(
1311  AccessPatternTable::value_type(virt_page_addr, tmp_access_info));
1312 
1313  bool first_page_access = ret.second;
1314 
1315  if (first_page_access) {
1317  } else {
// Ticks elapsed since this page was last touched.
// NOTE(review): the difference is narrowed to int, and lastTimeAccessed /
// totalReuseDistance are unsigned int per the index, so long simulations
// can wrap -- confirm whether that is acceptable.
1318  int accessed_before;
1319  accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1320  ret.first->second.totalReuseDistance += accessed_before;
1321  }
1322 
1323  ret.first->second.accessesPerPage++;
1324  ret.first->second.lastTimeAccessed = curTick();
1325 
1326  if (accessDistance) {
1327  ret.first->second.localTLBAccesses
1328  .push_back(stats.localNumTLBAccesses.value());
1329  }
1330  }
1331 
1332  void
1334  {
1335  std::ostream *page_stat_file = nullptr;
1336 
1337  if (accessDistance) {
1338 
1339  // print per page statistics to a separate file (.csv format)
1340  // simout is the gem5 output directory (default is m5out or the one
1341  // specified with -d
1342  page_stat_file = simout.create(name().c_str())->stream();
1343 
1344  // print header
1345  *page_stat_file
1346  << "page,max_access_distance,mean_access_distance, "
1347  << "stddev_distance" << std::endl;
1348  }
1349 
1350  // update avg. reuse distance footprint
1351  unsigned int sum_avg_reuse_distance_per_page = 0;
1352 
1353  // iterate through all pages seen by this TLB
1354  for (auto &iter : TLBFootprint) {
1355  sum_avg_reuse_distance_per_page += iter.second.totalReuseDistance /
1356  iter.second.accessesPerPage;
1357 
1358  if (accessDistance) {
1359  unsigned int tmp = iter.second.localTLBAccesses[0];
1360  unsigned int prev = tmp;
1361 
1362  for (int i = 0; i < iter.second.localTLBAccesses.size(); ++i) {
1363  if (i) {
1364  tmp = prev + 1;
1365  }
1366 
1367  prev = iter.second.localTLBAccesses[i];
1368  // update the localTLBAccesses value
1369  // with the actual differece
1370  iter.second.localTLBAccesses[i] -= tmp;
1371  // compute the sum of AccessDistance per page
1372  // used later for mean
1373  iter.second.sumDistance +=
1374  iter.second.localTLBAccesses[i];
1375  }
1376 
1377  iter.second.meanDistance =
1378  iter.second.sumDistance / iter.second.accessesPerPage;
1379 
1380  // compute std_dev and max (we need a second round because we
1381  // need to know the mean value
1382  unsigned int max_distance = 0;
1383  unsigned int stddev_distance = 0;
1384 
1385  for (int i = 0; i < iter.second.localTLBAccesses.size(); ++i) {
1386  unsigned int tmp_access_distance =
1387  iter.second.localTLBAccesses[i];
1388 
1389  if (tmp_access_distance > max_distance) {
1390  max_distance = tmp_access_distance;
1391  }
1392 
1393  unsigned int diff =
1394  tmp_access_distance - iter.second.meanDistance;
1395  stddev_distance += pow(diff, 2);
1396 
1397  }
1398 
1399  stddev_distance =
1400  sqrt(stddev_distance/iter.second.accessesPerPage);
1401 
1402  if (page_stat_file) {
1403  *page_stat_file << std::hex << iter.first << ",";
1404  *page_stat_file << std::dec << max_distance << ",";
1405  *page_stat_file << std::dec << iter.second.meanDistance
1406  << ",";
1407  *page_stat_file << std::dec << stddev_distance;
1408  *page_stat_file << std::endl;
1409  }
1410 
1411  // erase the localTLBAccesses array
1412  iter.second.localTLBAccesses.clear();
1413  }
1414  }
1415 
1416  if (!TLBFootprint.empty()) {
1418  sum_avg_reuse_distance_per_page / TLBFootprint.size();
1419  }
1420 
1421  //clear the TLBFootprint map
1422  TLBFootprint.clear();
1423  }
1424 
// Constructor of the TLB statistics group: registers the group with its
// parent and gives every stat its description string via ADD_STAT.
// The local* and global* counters use identical description strings, so
// they can only be told apart by their stat names.
// NOTE(review): listing line 1425 (the declarator; the index gives
// "GpuTLBStats(statistics::Group *parent)" at tlb.cc:1425) and listing
// lines 1444, 1446 and 1447 (the constructor body) are absent from this
// extract. localTLBMissRate, globalTLBMissRate and localLatency are
// statistics::Formula members per the index, so the body presumably
// defines those formulas -- confirm against the real file.
1426  : statistics::Group(parent),
1427  ADD_STAT(localNumTLBAccesses, "Number of TLB accesses"),
1428  ADD_STAT(localNumTLBHits, "Number of TLB hits"),
1429  ADD_STAT(localNumTLBMisses, "Number of TLB misses"),
1430  ADD_STAT(localTLBMissRate, "TLB miss rate"),
1431  ADD_STAT(globalNumTLBAccesses, "Number of TLB accesses"),
1432  ADD_STAT(globalNumTLBHits, "Number of TLB hits"),
1433  ADD_STAT(globalNumTLBMisses, "Number of TLB misses"),
1434  ADD_STAT(globalTLBMissRate, "TLB miss rate"),
1435  ADD_STAT(accessCycles, "Cycles spent accessing this TLB level"),
1436  ADD_STAT(pageTableCycles, "Cycles spent accessing the page table"),
1437  ADD_STAT(numUniquePages, "Number of unique pages touched"),
1438  ADD_STAT(localCycles, "Number of cycles spent in queue for all "
1439  "incoming reqs"),
1440  ADD_STAT(localLatency, "Avg. latency over incoming coalesced reqs"),
1441  ADD_STAT(avgReuseDistance, "avg. reuse distance over all pages (in "
1442  "ticks)")
1443  {
1445 
1448  }
1449 } // namespace X86ISA
1450 } // namespace gem5
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
gem5::X86ISA::mask
mask
Definition: misc.hh:796
gem5::PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:245
pagetable.hh
gem5::X86ISA::GpuTLB::configAddress
uint32_t configAddress
Definition: tlb.hh:72
gem5::X86ISA::TlbEntry::writable
bool writable
Definition: pagetable.hh:77
gem5::X86ISA::GpuTLB::GpuTLBStats::numUniquePages
statistics::Scalar numUniquePages
Definition: tlb.hh:377
gem5::X86ISA::segment_idx::Ls
@ Ls
Definition: segment.hh:59
gem5::ArmISA::tlb
Bitfield< 59, 56 > tlb
Definition: misc_types.hh:92
gem5::PCStateBase::instAddr
Addr instAddr() const
Returns the memory address of the instruction this PC points to.
Definition: pcstate.hh:107
gem5::BaseMMU::Read
@ Read
Definition: mmu.hh:56
x86_traits.hh
gem5::ThreadContext::readMiscReg
virtual RegVal readMiscReg(RegIndex misc_reg)=0
gem5::X86ISA::x
Bitfield< 1 > x
Definition: types.hh:108
gem5::NoFault
constexpr decltype(nullptr) NoFault
Definition: types.hh:253
gem5::Packet::isRequest
bool isRequest() const
Definition: packet.hh:594
gem5::X86ISA::SegmentFlagMask
constexpr Request::FlagsType SegmentFlagMask
Definition: ldstflags.hh:54
gem5::X86ISA::GpuTLB::GpuTLBStats::GpuTLBStats
GpuTLBStats(statistics::Group *parent)
Definition: tlb.cc:1425
gem5::RegVal
uint64_t RegVal
Definition: types.hh:173
gem5::X86ISA::GpuTLB::GpuTLBStats::accessCycles
statistics::Scalar accessCycles
Definition: tlb.hh:374
gem5::cprintf
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:155
gem5::X86ISA::misc_reg::PciConfigAddress
@ PciConfigAddress
Definition: misc.hh:404
gem5::X86ISA::GpuTLB::CpuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
Definition: tlb.cc:1113
gem5::X86ISA::GpuTLB::translationReturn
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
Definition: tlb.cc:874
gem5::X86ISA::GpuTLB::stats
gem5::X86ISA::GpuTLB::GpuTLBStats stats
gem5::X86ISA::GpuTLB::memSidePort
std::vector< MemSidePort * > memSidePort
Definition: tlb.hh:262
gem5::X86ISA::GpuTLB::GpuTLBStats::avgReuseDistance
statistics::Scalar avgReuseDistance
Definition: tlb.hh:384
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::X86ISA::GpuTLB::size
int size
Definition: tlb.hh:116
gem5::X86ISA::x86LocalAPICAddress
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Definition: x86_traits.hh:91
gem5::X86ISA::mode
Bitfield< 3 > mode
Definition: types.hh:192
gem5::GpuTranslationState
GPU TranslationState: this currently is a somewhat bastardization of the usage of SenderState,...
Definition: gpu_translation_state.hh:58
microldstop.hh
warn_once
#define warn_once(...)
Definition: logging.hh:250
gem5::X86ISA::IntAddrPrefixMSR
const Addr IntAddrPrefixMSR
Definition: x86_traits.hh:64
gem5::Packet::setData
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
Definition: packet.hh:1265
gem5::X86ISA::GpuTLB::GpuTLBStats::localNumTLBAccesses
statistics::Scalar localNumTLBAccesses
Definition: tlb.hh:360
gem5::X86ISA::TlbEntry::user
bool user
Definition: pagetable.hh:79
gem5::X86ISA::GpuTLB::numSets
int numSets
Definition: tlb.hh:118
pseudo_inst.hh
gem5::X86ISA::misc_reg::M5Reg
@ M5Reg
Definition: misc.hh:146
gem5::ArmISA::attr
attr
Definition: misc_types.hh:656
gem5::X86ISA::GpuTLB::exitCallback
void exitCallback()
Definition: tlb.cc:1333
gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:374
gem5::BaseMMU::Write
@ Write
Definition: mmu.hh:56
gem5::X86ISA::GpuTLB::cleanup
void cleanup()
Definition: tlb.cc:1274
gem5::CheckpointIn
Definition: serialize.hh:68
gem5::X86ISA::GpuTLB::outstandingReqs
int outstandingReqs
Definition: tlb.hh:273
gem5::ThreadContext::pcState
virtual const PCStateBase & pcState() const =0
gem5::ArmISA::set
Bitfield< 12, 11 > set
Definition: misc_types.hh:703
gem5::X86ISA::TlbEntry::size
int size()
Definition: pagetable.hh:112
gem5::X86ISA::GpuTLB::AccessInfo::totalReuseDistance
unsigned int totalReuseDistance
Definition: tlb.hh:330
gem5::X86ISA::GpuTLB::walker
Walker * walker
Definition: tlb.hh:107
gem5::X86ISA::offset
offset
Definition: misc.hh:1024
gem5::X86ISA::GpuTLB::TLB_MISS
@ TLB_MISS
Definition: tlb.hh:194
gem5::X86ISA::GpuTLB::Mode
enum BaseMMU::Mode Mode
Definition: tlb.hh:79
gem5::simout
OutputDirectory simout
Definition: output.cc:62
gem5::X86ISA::GpuTLB::GpuTLBStats::localTLBMissRate
statistics::Formula localTLBMissRate
Definition: tlb.hh:363
pagetable_walker.hh
gem5::X86ISA::misc_reg::ApicBase
@ ApicBase
Definition: misc.hh:401
gem5::X86ISA::GpuTLB::~GpuTLB
~GpuTLB()
Definition: tlb.cc:131
gem5::ThreadContext::contextId
virtual ContextID contextId() const =0
gem5::X86ISA::GpuTLB::getWalker
Walker * getWalker()
Definition: tlb.cc:639
gem5::X86ISA::misc_reg::segAttr
static RegIndex segAttr(int index)
Definition: misc.hh:531
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
gem5::OutputDirectory::create
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
Definition: output.cc:210
gem5::X86ISA::GpuTLB::lookup
TlbEntry * lookup(Addr va, bool update_lru=true)
Definition: tlb.cc:216
gem5::csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
gem5::X86ISA::limit
BitfieldType< SegDescriptorLimit > limit
Definition: misc.hh:924
gem5::X86ISA::Walker
Definition: pagetable_walker.hh:60
gem5::X86ISA::PageShift
const Addr PageShift
Definition: page_size.hh:48
gem5::X86ISA::GpuTLB::GpuTLBStats::globalTLBMissRate
statistics::Formula globalTLBMissRate
Definition: tlb.hh:371
gem5::mbits
constexpr T mbits(T val, unsigned first, unsigned last)
Mask off the given bits in place like bits() but without shifting.
Definition: bitfield.hh:103
gem5::X86ISA::GpuTLB::FA
bool FA
true if this is a fully-associative TLB
Definition: tlb.hh:123
gem5::X86ISA::base
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
gem5::X86ISA::GpuTLB::TLBEvent::getTLBEventVaddr
Addr getTLBEventVaddr()
Definition: tlb.cc:1006
gem5::Request::READ_MODIFY_WRITE
@ READ_MODIFY_WRITE
This request is a read which will be followed by a write.
Definition: request.hh:161
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::X86ISA::GpuTLB::insert
TlbEntry * insert(Addr vpn, TlbEntry &entry)
Definition: tlb.cc:160
faults.hh
gem5::VegaISA::baseAddr
Bitfield< 47, 6 > baseAddr
Definition: pagetable.hh:71
gem5::X86ISA::GpuTLB::GpuTLBStats::globalNumTLBAccesses
statistics::Scalar globalNumTLBAccesses
Definition: tlb.hh:368
gem5::X86ISA::GpuTLB::tlbOutcome
tlbOutcome
Definition: tlb.hh:194
output.hh
gem5::X86ISA::GpuTLB::lookupIt
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: tlb.cc:186
request.hh
gem5::X86ISA::GpuTLB::AccessInfo::lastTimeAccessed
unsigned int lastTimeAccessed
Definition: tlb.hh:327
gem5::X86ISA::GpuTLB::allocationPolicy
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: tlb.hh:130
gem5::ArmISA::TlbEntry
Definition: pagetable.hh:165
gem5::X86ISA::GpuTLB::setConfigAddress
void setConfigAddress(uint32_t addr)
Definition: tlb.cc:243
gem5::X86ISA::GpuTLB::Translation::finish
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itsle...
gem5::X86ISA::GpuTLB::hasMemSidePort
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: tlb.hh:135
gem5::X86ISA::GpuTLB::missLatency1
int missLatency1
Definition: tlb.hh:171
gem5::X86ISA::AddrSizeFlagShift
constexpr auto AddrSizeFlagShift
Definition: ldstflags.hh:57
gem5::BaseMMU::Execute
@ Execute
Definition: mmu.hh:56
gem5::letoh
T letoh(T value)
Definition: byteswap.hh:173
gem5::X86ISA::GpuTLB::GpuTLBStats::localLatency
statistics::Formula localLatency
Definition: tlb.hh:381
gem5::X86ISA::GpuTLB::cleanupQueue
std::queue< Addr > cleanupQueue
Definition: tlb.hh:312
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::X86ISA::TlbEntry
Definition: pagetable.hh:65
gem5::X86ISA::GpuTLB::CpuSidePort::getAddrRanges
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: tlb.cc:1233
gem5::X86ISA::misc_reg::segSel
static RegIndex segSel(int index)
Definition: misc.hh:503
gem5::X86ISA::GpuTLB::translateAtomic
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
Definition: tlb.cc:615
gem5::X86ISA::misc_reg::segBase
static RegIndex segBase(int index)
Definition: misc.hh:510
gem5::X86ISA::GpuTLB::serialize
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: tlb.cc:646
gem5::OutputStream::stream
std::ostream * stream() const
Get the output underlying output stream.
Definition: output.hh:62
gem5::X86ISA::GpuTLB::translate
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: tlb.cc:420
gem5::X86ISA::GpuTLB::accessDistance
bool accessDistance
Print out accessDistance stats.
Definition: tlb.hh:141
gem5::Request::UNCACHEABLE
@ UNCACHEABLE
The request is to an uncacheable address.
Definition: request.hh:125
gem5::X86ISA::GpuTLB::unserialize
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: tlb.cc:651
gem5::X86ISA::GpuTLB::handleFuncTranslationReturn
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: tlb.cc:1041
gem5::X86ISA::misc_reg::Cr0
@ Cr0
Definition: misc.hh:114
bitfield.hh
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:94
gem5::Named::name
virtual std::string name() const
Definition: named.hh:47
gem5::X86ISA::GpuTLB::issueTLBLookup
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
Definition: tlb.cc:661
gem5::Fault
std::shared_ptr< FaultBase > Fault
Definition: types.hh:248
gem5::X86ISA::GpuTLB::GpuTLB
GpuTLB(const Params &p)
Definition: tlb.cc:67
gem5::Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:227
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::Event
Definition: eventq.hh:251
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:291
msr.hh
gem5::X86ISA::GpuTLB::Params
X86GPUTLBParams Params
Definition: tlb.hh:75
gem5::X86ISA::GpuTLB::updatePageFootprint
void updatePageFootprint(Addr virt_page_addr)
Definition: tlb.cc:1298
segment.hh
gem5::EventBase::Maximum_Pri
static const Priority Maximum_Pri
Maximum priority.
Definition: eventq.hh:241
gem5::X86ISA::GpuTLB::translateInt
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
Definition: tlb.cc:302
gem5::X86ISA::GpuTLB::TLBEvent
Definition: tlb.hh:285
gem5::X86ISA::segment_idx::Es
@ Es
Definition: segment.hh:50
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::X86ISA::IntAddrPrefixMask
const Addr IntAddrPrefixMask
Definition: x86_traits.hh:62
gem5::X86ISA::GpuTLB
Definition: tlb.hh:65
process.hh
gem5::GpuTranslationState::tlbEntry
Serializable * tlbEntry
Definition: gpu_translation_state.hh:73
gem5::X86ISA::GpuTLB::Translation
Definition: tlb.hh:81
gem5::X86ISA::TlbEntry::uncacheable
bool uncacheable
Definition: pagetable.hh:84
gem5::X86ISA::GpuTLB::tlbLookup
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss.
Definition: tlb.cc:371
page_size.hh
gem5::X86ISA::GpuTLB::AccessInfo::accessesPerPage
unsigned int accessesPerPage
Definition: tlb.hh:328
gem5::GpuTranslationState::hitLevel
int hitLevel
Definition: gpu_translation_state.hh:85
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
gem5::X86ISA::IntAddrPrefixCPUID
const Addr IntAddrPrefixCPUID
Definition: x86_traits.hh:63
flags
uint8_t flags
Definition: helpers.cc:66
gem5::roundDown
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:279
gem5::X86ISA::segment_idx::Tsg
@ Tsg
Definition: segment.hh:58
gpu_translation_state.hh
gem5::X86ISA::GpuTLB::getPort
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: tlb.cc:138
gem5::X86ISA::GpuTLB::GpuTLBStats::globalNumTLBMisses
statistics::Scalar globalNumTLBMisses
Definition: tlb.hh:370
gem5::X86ISA::GpuTLB::translationReturnEvent
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: tlb.hh:308
gem5::ThreadContext::readMiscRegNoEffect
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
gem5::X86ISA::GpuTLB::GpuTLBStats::pageTableCycles
statistics::Scalar pageTableCycles
Definition: tlb.hh:376
std::pair
STL pair class.
Definition: stl.hh:58
gem5::X86ISA::TlbEntry::paddr
Addr paddr
Definition: pagetable.hh:68
gem5::X86ISA::GpuTLB::CpuSidePort::recvReqRetry
virtual void recvReqRetry()
Definition: tlb.cc:1225
gem5::X86ISA::GpuTLB::freeList
std::vector< EntryList > freeList
Definition: tlb.hh:150
gem5::GpuTranslationState::reqCnt
std::vector< int > reqCnt
Definition: gpu_translation_state.hh:83
gem5::X86ISA::PhysAddrPrefixPciConfig
const Addr PhysAddrPrefixPciConfig
Definition: x86_traits.hh:68
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
tlb.hh
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:542
gem5::X86ISA::GpuTLB::GpuTLBStats::localNumTLBHits
statistics::Scalar localNumTLBHits
Definition: tlb.hh:361
gem5::X86ISA::GpuTLB::cpuSidePort
std::vector< CpuSidePort * > cpuSidePort
Definition: tlb.hh:260
gem5::X86ISA::GpuTLB::invalidateNonGlobal
void invalidateNonGlobal()
Definition: tlb.cc:249
name
const std::string & name()
Definition: trace.cc:49
gem5::X86ISA::expandDown
Bitfield< 14 > expandDown
Definition: misc.hh:996
gem5::X86ISA::GpuTLB::GpuTLBStats::localCycles
statistics::Scalar localCycles
Definition: tlb.hh:379
packet_access.hh
gem5::ArmISA::va
Bitfield< 8 > va
Definition: misc_types.hh:276
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::X86ISA::GpuTLB::setMask
Addr setMask
Definition: tlb.hh:124
gem5::X86ISA::segment_idx::Ms
@ Ms
Definition: segment.hh:60
gem5::X86ISA::GpuTLB::TLBFootprint
AccessPatternTable TLBFootprint
Definition: tlb.hh:346
gem5::X86ISA::GpuTLB::entryList
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
Definition: tlb.hh:159
gem5::X86ISA::GpuTLB::MemSidePort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
Definition: tlb.cc:1247
gem5::X86ISA::GpuTLB::TLB_HIT
@ TLB_HIT
Definition: tlb.hh:194
gem5::Process
Definition: process.hh:68
gem5::Request::STRICT_ORDER
@ STRICT_ORDER
The request is required to be strictly ordered by CPU models and is non-speculative.
Definition: request.hh:135
gem5::X86ISA::GpuTLB::MISS_RETURN
@ MISS_RETURN
Definition: tlb.hh:194
gem5::ThreadContext::getProcessPtr
virtual Process * getProcessPtr()=0
gem5::X86ISA::GpuTLB::AccessInfo::sumDistance
unsigned int sumDistance
Definition: tlb.hh:341
gem5::FullSystem
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:220
gem5::EmulationPageTable::Entry::paddr
Addr paddr
Definition: page_table.hh:58
gem5::X86ISA::GpuTLB::TLBEvent::process
void process()
Definition: tlb.cc:988
gem5::X86ISA::CPL0FlagBit
constexpr auto CPL0FlagBit
Definition: ldstflags.hh:56
gem5::X86ISA::GpuTLB::TLBEvent::description
const char * description() const
Return a C string describing the event.
Definition: tlb.cc:994
gem5::X86ISA::TlbEntry::vaddr
Addr vaddr
Definition: pagetable.hh:71
gem5::X86ISA::AddrSizeFlagMask
constexpr auto AddrSizeFlagMask
Definition: ldstflags.hh:58
gem5::X86ISA::GpuTLB::hitLatency
int hitLatency
Definition: tlb.hh:170
gem5::Packet::makeTimingResponse
void makeTimingResponse()
Definition: packet.hh:1062
gem5::X86ISA::GpuTLB::TLBEvent::TLBEvent
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
Definition: tlb.cc:741
gem5::X86ISA::GpuTLB::TLBEvent::updateOutcome
void updateOutcome(tlbOutcome _outcome)
Definition: tlb.cc:1000
gem5::X86ISA::GpuTLB::missLatency2
int missLatency2
Definition: tlb.hh:172
base.hh
gem5::Port
Ports are used to interface objects to each other.
Definition: port.hh:61
gem5::GpuTranslationState::tlbMode
BaseMMU::Mode tlbMode
Definition: gpu_translation_state.hh:61
gem5::X86ISA::msrAddrToIndex
bool msrAddrToIndex(RegIndex &reg_num, Addr addr)
Find and return the misc reg corresponding to an MSR address.
Definition: msr.cc:150
gem5::ThreadContext::setMiscReg
virtual void setMiscReg(RegIndex misc_reg, RegVal val)=0
gem5::X86ISA::GpuTLB::pagingProtectionChecks
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: tlb.cc:753
gem5::X86ISA::GpuTLB::PAGE_WALK
@ PAGE_WALK
Definition: tlb.hh:194
gem5::X86ISA::seg
Bitfield< 2, 0 > seg
Definition: types.hh:87
gem5::htole
T htole(T value)
Definition: byteswap.hh:172
logging.hh
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::X86ISA::GpuTLB::AccessInfo::meanDistance
unsigned int meanDistance
Definition: tlb.hh:342
gem5::X86ISA::p
Bitfield< 0 > p
Definition: pagetable.hh:151
gem5::X86ISA::GpuTLB::invalidateAll
void invalidateAll()
Definition: tlb.cc:229
gem5::CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:66
gem5::X86ISA::IntAddrPrefixIO
const Addr IntAddrPrefixIO
Definition: x86_traits.hh:65
trace.hh
gem5::GpuTranslationState::isPrefetch
bool isPrefetch
Definition: gpu_translation_state.hh:75
gem5::X86ISA::GpuTLB::AccessInfo
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: tlb.hh:325
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
gem5::X86ISA::GpuTLB::tlb
std::vector< TlbEntry > tlb
Definition: tlb.hh:143
gem5::X86ISA::GpuTLB::GpuTLBStats::globalNumTLBHits
statistics::Scalar globalNumTLBHits
Definition: tlb.hh:369
gem5::X86ISA::GpuTLB::assoc
int assoc
Definition: tlb.hh:117
gem5::RegIndex
uint16_t RegIndex
Definition: types.hh:176
std::list< AddrRange >
gem5::X86ISA::GpuTLB::CpuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and it calls issueTLBLookup() It o...
Definition: tlb.cc:1018
gem5::X86ISA::PageBytes
const Addr PageBytes
Definition: page_size.hh:49
gem5::X86ISA::GpuTLB::translateTiming
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: tlb.cc:625
gem5::X86ISA::GpuTLB::handleTranslationReturn
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
Definition: tlb.cc:787
page_table.hh
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::X86ISA::GpuTLB::MemSidePort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: tlb.cc:1266
gem5::X86ISA::GpuTLB::demapPage
void demapPage(Addr va, uint64_t asn)
Definition: tlb.cc:267
misc.hh
gem5::X86ISA::GpuTLB::GpuTLBStats::localNumTLBMisses
statistics::Scalar localNumTLBMisses
Definition: tlb.hh:362
gem5::EmulationPageTable::Entry
Definition: page_table.hh:56
gem5::X86ISA::misc_reg::segLimit
static RegIndex segLimit(int index)
Definition: misc.hh:524
gem5::X86ISA::PhysAddrPrefixIO
const Addr PhysAddrPrefixIO
Definition: x86_traits.hh:67
gem5::X86ISA::segment_idx::Idtr
@ Idtr
Definition: segment.hh:65
thread_context.hh
gem5::X86ISA::GpuTLB::maxCoalescedReqs
int maxCoalescedReqs
Definition: tlb.hh:269
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
gem5::X86ISA::GpuTLB::cleanupEvent
EventFunctionWrapper cleanupEvent
Definition: tlb.hh:318
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::statistics::ScalarBase::value
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:622
gem5::X86ISA::segment_idx::Hs
@ Hs
Definition: segment.hh:56
gem5::GpuTranslationState::tc
ThreadContext * tc
Definition: gpu_translation_state.hh:63

Generated on Thu Jul 28 2022 13:32:08 for gem5 by doxygen 1.8.17