gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gpu_tlb.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Lisa Hsu
34  */
35 
36 #include "gpu-compute/gpu_tlb.hh"
37 
38 #include <cmath>
39 #include <cstring>
40 
41 #include "arch/x86/faults.hh"
43 #include "arch/x86/pagetable.hh"
45 #include "arch/x86/regs/misc.hh"
46 #include "arch/x86/x86_traits.hh"
47 #include "base/bitfield.hh"
48 #include "base/logging.hh"
49 #include "base/output.hh"
50 #include "base/trace.hh"
51 #include "cpu/base.hh"
52 #include "cpu/thread_context.hh"
53 #include "debug/GPUPrefetch.hh"
54 #include "debug/GPUTLB.hh"
55 #include "mem/packet_access.hh"
56 #include "mem/page_table.hh"
57 #include "mem/request.hh"
58 #include "sim/process.hh"
59 
60 namespace X86ISA
61 {
62 
64  : ClockedObject(p), configAddress(0), size(p->size),
65  cleanupEvent([this]{ cleanup(); }, name(), false,
67  exitEvent([this]{ exitCallback(); }, name())
68  {
69  assoc = p->assoc;
70  assert(assoc <= size);
71  numSets = size/assoc;
72  allocationPolicy = p->allocationPolicy;
73  hasMemSidePort = false;
74  accessDistance = p->accessDistance;
75  clock = p->clk_domain->clockPeriod();
76 
77  tlb.assign(size, TlbEntry());
78 
79  freeList.resize(numSets);
80  entryList.resize(numSets);
81 
82  for (int set = 0; set < numSets; ++set) {
83  for (int way = 0; way < assoc; ++way) {
84  int x = set * assoc + way;
85  freeList[set].push_back(&tlb.at(x));
86  }
87  }
88 
89  FA = (size == assoc);
90 
99  setMask = numSets - 1;
100 
101  maxCoalescedReqs = p->maxOutstandingReqs;
102 
103  // Do not allow maxCoalescedReqs to be more than the TLB associativity
104  if (maxCoalescedReqs > assoc) {
106  cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
107  }
108 
109  outstandingReqs = 0;
110  hitLatency = p->hitLatency;
111  missLatency1 = p->missLatency1;
112  missLatency2 = p->missLatency2;
113 
114  // create the slave ports based on the number of connected ports
115  for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
116  cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
117  name(), i), this, i));
118  }
119 
120  // create the master ports based on the number of connected ports
121  for (size_t i = 0; i < p->port_master_connection_count; ++i) {
122  memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
123  name(), i), this, i));
124  }
125  }
126 
127  // fixme: this is never called?
129  {
130  // make sure all the hash-maps are empty
131  assert(translationReturnEvent.empty());
132  }
133 
134  Port &
135  GpuTLB::getPort(const std::string &if_name, PortID idx)
136  {
137  if (if_name == "slave") {
138  if (idx >= static_cast<PortID>(cpuSidePort.size())) {
139  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
140  }
141 
142  return *cpuSidePort[idx];
143  } else if (if_name == "master") {
144  if (idx >= static_cast<PortID>(memSidePort.size())) {
145  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
146  }
147 
148  hasMemSidePort = true;
149 
150  return *memSidePort[idx];
151  } else {
152  panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
153  }
154  }
155 
156  TlbEntry*
158  {
159  TlbEntry *newEntry = nullptr;
160 
165  int set = (vpn >> TheISA::PageShift) & setMask;
166 
167  if (!freeList[set].empty()) {
168  newEntry = freeList[set].front();
169  freeList[set].pop_front();
170  } else {
171  newEntry = entryList[set].back();
172  entryList[set].pop_back();
173  }
174 
175  *newEntry = entry;
176  newEntry->vaddr = vpn;
177  entryList[set].push_front(newEntry);
178 
179  return newEntry;
180  }
181 
182  GpuTLB::EntryList::iterator
183  GpuTLB::lookupIt(Addr va, bool update_lru)
184  {
185  int set = (va >> TheISA::PageShift) & setMask;
186 
187  if (FA) {
188  assert(!set);
189  }
190 
191  auto entry = entryList[set].begin();
192  for (; entry != entryList[set].end(); ++entry) {
193  int page_size = (*entry)->size();
194 
195  if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
196  DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
197  "with size %#x.\n", va, (*entry)->vaddr, page_size);
198 
199  if (update_lru) {
200  entryList[set].push_front(*entry);
201  entryList[set].erase(entry);
202  entry = entryList[set].begin();
203  }
204 
205  break;
206  }
207  }
208 
209  return entry;
210  }
211 
212  TlbEntry*
213  GpuTLB::lookup(Addr va, bool update_lru)
214  {
215  int set = (va >> TheISA::PageShift) & setMask;
216 
217  auto entry = lookupIt(va, update_lru);
218 
219  if (entry == entryList[set].end())
220  return nullptr;
221  else
222  return *entry;
223  }
224 
225  void
227  {
228  DPRINTF(GPUTLB, "Invalidating all entries.\n");
229 
230  for (int i = 0; i < numSets; ++i) {
231  while (!entryList[i].empty()) {
232  TlbEntry *entry = entryList[i].front();
233  entryList[i].pop_front();
234  freeList[i].push_back(entry);
235  }
236  }
237  }
238 
239  void
241  {
243  }
244 
245  void
247  {
248  DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
249 
250  for (int i = 0; i < numSets; ++i) {
251  for (auto entryIt = entryList[i].begin();
252  entryIt != entryList[i].end();) {
253  if (!(*entryIt)->global) {
254  freeList[i].push_back(*entryIt);
255  entryList[i].erase(entryIt++);
256  } else {
257  ++entryIt;
258  }
259  }
260  }
261  }
262 
263  void
264  GpuTLB::demapPage(Addr va, uint64_t asn)
265  {
266 
267  int set = (va >> TheISA::PageShift) & setMask;
268  auto entry = lookupIt(va, false);
269 
270  if (entry != entryList[set].end()) {
271  freeList[set].push_back(*entry);
272  entryList[set].erase(entry);
273  }
274  }
275 
276  Fault
278  {
279  DPRINTF(GPUTLB, "Addresses references internal memory.\n");
280  Addr vaddr = req->getVaddr();
281  Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
282 
283  if (prefix == IntAddrPrefixCPUID) {
284  panic("CPUID memory space not yet implemented!\n");
285  } else if (prefix == IntAddrPrefixMSR) {
286  vaddr = vaddr >> 3;
287  req->setFlags(Request::MMAPPED_IPR);
288  Addr regNum = 0;
289 
290  switch (vaddr & ~IntAddrPrefixMask) {
291  case 0x10:
292  regNum = MISCREG_TSC;
293  break;
294  case 0x1B:
295  regNum = MISCREG_APIC_BASE;
296  break;
297  case 0xFE:
298  regNum = MISCREG_MTRRCAP;
299  break;
300  case 0x174:
301  regNum = MISCREG_SYSENTER_CS;
302  break;
303  case 0x175:
304  regNum = MISCREG_SYSENTER_ESP;
305  break;
306  case 0x176:
307  regNum = MISCREG_SYSENTER_EIP;
308  break;
309  case 0x179:
310  regNum = MISCREG_MCG_CAP;
311  break;
312  case 0x17A:
313  regNum = MISCREG_MCG_STATUS;
314  break;
315  case 0x17B:
316  regNum = MISCREG_MCG_CTL;
317  break;
318  case 0x1D9:
319  regNum = MISCREG_DEBUG_CTL_MSR;
320  break;
321  case 0x1DB:
323  break;
324  case 0x1DC:
325  regNum = MISCREG_LAST_BRANCH_TO_IP;
326  break;
327  case 0x1DD:
329  break;
330  case 0x1DE:
332  break;
333  case 0x200:
334  regNum = MISCREG_MTRR_PHYS_BASE_0;
335  break;
336  case 0x201:
337  regNum = MISCREG_MTRR_PHYS_MASK_0;
338  break;
339  case 0x202:
340  regNum = MISCREG_MTRR_PHYS_BASE_1;
341  break;
342  case 0x203:
343  regNum = MISCREG_MTRR_PHYS_MASK_1;
344  break;
345  case 0x204:
346  regNum = MISCREG_MTRR_PHYS_BASE_2;
347  break;
348  case 0x205:
349  regNum = MISCREG_MTRR_PHYS_MASK_2;
350  break;
351  case 0x206:
352  regNum = MISCREG_MTRR_PHYS_BASE_3;
353  break;
354  case 0x207:
355  regNum = MISCREG_MTRR_PHYS_MASK_3;
356  break;
357  case 0x208:
358  regNum = MISCREG_MTRR_PHYS_BASE_4;
359  break;
360  case 0x209:
361  regNum = MISCREG_MTRR_PHYS_MASK_4;
362  break;
363  case 0x20A:
364  regNum = MISCREG_MTRR_PHYS_BASE_5;
365  break;
366  case 0x20B:
367  regNum = MISCREG_MTRR_PHYS_MASK_5;
368  break;
369  case 0x20C:
370  regNum = MISCREG_MTRR_PHYS_BASE_6;
371  break;
372  case 0x20D:
373  regNum = MISCREG_MTRR_PHYS_MASK_6;
374  break;
375  case 0x20E:
376  regNum = MISCREG_MTRR_PHYS_BASE_7;
377  break;
378  case 0x20F:
379  regNum = MISCREG_MTRR_PHYS_MASK_7;
380  break;
381  case 0x250:
383  break;
384  case 0x258:
386  break;
387  case 0x259:
389  break;
390  case 0x268:
391  regNum = MISCREG_MTRR_FIX_4K_C0000;
392  break;
393  case 0x269:
394  regNum = MISCREG_MTRR_FIX_4K_C8000;
395  break;
396  case 0x26A:
397  regNum = MISCREG_MTRR_FIX_4K_D0000;
398  break;
399  case 0x26B:
400  regNum = MISCREG_MTRR_FIX_4K_D8000;
401  break;
402  case 0x26C:
403  regNum = MISCREG_MTRR_FIX_4K_E0000;
404  break;
405  case 0x26D:
406  regNum = MISCREG_MTRR_FIX_4K_E8000;
407  break;
408  case 0x26E:
409  regNum = MISCREG_MTRR_FIX_4K_F0000;
410  break;
411  case 0x26F:
412  regNum = MISCREG_MTRR_FIX_4K_F8000;
413  break;
414  case 0x277:
415  regNum = MISCREG_PAT;
416  break;
417  case 0x2FF:
418  regNum = MISCREG_DEF_TYPE;
419  break;
420  case 0x400:
421  regNum = MISCREG_MC0_CTL;
422  break;
423  case 0x404:
424  regNum = MISCREG_MC1_CTL;
425  break;
426  case 0x408:
427  regNum = MISCREG_MC2_CTL;
428  break;
429  case 0x40C:
430  regNum = MISCREG_MC3_CTL;
431  break;
432  case 0x410:
433  regNum = MISCREG_MC4_CTL;
434  break;
435  case 0x414:
436  regNum = MISCREG_MC5_CTL;
437  break;
438  case 0x418:
439  regNum = MISCREG_MC6_CTL;
440  break;
441  case 0x41C:
442  regNum = MISCREG_MC7_CTL;
443  break;
444  case 0x401:
445  regNum = MISCREG_MC0_STATUS;
446  break;
447  case 0x405:
448  regNum = MISCREG_MC1_STATUS;
449  break;
450  case 0x409:
451  regNum = MISCREG_MC2_STATUS;
452  break;
453  case 0x40D:
454  regNum = MISCREG_MC3_STATUS;
455  break;
456  case 0x411:
457  regNum = MISCREG_MC4_STATUS;
458  break;
459  case 0x415:
460  regNum = MISCREG_MC5_STATUS;
461  break;
462  case 0x419:
463  regNum = MISCREG_MC6_STATUS;
464  break;
465  case 0x41D:
466  regNum = MISCREG_MC7_STATUS;
467  break;
468  case 0x402:
469  regNum = MISCREG_MC0_ADDR;
470  break;
471  case 0x406:
472  regNum = MISCREG_MC1_ADDR;
473  break;
474  case 0x40A:
475  regNum = MISCREG_MC2_ADDR;
476  break;
477  case 0x40E:
478  regNum = MISCREG_MC3_ADDR;
479  break;
480  case 0x412:
481  regNum = MISCREG_MC4_ADDR;
482  break;
483  case 0x416:
484  regNum = MISCREG_MC5_ADDR;
485  break;
486  case 0x41A:
487  regNum = MISCREG_MC6_ADDR;
488  break;
489  case 0x41E:
490  regNum = MISCREG_MC7_ADDR;
491  break;
492  case 0x403:
493  regNum = MISCREG_MC0_MISC;
494  break;
495  case 0x407:
496  regNum = MISCREG_MC1_MISC;
497  break;
498  case 0x40B:
499  regNum = MISCREG_MC2_MISC;
500  break;
501  case 0x40F:
502  regNum = MISCREG_MC3_MISC;
503  break;
504  case 0x413:
505  regNum = MISCREG_MC4_MISC;
506  break;
507  case 0x417:
508  regNum = MISCREG_MC5_MISC;
509  break;
510  case 0x41B:
511  regNum = MISCREG_MC6_MISC;
512  break;
513  case 0x41F:
514  regNum = MISCREG_MC7_MISC;
515  break;
516  case 0xC0000080:
517  regNum = MISCREG_EFER;
518  break;
519  case 0xC0000081:
520  regNum = MISCREG_STAR;
521  break;
522  case 0xC0000082:
523  regNum = MISCREG_LSTAR;
524  break;
525  case 0xC0000083:
526  regNum = MISCREG_CSTAR;
527  break;
528  case 0xC0000084:
529  regNum = MISCREG_SF_MASK;
530  break;
531  case 0xC0000100:
532  regNum = MISCREG_FS_BASE;
533  break;
534  case 0xC0000101:
535  regNum = MISCREG_GS_BASE;
536  break;
537  case 0xC0000102:
538  regNum = MISCREG_KERNEL_GS_BASE;
539  break;
540  case 0xC0000103:
541  regNum = MISCREG_TSC_AUX;
542  break;
543  case 0xC0010000:
544  regNum = MISCREG_PERF_EVT_SEL0;
545  break;
546  case 0xC0010001:
547  regNum = MISCREG_PERF_EVT_SEL1;
548  break;
549  case 0xC0010002:
550  regNum = MISCREG_PERF_EVT_SEL2;
551  break;
552  case 0xC0010003:
553  regNum = MISCREG_PERF_EVT_SEL3;
554  break;
555  case 0xC0010004:
556  regNum = MISCREG_PERF_EVT_CTR0;
557  break;
558  case 0xC0010005:
559  regNum = MISCREG_PERF_EVT_CTR1;
560  break;
561  case 0xC0010006:
562  regNum = MISCREG_PERF_EVT_CTR2;
563  break;
564  case 0xC0010007:
565  regNum = MISCREG_PERF_EVT_CTR3;
566  break;
567  case 0xC0010010:
568  regNum = MISCREG_SYSCFG;
569  break;
570  case 0xC0010016:
571  regNum = MISCREG_IORR_BASE0;
572  break;
573  case 0xC0010017:
574  regNum = MISCREG_IORR_BASE1;
575  break;
576  case 0xC0010018:
577  regNum = MISCREG_IORR_MASK0;
578  break;
579  case 0xC0010019:
580  regNum = MISCREG_IORR_MASK1;
581  break;
582  case 0xC001001A:
583  regNum = MISCREG_TOP_MEM;
584  break;
585  case 0xC001001D:
586  regNum = MISCREG_TOP_MEM2;
587  break;
588  case 0xC0010114:
589  regNum = MISCREG_VM_CR;
590  break;
591  case 0xC0010115:
592  regNum = MISCREG_IGNNE;
593  break;
594  case 0xC0010116:
595  regNum = MISCREG_SMM_CTL;
596  break;
597  case 0xC0010117:
598  regNum = MISCREG_VM_HSAVE_PA;
599  break;
600  default:
601  return std::make_shared<GeneralProtection>(0);
602  }
603  //The index is multiplied by the size of a MiscReg so that
604  //any memory dependence calculations will not see these as
605  //overlapping.
606  req->setPaddr(regNum * sizeof(RegVal));
607  return NoFault;
608  } else if (prefix == IntAddrPrefixIO) {
609  // TODO If CPL > IOPL or in virtual mode, check the I/O permission
610  // bitmap in the TSS.
611 
612  Addr IOPort = vaddr & ~IntAddrPrefixMask;
613  // Make sure the address fits in the expected 16 bit IO address
614  // space.
615  assert(!(IOPort & ~0xFFFF));
616 
617  if (IOPort == 0xCF8 && req->getSize() == 4) {
618  req->setFlags(Request::MMAPPED_IPR);
619  req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
620  } else if ((IOPort & ~mask(2)) == 0xCFC) {
621  req->setFlags(Request::UNCACHEABLE);
622 
625 
626  if (bits(configAddress, 31, 31)) {
627  req->setPaddr(PhysAddrPrefixPciConfig |
628  mbits(configAddress, 30, 2) |
629  (IOPort & mask(2)));
630  } else {
631  req->setPaddr(PhysAddrPrefixIO | IOPort);
632  }
633  } else {
634  req->setFlags(Request::UNCACHEABLE);
635  req->setPaddr(PhysAddrPrefixIO | IOPort);
636  }
637  return NoFault;
638  } else {
639  panic("Access to unrecognized internal address space %#x.\n",
640  prefix);
641  }
642  }
643 
651  bool
653  ThreadContext *tc, bool update_stats)
654  {
655  bool tlb_hit = false;
656  #ifndef NDEBUG
657  uint32_t flags = req->getFlags();
658  int seg = flags & SegmentFlagMask;
659  #endif
660 
661  assert(seg != SEGMENT_REG_MS);
662  Addr vaddr = req->getVaddr();
663  DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
664  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
665 
666  if (m5Reg.prot) {
667  DPRINTF(GPUTLB, "In protected mode.\n");
668  // make sure we are in 64-bit mode
669  assert(m5Reg.mode == LongMode);
670 
671  // If paging is enabled, do the translation.
672  if (m5Reg.paging) {
673  DPRINTF(GPUTLB, "Paging enabled.\n");
674  //update LRU stack on a hit
675  TlbEntry *entry = lookup(vaddr, true);
676 
677  if (entry)
678  tlb_hit = true;
679 
680  if (!update_stats) {
681  // functional tlb access for memory initialization
682  // i.e., memory seeding or instr. seeding -> don't update
683  // TLB and stats
684  return tlb_hit;
685  }
686 
688 
689  if (!entry) {
691  } else {
692  localNumTLBHits++;
693  }
694  }
695  }
696 
697  return tlb_hit;
698  }
699 
700  Fault
702  Translation *translation, Mode mode,
703  bool &delayedResponse, bool timing, int &latency)
704  {
705  uint32_t flags = req->getFlags();
706  int seg = flags & SegmentFlagMask;
707  bool storeCheck = flags & (StoreCheck << FlagShift);
708 
709  // If this is true, we're dealing with a request
710  // to a non-memory address space.
711  if (seg == SEGMENT_REG_MS) {
712  return translateInt(req, tc);
713  }
714 
715  delayedResponse = false;
716  Addr vaddr = req->getVaddr();
717  DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
718 
719  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
720 
721  // If protected mode has been enabled...
722  if (m5Reg.prot) {
723  DPRINTF(GPUTLB, "In protected mode.\n");
724  // If we're not in 64-bit mode, do protection/limit checks
725  if (m5Reg.mode != LongMode) {
726  DPRINTF(GPUTLB, "Not in long mode. Checking segment "
727  "protection.\n");
728 
729  // Check for a null segment selector.
730  if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
731  seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
732  && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
733  return std::make_shared<GeneralProtection>(0);
734  }
735 
736  bool expandDown = false;
737  SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
738 
739  if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
740  if (!attr.writable && (mode == BaseTLB::Write ||
741  storeCheck))
742  return std::make_shared<GeneralProtection>(0);
743 
744  if (!attr.readable && mode == BaseTLB::Read)
745  return std::make_shared<GeneralProtection>(0);
746 
747  expandDown = attr.expandDown;
748 
749  }
750 
753  // This assumes we're not in 64 bit mode. If we were, the
754  // default address size is 64 bits, overridable to 32.
755  int size = 32;
756  bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
757  SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
758 
759  if ((csAttr.defaultSize && sizeOverride) ||
760  (!csAttr.defaultSize && !sizeOverride)) {
761  size = 16;
762  }
763 
764  Addr offset = bits(vaddr - base, size - 1, 0);
765  Addr endOffset = offset + req->getSize() - 1;
766 
767  if (expandDown) {
768  DPRINTF(GPUTLB, "Checking an expand down segment.\n");
769  warn_once("Expand down segments are untested.\n");
770 
771  if (offset <= limit || endOffset <= limit)
772  return std::make_shared<GeneralProtection>(0);
773  } else {
774  if (offset > limit || endOffset > limit)
775  return std::make_shared<GeneralProtection>(0);
776  }
777  }
778 
779  // If paging is enabled, do the translation.
780  if (m5Reg.paging) {
781  DPRINTF(GPUTLB, "Paging enabled.\n");
782  // The vaddr already has the segment base applied.
783  TlbEntry *entry = lookup(vaddr);
785 
786  if (!entry) {
788  if (timing) {
789  latency = missLatency1;
790  }
791 
792  if (FullSystem) {
793  fatal("GpuTLB doesn't support full-system mode\n");
794  } else {
795  DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
796  "at pc %#x.\n", vaddr, tc->instAddr());
797 
798  Process *p = tc->getProcessPtr();
799  const EmulationPageTable::Entry *pte =
800  p->pTable->lookup(vaddr);
801 
802  if (!pte && mode != BaseTLB::Execute) {
803  // penalize a "page fault" more
804  if (timing)
805  latency += missLatency2;
806 
807  if (p->fixupStackFault(vaddr))
808  pte = p->pTable->lookup(vaddr);
809  }
810 
811  if (!pte) {
812  return std::make_shared<PageFault>(vaddr, true,
813  mode, true,
814  false);
815  } else {
816  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
817 
818  DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
819  alignedVaddr, pte->paddr);
820 
821  TlbEntry gpuEntry(p->pid(), alignedVaddr,
822  pte->paddr, false, false);
823  entry = insert(alignedVaddr, gpuEntry);
824  }
825 
826  DPRINTF(GPUTLB, "Miss was serviced.\n");
827  }
828  } else {
829  localNumTLBHits++;
830 
831  if (timing) {
832  latency = hitLatency;
833  }
834  }
835 
836  // Do paging protection checks.
837  bool inUser = (m5Reg.cpl == 3 &&
838  !(flags & (CPL0FlagBit << FlagShift)));
839 
840  CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
841  bool badWrite = (!entry->writable && (inUser || cr0.wp));
842 
843  if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
844  badWrite)) {
845  // The page must have been present to get into the TLB in
846  // the first place. We'll assume the reserved bits are
847  // fine even though we're not checking them.
848  return std::make_shared<PageFault>(vaddr, true, mode,
849  inUser, false);
850  }
851 
852  if (storeCheck && badWrite) {
853  // This would fault if this were a write, so return a page
854  // fault that reflects that happening.
855  return std::make_shared<PageFault>(vaddr, true,
857  inUser, false);
858  }
859 
860 
861  DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
862  "checks.\n", entry->paddr);
863 
864  int page_size = entry->size();
865  Addr paddr = entry->paddr | (vaddr & (page_size - 1));
866  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
867  req->setPaddr(paddr);
868 
869  if (entry->uncacheable)
870  req->setFlags(Request::UNCACHEABLE);
871  } else {
872  //Use the address which already has segmentation applied.
873  DPRINTF(GPUTLB, "Paging disabled.\n");
874  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
875  req->setPaddr(vaddr);
876  }
877  } else {
878  // Real mode
879  DPRINTF(GPUTLB, "In real mode.\n");
880  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
881  req->setPaddr(vaddr);
882  }
883 
884  // Check for an access to the local APIC
885  if (FullSystem) {
886  LocalApicBase localApicBase =
888 
889  Addr baseAddr = localApicBase.base * PageBytes;
890  Addr paddr = req->getPaddr();
891 
892  if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
893  // Force the access to be uncacheable.
894  req->setFlags(Request::UNCACHEABLE);
895  req->setPaddr(x86LocalAPICAddress(tc->contextId(),
896  paddr - baseAddr));
897  }
898  }
899 
900  return NoFault;
901  };
902 
903  Fault
905  Mode mode, int &latency)
906  {
907  bool delayedResponse;
908 
909  return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
910  latency);
911  }
912 
913  void
915  Translation *translation, Mode mode, int &latency)
916  {
917  bool delayedResponse;
918  assert(translation);
919 
920  Fault fault = GpuTLB::translate(req, tc, translation, mode,
921  delayedResponse, true, latency);
922 
923  if (!delayedResponse)
924  translation->finish(fault, req, tc, mode);
925  }
926 
927  Walker*
929  {
930  return walker;
931  }
932 
933 
934  void
936  {
937  }
938 
939  void
941  {
942  }
943 
944  void
946  {
948 
950  .name(name() + ".local_TLB_accesses")
951  .desc("Number of TLB accesses")
952  ;
953 
955  .name(name() + ".local_TLB_hits")
956  .desc("Number of TLB hits")
957  ;
958 
960  .name(name() + ".local_TLB_misses")
961  .desc("Number of TLB misses")
962  ;
963 
965  .name(name() + ".local_TLB_miss_rate")
966  .desc("TLB miss rate")
967  ;
968 
970  .name(name() + ".access_cycles")
971  .desc("Cycles spent accessing this TLB level")
972  ;
973 
975  .name(name() + ".page_table_cycles")
976  .desc("Cycles spent accessing the page table")
977  ;
978 
980 
982  .name(name() + ".unique_pages")
983  .desc("Number of unique pages touched")
984  ;
985 
987  .name(name() + ".local_cycles")
988  .desc("Number of cycles spent in queue for all incoming reqs")
989  ;
990 
992  .name(name() + ".local_latency")
993  .desc("Avg. latency over incoming coalesced reqs")
994  ;
995 
997 
999  .name(name() + ".global_TLB_accesses")
1000  .desc("Number of TLB accesses")
1001  ;
1002 
1004  .name(name() + ".global_TLB_hits")
1005  .desc("Number of TLB hits")
1006  ;
1007 
1009  .name(name() + ".global_TLB_misses")
1010  .desc("Number of TLB misses")
1011  ;
1012 
1014  .name(name() + ".global_TLB_miss_rate")
1015  .desc("TLB miss rate")
1016  ;
1017 
1019 
1021  .name(name() + ".avg_reuse_distance")
1022  .desc("avg. reuse distance over all pages (in ticks)")
1023  ;
1024 
1025  }
1026 
1032  void
1034  {
1035  assert(pkt);
1036  assert(pkt->senderState);
1037 
1038  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1040 
1041  TranslationState *sender_state =
1043 
1044  bool update_stats = !sender_state->prefetch;
1045  ThreadContext * tmp_tc = sender_state->tc;
1046 
1047  DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1048  virt_page_addr);
1049 
1050  int req_cnt = sender_state->reqCnt.back();
1051 
1052  if (update_stats) {
1053  accessCycles -= (curTick() * req_cnt);
1054  localCycles -= curTick();
1055  updatePageFootprint(virt_page_addr);
1056  globalNumTLBAccesses += req_cnt;
1057  }
1058 
1059  tlbOutcome lookup_outcome = TLB_MISS;
1060  const RequestPtr &tmp_req = pkt->req;
1061 
1062  // Access the TLB and figure out if it's a hit or a miss.
1063  bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1064 
1065  if (success) {
1066  lookup_outcome = TLB_HIT;
1067  // Put the entry in SenderState
1068  TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1069  assert(entry);
1070 
1071  auto p = sender_state->tc->getProcessPtr();
1072  sender_state->tlbEntry =
1073  new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1074  false, false);
1075 
1076  if (update_stats) {
1077  // the reqCnt has an entry per level, so its size tells us
1078  // which level we are in
1079  sender_state->hitLevel = sender_state->reqCnt.size();
1080  globalNumTLBHits += req_cnt;
1081  }
1082  } else {
1083  if (update_stats)
1084  globalNumTLBMisses += req_cnt;
1085  }
1086 
1087  /*
1088  * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1089  * as the TLB access latency.
1090  *
1091  * We create and schedule a new TLBEvent which will help us take the
1092  * appropriate actions (e.g., update TLB on a hit, send request to lower
1093  * level TLB on a miss, or start a page walk if this was the last-level
1094  * TLB)
1095  */
1096  TLBEvent *tlb_event =
1097  new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1098 
1099  if (translationReturnEvent.count(virt_page_addr)) {
1100  panic("Virtual Page Address %#x already has a return event\n",
1101  virt_page_addr);
1102  }
1103 
1104  translationReturnEvent[virt_page_addr] = tlb_event;
1105  assert(tlb_event);
1106 
1107  DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1108  curTick() + this->ticks(hitLatency));
1109 
1110  schedule(tlb_event, curTick() + this->ticks(hitLatency));
1111  }
1112 
1114  PacketPtr _pkt)
1115  : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1116  outcome(tlb_outcome), pkt(_pkt)
1117  {
1118  }
1119 
1124  void
1126  TlbEntry * tlb_entry, Mode mode)
1127  {
1128  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1129  uint32_t flags = pkt->req->getFlags();
1130  bool storeCheck = flags & (StoreCheck << FlagShift);
1131 
1132  // Do paging protection checks.
1133  bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1134  CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1135 
1136  bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1137 
1138  if ((inUser && !tlb_entry->user) ||
1139  (mode == BaseTLB::Write && badWrite)) {
1140  // The page must have been present to get into the TLB in
1141  // the first place. We'll assume the reserved bits are
1142  // fine even though we're not checking them.
1143  panic("Page fault detected");
1144  }
1145 
1146  if (storeCheck && badWrite) {
1147  // This would fault if this were a write, so return a page
1148  // fault that reflects that happening.
1149  panic("Page fault detected");
1150  }
1151  }
1152 
1158  void
1159  GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1160  PacketPtr pkt)
1161  {
1162 
1163  assert(pkt);
1164  Addr vaddr = pkt->req->getVaddr();
1165 
1166  TranslationState *sender_state =
1168 
1169  ThreadContext *tc = sender_state->tc;
1170  Mode mode = sender_state->tlbMode;
1171 
1172  TlbEntry *local_entry, *new_entry;
1173 
1174  if (tlb_outcome == TLB_HIT) {
1175  DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1176  local_entry = sender_state->tlbEntry;
1177  } else {
1178  DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1179  vaddr);
1180 
1181  // We are returning either from a page walk or from a hit at a lower
1182  // TLB level. The senderState should be "carrying" a pointer to the
1183  // correct TLBEntry.
1184  new_entry = sender_state->tlbEntry;
1185  assert(new_entry);
1186  local_entry = new_entry;
1187 
1188  if (allocationPolicy) {
1189  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1190  virt_page_addr);
1191 
1192  local_entry = insert(virt_page_addr, *new_entry);
1193  }
1194 
1195  assert(local_entry);
1196  }
1197 
1203  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1204  "while paddr was %#x.\n", local_entry->vaddr,
1205  local_entry->paddr);
1206 
1207  pagingProtectionChecks(tc, pkt, local_entry, mode);
1208  int page_size = local_entry->size();
1209  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1210  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1211 
1212  // Since this packet will be sent through the cpu side slave port,
1213  // it must be converted to a response pkt if it is not one already
1214  if (pkt->isRequest()) {
1215  pkt->makeTimingResponse();
1216  }
1217 
1218  pkt->req->setPaddr(paddr);
1219 
1220  if (local_entry->uncacheable) {
1221  pkt->req->setFlags(Request::UNCACHEABLE);
1222  }
1223 
1224  //send packet back to coalescer
1225  cpuSidePort[0]->sendTimingResp(pkt);
1226  //schedule cleanup event
1227  cleanupQueue.push(virt_page_addr);
1228 
1229  // schedule this only once per cycle.
1230  // The check is required because we might have multiple translations
1231  // returning the same cycle
1232  // this is a maximum priority event and must be on the same cycle
1233  // as the cleanup event in TLBCoalescer to avoid a race with
1234  // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1235  if (!cleanupEvent.scheduled())
1237  }
1238 
1243  void
1245  PacketPtr pkt)
1246  {
1247  DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1248 
1249  assert(translationReturnEvent[virtPageAddr]);
1250  assert(pkt);
1251 
1252  TranslationState *tmp_sender_state =
1254 
1255  int req_cnt = tmp_sender_state->reqCnt.back();
1256  bool update_stats = !tmp_sender_state->prefetch;
1257 
1258 
1259  if (outcome == TLB_HIT) {
1260  handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1261 
1262  if (update_stats) {
1263  accessCycles += (req_cnt * curTick());
1264  localCycles += curTick();
1265  }
1266 
1267  } else if (outcome == TLB_MISS) {
1268 
1269  DPRINTF(GPUTLB, "This is a TLB miss\n");
1270  if (update_stats) {
1271  accessCycles += (req_cnt*curTick());
1272  localCycles += curTick();
1273  }
1274 
1275  if (hasMemSidePort) {
1276  // the one cyle added here represent the delay from when we get
1277  // the reply back till when we propagate it to the coalescer
1278  // above.
1279  if (update_stats) {
1280  accessCycles += (req_cnt * 1);
1281  localCycles += 1;
1282  }
1283 
1289  if (!memSidePort[0]->sendTimingReq(pkt)) {
1290  DPRINTF(GPUTLB, "Failed sending translation request to "
1291  "lower level TLB for addr %#x\n", virtPageAddr);
1292 
1293  memSidePort[0]->retries.push_back(pkt);
1294  } else {
1295  DPRINTF(GPUTLB, "Sent translation request to lower level "
1296  "TLB for addr %#x\n", virtPageAddr);
1297  }
1298  } else {
1299  //this is the last level TLB. Start a page walk
1300  DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1301  "addr %#x\n", virtPageAddr);
1302 
1303  if (update_stats)
1304  pageTableCycles -= (req_cnt*curTick());
1305 
1307  assert(tlb_event);
1308  tlb_event->updateOutcome(PAGE_WALK);
1309  schedule(tlb_event, curTick() + ticks(missLatency2));
1310  }
1311  } else if (outcome == PAGE_WALK) {
1312  if (update_stats)
1313  pageTableCycles += (req_cnt*curTick());
1314 
1315  // Need to access the page table and update the TLB
1316  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1317  virtPageAddr);
1318 
1319  TranslationState *sender_state =
1321 
1322  Process *p = sender_state->tc->getProcessPtr();
1323  Addr vaddr = pkt->req->getVaddr();
1324  #ifndef NDEBUG
1325  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1326  assert(alignedVaddr == virtPageAddr);
1327  #endif
1328  const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1329  if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1330  p->fixupStackFault(vaddr)) {
1331  pte = p->pTable->lookup(vaddr);
1332  }
1333 
1334  if (pte) {
1335  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1336  pte->paddr);
1337 
1338  sender_state->tlbEntry =
1339  new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1340  false);
1341  } else {
1342  sender_state->tlbEntry = nullptr;
1343  }
1344 
1345  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1346  } else if (outcome == MISS_RETURN) {
1350  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1351  } else {
1352  panic("Unexpected TLB outcome %d", outcome);
1353  }
1354  }
1355 
// GpuTLB::TLBEvent::process()
// Event callback fired when this TLBEvent is scheduled.
// NOTE(review): the signature line (1357) and the single body statement
// (1359) are missing from this extract; the body presumably forwards to
// tlb->translationReturn(virtPageAddr, outcome, pkt) — confirm upstream.
1356  void
1358  {
1360  }
1361 
// GpuTLB::TLBEvent::description()
// Returns a static C string describing the event (used by event tracing).
// NOTE(review): the signature line (1363) is missing from this extract.
1362  const char*
1364  {
1365  return "trigger translationDoneEvent";
1366  }
1367 
// GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
// Mutator: records the outcome (TLB_HIT/TLB_MISS/PAGE_WALK/MISS_RETURN) that
// process() will act on the next time this event fires.
// NOTE(review): the signature line (1369) is missing from this extract.
1368  void
1370  {
1371  outcome = _outcome;
1372  }
1373 
// GpuTLB::TLBEvent::getTLBEventVaddr()
// Accessor for the virtual page address this event was created for; used by
// MemSidePort::recvTimingResp to sanity-check event/packet pairing.
// NOTE(review): the signature line (1375) is missing from this extract.
1374  Addr
1376  {
1377  return virtPageAddr;
1378  }
1379 
1380  /*
1381  * recvTiming receives a coalesced timing request from a TLBCoalescer
1382  * and it calls issueTLBLookup()
1383  * It only rejects the packet if we have exceeded the max
1384  * outstanding number of requests for the TLB
1385  */
// Returns true if the request was accepted (lookup issued), false if the TLB
// is saturated — the coalescer must then retry after cleanup() calls
// sendRetryReq().
// NOTE(review): the signature line (1387) and the admission-test line (1389,
// presumably "if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {") are
// missing from this extract — confirm upstream.
1386  bool
1388  {
1390  tlb->issueTLBLookup(pkt);
1391  // update number of outstanding translation requests
1392  tlb->outstandingReqs++;
1393  return true;
1394  } else {
1395  DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1396  tlb->outstandingReqs);
1397  return false;
1398  }
1399  }
1400 
// GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
// Functional-path counterpart of handleTranslationReturn: runs on a TLB hit,
// on a miss returning from below, or after a page walk. Optionally allocates
// the entry locally, performs protection checks (non-prefetch only), and
// writes the translated physical address into the request.
// NOTE(review): the signature lines (1410-1411) and the safe_cast of
// pkt->senderState (1413) are missing from this extract — confirm upstream.
1409  void
1411  {
1412  TranslationState *sender_state =
1414 
1415  ThreadContext *tc = sender_state->tc;
1416  Mode mode = sender_state->tlbMode;
1417  Addr vaddr = pkt->req->getVaddr();
1418 
1419  TlbEntry *local_entry, *new_entry;
1420 
1421  if (tlb_outcome == TLB_HIT) {
1422  DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1423  "%#x\n", vaddr);
1424 
1425  local_entry = sender_state->tlbEntry;
1426  } else {
1427  DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1428  "%#x\n", vaddr);
1429 
1430  // We are returning either from a page walk or from a hit at a lower
1431  // TLB level. The senderState should be "carrying" a pointer to the
1432  // correct TLBEntry.
1433  new_entry = sender_state->tlbEntry;
1434  assert(new_entry);
1435  local_entry = new_entry;
1436 
// Allocate-on-miss policy: insert the entry into this TLB level.
1437  if (allocationPolicy) {
1438  Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1439 
1440  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1441  virt_page_addr);
1442 
1443  local_entry = insert(virt_page_addr, *new_entry);
1444  }
1445 
1446  assert(local_entry);
1447  }
1448 
1449  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1450  "while paddr was %#x.\n", local_entry->vaddr,
1451  local_entry->paddr);
1452 
// Skip protection checks for prefetches or when no entry was produced.
1464  if (!sender_state->prefetch && sender_state->tlbEntry)
1465  pagingProtectionChecks(tc, pkt, local_entry, mode);
1466 
// Compose paddr = page frame | page offset, then publish it on the request.
1467  int page_size = local_entry->size();
1468  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1469  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1470 
1471  pkt->req->setPaddr(paddr);
1472 
1473  if (local_entry->uncacheable)
1474  pkt->req->setFlags(Request::UNCACHEABLE);
1475  }
1476 
1477  // This is used for atomic translations. Need to
1478  // make it all happen during the same cycle.
// GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
// Functional (same-cycle) translation: looks up the local TLB, and on a miss
// either forwards functionally to the lower-level TLB or performs the page
// walk inline. Prefetches that fail to translate return early without
// populating the request's paddr.
// NOTE(review): several original lines are missing from this extract: the
// signature (1480), the safe_cast of pkt->senderState (1483), the second
// roundDown argument (1489, presumably TheISA::PageBytes), and the stat
// updates at 1501/1511 (presumably localNumTLBAccesses / miss counters) —
// confirm upstream.
1479  void
1481  {
1482  TranslationState *sender_state =
1484 
1485  ThreadContext *tc = sender_state->tc;
1486  bool update_stats = !sender_state->prefetch;
1487 
1488  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1490 
1491  if (update_stats)
1492  tlb->updatePageFootprint(virt_page_addr);
1493 
1494  // do the TLB lookup without updating the stats
1495  bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1496  tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1497 
1498  // functional mode means no coalescing
1499  // global metrics are the same as the local metrics
1500  if (update_stats) {
1502 
1503  if (success) {
// hitLevel records at which level of the TLB hierarchy the hit occurred.
1504  sender_state->hitLevel = sender_state->reqCnt.size();
1505  tlb->globalNumTLBHits++;
1506  }
1507  }
1508 
1509  if (!success) {
1510  if (update_stats)
1512  if (tlb->hasMemSidePort) {
1513  // there is a TLB below -> propagate down the TLB hierarchy
1514  tlb->memSidePort[0]->sendFunctional(pkt);
1515  // If no valid translation from a prefetch, then just return
1516  if (sender_state->prefetch && !pkt->req->hasPaddr())
1517  return;
1518  } else {
1519  // Need to access the page table and update the TLB
1520  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1521  virt_page_addr);
1522 
1523  Process *p = tc->getProcessPtr();
1524 
1525  Addr vaddr = pkt->req->getVaddr();
1526  #ifndef NDEBUG
1527  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1528  assert(alignedVaddr == virt_page_addr);
1529  #endif
1530 
1531  const EmulationPageTable::Entry *pte =
1532  p->pTable->lookup(vaddr);
// Retry the lookup after a possible stack-growth fixup (data accesses only).
1533  if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1534  p->fixupStackFault(vaddr)) {
1535  pte = p->pTable->lookup(vaddr);
1536  }
1537 
1538  if (!sender_state->prefetch) {
1539  // no PageFaults are permitted after
1540  // the second page table lookup
1541  assert(pte);
1542 
1543  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1544  pte->paddr);
1545 
1546  sender_state->tlbEntry =
1547  new TlbEntry(p->pid(), virt_page_addr,
1548  pte->paddr, false, false);
1549  } else {
1550  // If this was a prefetch, then do the normal thing if it
1551  // was a successful translation. Otherwise, send an empty
1552  // TLB entry back so that it can be figured out as empty and
1553  // handled accordingly.
1554  if (pte) {
1555  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1556  pte->paddr);
1557 
1558  sender_state->tlbEntry =
1559  new TlbEntry(p->pid(), virt_page_addr,
1560  pte->paddr, false, false);
1561  } else {
1562  DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1563  alignedVaddr);
1564 
1565  sender_state->tlbEntry = nullptr;
1566 
1567  return;
1568  }
1569  }
1570  }
1571  } else {
// Functional hit: clone the resident entry for the sender state so callers
// own an independent TlbEntry.
1572  DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1573  tlb->lookup(pkt->req->getVaddr()));
1574 
1575  TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1576  update_stats);
1577 
1578  assert(entry);
1579 
1580  auto p = sender_state->tc->getProcessPtr();
1581  sender_state->tlbEntry =
1582  new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1583  false, false);
1584  }
1585  // This is the function that would populate pkt->req with the paddr of
1586  // the translation. But if no translation happens (i.e. prefetch fails)
1587  // then the early returns in the above code will keep this function
1588  // from executing.
1589  tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1590  }
1591 
// GpuTLB::CpuSidePort::recvReqRetry()
// The CPU-side port only ever sends responses, so a request retry from the
// peer indicates a protocol violation — fail loudly.
// NOTE(review): the signature line (1593) is missing from this extract.
1592  void
1594  {
1595  // The CPUSidePort never sends anything but replies. No retries
1596  // expected.
1597  panic("recvReqRetry called");
1598  }
1599 
// GpuTLB::CpuSidePort::getAddrRanges()
// Returns an empty range list: the master never checks address ranges on
// this port.
// NOTE(review): the return-type/signature lines (1600-1601) are missing from
// this extract.
1602  {
1603  // currently not checked by the master
1604  AddrRangeList ranges;
1605 
1606  return ranges;
1607  }
1608 
// GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
// A translation reply has come back from the lower-level TLB: mark the
// pending TLBEvent for this page as MISS_RETURN and schedule it one cycle
// from now. Always accepts the packet (returns true).
// NOTE(review): the signature line (1615) and the second roundDown argument
// line (1618, presumably TheISA::PageBytes) are missing from this extract.
1614  bool
1616  {
1617  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1619 
1620  DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1621  virt_page_addr);
1622 
// The event must still be registered and must match this page address.
1623  TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1624  assert(tlb_event);
1625  assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1626 
1627  tlb_event->updateOutcome(MISS_RETURN);
1628  tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1629 
1630  return true;
1631  }
1632 
// GpuTLB::MemSidePort::recvReqRetry()
// Retries are handled by the TLBCoalescer, never by the TLB itself; reaching
// this function is a protocol violation.
// NOTE(review): the signature line (1634) is missing from this extract.
1633  void
1635  {
1636  // No retries should reach the TLB. The retries
1637  // should only reach the TLBCoalescer.
1638  panic("recvReqRetry called");
1639  }
1640 
// GpuTLB::cleanup()
// Drains the cleanupQueue: for each completed translation, deletes and
// unregisters its TLBEvent and decrements outstandingReqs. Afterwards it
// pokes every CPU-side port with sendRetryReq() so a coalescer that was
// previously rejected (maxCoalescedReqs reached) can retry.
// NOTE(review): the signature line (1642) and a comment block (1657-1659)
// are missing from this extract.
1641  void
1643  {
1644  while (!cleanupQueue.empty()) {
1645  Addr cleanup_addr = cleanupQueue.front();
1646  cleanupQueue.pop();
1647 
1648  // delete TLBEvent
1649  TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1650  delete old_tlb_event;
1651  translationReturnEvent.erase(cleanup_addr);
1652 
1653  // update number of outstanding requests
1654  outstandingReqs--;
1655  }
1656 
1660  for (int i = 0; i < cpuSidePort.size(); ++i) {
1661  cpuSidePort[i]->sendRetryReq();
1662  }
1663  }
1664 
// GpuTLB::updatePageFootprint(Addr virt_page_addr)
// Per-page access bookkeeping for the TLBFootprint table: counts unique
// pages, accumulates reuse distance (ticks between consecutive accesses to
// the same page), and — when accessDistance is enabled — records the running
// localNumTLBAccesses value for later access-distance analysis in
// exitCallback().
// NOTE(review): the signature line (1666) and the declaration of `ret`
// (1669, presumably a std::pair<AccessPatternTable::iterator, bool>) are
// missing from this extract — confirm upstream.
1665  void
1667  {
1668 
1670 
// Zero-initialized record used only if this page has not been seen before.
1671  AccessInfo tmp_access_info;
1672  tmp_access_info.lastTimeAccessed = 0;
1673  tmp_access_info.accessesPerPage = 0;
1674  tmp_access_info.totalReuseDistance = 0;
1675  tmp_access_info.sumDistance = 0;
1676  tmp_access_info.meanDistance = 0;
1677 
// insert() is a no-op if the key exists; ret.second tells us which case.
1678  ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1679  tmp_access_info));
1680 
1681  bool first_page_access = ret.second;
1682 
1683  if (first_page_access) {
1684  numUniquePages++;
1685  } else {
// Reuse distance = ticks elapsed since this page was last touched.
1686  int accessed_before;
1687  accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1688  ret.first->second.totalReuseDistance += accessed_before;
1689  }
1690 
1691  ret.first->second.accessesPerPage++;
1692  ret.first->second.lastTimeAccessed = curTick();
1693 
1694  if (accessDistance) {
1695  ret.first->second.localTLBAccesses
1696  .push_back(localNumTLBAccesses.value());
1697  }
1698  }
1699 
// GpuTLB::exitCallback()
// End-of-simulation hook: computes the average reuse distance across all
// pages this TLB has seen and, when accessDistance is enabled, converts each
// page's raw access counters into access distances (max / mean / stddev) and
// dumps them to a per-TLB CSV file in the gem5 output directory. Finally
// clears the footprint table.
// NOTE(review): the signature line (1701) and the assignment of the computed
// average into its statistic (1785, presumably avgReuseDistance =) are
// missing from this extract — confirm upstream.
1700  void
1702  {
1703  std::ostream *page_stat_file = nullptr;
1704 
1705  if (accessDistance) {
1706 
1707  // print per page statistics to a separate file (.csv format)
1708  // simout is the gem5 output directory (default is m5out or the one
1709  // specified with -d)
1710  page_stat_file = simout.create(name().c_str())->stream();
1711 
1712  // print header
1713  *page_stat_file << "page,max_access_distance,mean_access_distance, "
1714  << "stddev_distance" << std::endl;
1715  }
1716 
1717  // update avg. reuse distance footprint
1718  AccessPatternTable::iterator iter, iter_begin, iter_end;
1719  unsigned int sum_avg_reuse_distance_per_page = 0;
1720 
1721  // iterate through all pages seen by this TLB
1722  for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1723  sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1724  iter->second.accessesPerPage;
1725 
1726  if (accessDistance) {
// localTLBAccesses holds cumulative access counts; the loop below turns
// them into per-access distances in place (difference to previous + 1).
1727  unsigned int tmp = iter->second.localTLBAccesses[0];
1728  unsigned int prev = tmp;
1729 
1730  for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1731  if (i) {
1732  tmp = prev + 1;
1733  }
1734 
1735  prev = iter->second.localTLBAccesses[i];
1736  // update the localTLBAccesses value
1737  // with the actual difference
1738  iter->second.localTLBAccesses[i] -= tmp;
1739  // compute the sum of AccessDistance per page
1740  // used later for mean
1741  iter->second.sumDistance +=
1742  iter->second.localTLBAccesses[i];
1743  }
1744 
1745  iter->second.meanDistance =
1746  iter->second.sumDistance / iter->second.accessesPerPage;
1747 
1748  // compute std_dev and max (we need a second round because we
1749  // need to know the mean value)
1750  unsigned int max_distance = 0;
1751  unsigned int stddev_distance = 0;
1752 
1753  for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1754  unsigned int tmp_access_distance =
1755  iter->second.localTLBAccesses[i];
1756 
1757  if (tmp_access_distance > max_distance) {
1758  max_distance = tmp_access_distance;
1759  }
1760 
1761  unsigned int diff =
1762  tmp_access_distance - iter->second.meanDistance;
1763  stddev_distance += pow(diff, 2);
1764 
1765  }
1766 
1767  stddev_distance =
1768  sqrt(stddev_distance/iter->second.accessesPerPage);
1769 
// Emit one CSV row per page: page (hex), max, mean, stddev (decimal).
1770  if (page_stat_file) {
1771  *page_stat_file << std::hex << iter->first << ",";
1772  *page_stat_file << std::dec << max_distance << ",";
1773  *page_stat_file << std::dec << iter->second.meanDistance
1774  << ",";
1775  *page_stat_file << std::dec << stddev_distance;
1776  *page_stat_file << std::endl;
1777  }
1778 
1779  // erase the localTLBAccesses array
1780  iter->second.localTLBAccesses.clear();
1781  }
1782  }
1783 
1784  if (!TLBFootprint.empty()) {
1786  sum_avg_reuse_distance_per_page / TLBFootprint.size();
1787  }
1788 
1789  //clear the TLBFootprint map
1790  TLBFootprint.clear();
1791  }
1792 } // namespace X86ISA
1793 
// SimObject factory: instantiates an X86ISA::GpuTLB from its Python-generated
// parameter struct. Called by gem5's configuration machinery.
// NOTE(review): the return-type line (1794, presumably X86ISA::GpuTLB*) is
// missing from this extract.
1795 X86GPUTLBParams::create()
1796 {
1797  return new X86ISA::GpuTLB(this);
1798 }
1799 
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
AccessPatternTable TLBFootprint
Definition: gpu_tlb.hh:443
#define DPRINTF(x,...)
Definition: trace.hh:229
unsigned int accessesPerPage
Definition: gpu_tlb.hh:425
const Addr PhysAddrPrefixPciConfig
Definition: x86_traits.hh:73
offset
Definition: misc.hh:1026
Stats::Formula globalTLBMissRate
Definition: gpu_tlb.hh:205
The request is to an uncacheable address.
Definition: request.hh:115
Ports are used to interface objects to each other.
Definition: port.hh:60
OutputDirectory simout
Definition: output.cc:65
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: gpu_tlb.hh:149
Stats::Scalar localCycles
Definition: gpu_tlb.hh:213
const int FlagShift
Definition: ldstflags.hh:52
decltype(nullptr) constexpr NoFault
Definition: types.hh:245
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:175
Bitfield< 7 > i
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: gpu_tlb.cc:935
STL pair class.
Definition: stl.hh:61
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
Definition: output.cc:206
TLB TranslationState: this currently is a somewhat bastardization of the usage of SenderState...
Definition: gpu_tlb.hh:330
Stats::Scalar avgReuseDistance
Definition: gpu_tlb.hh:218
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
Definition: gpu_tlb.cc:1244
void makeTimingResponse()
Definition: packet.hh:955
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: gpu_tlb.cc:1410
Stats::Scalar accessCycles
Definition: gpu_tlb.hh:208
Stats::Formula localTLBMissRate
Definition: gpu_tlb.hh:197
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: gpu_tlb.cc:1125
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void invalidateAll()
Definition: gpu_tlb.cc:226
const Addr PageShift
Definition: isa_traits.hh:46
std::shared_ptr< Request > RequestPtr
Definition: request.hh:83
unsigned int meanDistance
Definition: gpu_tlb.hh:439
void exitCallback()
Definition: gpu_tlb.cc:1701
Walker * getWalker()
Definition: gpu_tlb.cc:928
virtual const std::string name() const
Definition: eventq.cc:86
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:183
std::vector< TlbEntry > tlb
Definition: gpu_tlb.hh:162
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
Definition: gpu_tlb.cc:904
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:136
virtual Process * getProcessPtr()=0
uint64_t RegVal
Definition: types.hh:168
MemSidePort is the TLB Port closer to the memory side If this is a last level TLB then this port will...
Definition: gpu_tlb.hh:286
virtual void regStats()
Callback to set stat parameters.
Definition: group.cc:66
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: gpu_tlb.hh:154
unsigned int totalReuseDistance
Definition: gpu_tlb.hh:427
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: gpu_tlb.cc:701
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: gpu_tlb.cc:135
const Addr IntAddrPrefixCPUID
Definition: x86_traits.hh:68
unsigned int lastTimeAccessed
Definition: gpu_tlb.hh:424
const Addr PageBytes
Definition: isa_traits.hh:53
Bitfield< 14 > expandDown
Definition: misc.hh:998
Definition: cprintf.cc:42
unsigned int sumDistance
Definition: gpu_tlb.hh:438
Bitfield< 4, 0 > mode
Fault translateInt(const RequestPtr &req, ThreadContext *tc)
Definition: gpu_tlb.cc:277
Stats::Scalar localNumTLBMisses
Definition: gpu_tlb.hh:196
int maxCoalescedReqs
Definition: gpu_tlb.hh:366
ThreadContext is the external interface to all thread state for anything outside of the CPU...
enum BaseTLB::Mode Mode
Definition: gpu_tlb.hh:98
TlbEntry * insert(Addr vpn, TlbEntry &entry)
Definition: gpu_tlb.cc:157
std::ostream * stream() const
Get the output underlying output stream.
Definition: output.hh:64
const Addr IntAddrPrefixMask
Definition: x86_traits.hh:67
RequestPtr req
A pointer to the original request.
Definition: packet.hh:327
Walker * walker
Definition: gpu_tlb.hh:126
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
Definition: gpu_tlb.cc:1113
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: gpu_tlb.cc:1634
Stats::Scalar numUniquePages
Definition: gpu_tlb.hh:211
Stats::Scalar globalNumTLBMisses
Definition: gpu_tlb.hh:204
std::queue< Addr > cleanupQueue
Definition: gpu_tlb.hh:409
bool isRequest() const
Definition: packet.hh:531
std::vector< MemSidePort * > memSidePort
Definition: gpu_tlb.hh:309
bool accessDistance
Print out accessDistance stats.
Definition: gpu_tlb.hh:160
Addr pageAlign(Addr a)
Definition: page_table.hh:107
Tick curTick()
The current simulated tick.
Definition: core.hh:47
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: gpu_tlb.cc:914
Flags flags
Definition: eventq.hh:211
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itsle...
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:162
EventFunctionWrapper exitEvent
Definition: gpu_tlb.hh:448
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:385
static MiscRegIndex MISCREG_SEG_ATTR(int index)
Definition: misc.hh:535
Stats::Formula localLatency
Definition: gpu_tlb.hh:215
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
Definition: misc.hh:528
int outstandingReqs
Definition: gpu_tlb.hh:370
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it&#39;s used to guide replacement decisions...
Definition: gpu_tlb.hh:178
void invalidateNonGlobal()
Definition: gpu_tlb.cc:246
uint64_t pid()
Definition: process.hh:91
void setConfigAddress(uint32_t addr)
Definition: gpu_tlb.cc:240
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
Definition: gpu_tlb.cc:1615
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
mask
Definition: misc.hh:798
EventFunctionWrapper cleanupEvent
Definition: gpu_tlb.hh:415
Bitfield< 51, 12 > base
Definition: pagetable.hh:142
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: gpu_tlb.hh:422
X86GPUTLBParams Params
Definition: gpu_tlb.hh:94
void demapPage(Addr va, uint64_t asn)
Definition: gpu_tlb.cc:264
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
Definition: gpu_tlb.cc:1480
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: gpu_tlb.hh:405
const Addr IntAddrPrefixMSR
Definition: x86_traits.hh:69
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:185
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
Definition: gpu_tlb.cc:1159
This request is to a memory mapped register.
Definition: request.hh:127
std::vector< CpuSidePort * > cpuSidePort
Definition: gpu_tlb.hh:307
virtual Addr instAddr() const =0
TlbEntry * lookup(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:213
Stats::Scalar globalNumTLBAccesses
Definition: gpu_tlb.hh:202
void updateOutcome(tlbOutcome _outcome)
Definition: gpu_tlb.cc:1369
static MiscRegIndex MISCREG_SEG_SEL(int index)
Definition: misc.hh:507
Bitfield< 2, 0 > seg
Definition: types.hh:84
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
std::vector< EntryList > freeList
Definition: gpu_tlb.hh:169
virtual const std::string name() const
Definition: sim_object.hh:120
const Request::FlagsType M5_VAR_USED SegmentFlagMask
Definition: ldstflags.hh:51
T safe_cast(U ptr)
Definition: cast.hh:61
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
Bitfield< 8 > va
#define warn_once(...)
Definition: logging.hh:216
const Addr PageBytes
Definition: isa_traits.hh:47
const Addr IntAddrPrefixIO
Definition: x86_traits.hh:70
void regStats() override
Callback to set stat parameters.
Definition: gpu_tlb.cc:945
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: gpu_tlb.cc:940
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:279
bool fixupStackFault(Addr vaddr)
Attempt to fix up a fault at vaddr by allocating a page on the stack.
Definition: process.cc:362
BitfieldType< SegDescriptorLimit > limit
Definition: misc.hh:926
uint32_t configAddress
Definition: gpu_tlb.hh:72
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
Definition: gpu_tlb.cc:1387
EmulationPageTable * pTable
Definition: process.hh:181
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
Declarations of a non-full system Page Table.
static MiscRegIndex MISCREG_SEG_BASE(int index)
Definition: misc.hh:514
std::ostream CheckpointOut
Definition: serialize.hh:68
This is exposed globally, independent of the ISA.
Definition: acpi.hh:57
const char * description() const
Return a C string describing the event.
Definition: gpu_tlb.cc:1363
int missLatency2
Definition: gpu_tlb.hh:190
SenderState * senderState
This packet&#39;s sender state.
Definition: packet.hh:480
Definition: eventq.hh:189
void cleanup()
Definition: gpu_tlb.cc:1642
int missLatency1
Definition: gpu_tlb.hh:189
virtual ContextID contextId() const =0
const Entry * lookup(Addr vaddr)
Lookup function.
Definition: page_table.cc:134
void schedule(Event &event, Tick when)
Definition: eventq.hh:744
Stats::Scalar pageTableCycles
Definition: gpu_tlb.hh:210
Tick ticks(int numCycles) const
Definition: gpu_tlb.hh:86
Stats::Scalar localNumTLBHits
Definition: gpu_tlb.hh:195
const Addr PhysAddrPrefixIO
Definition: x86_traits.hh:72
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:312
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
Bitfield< 0 > p
Definition: pagetable.hh:152
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: gpu_tlb.cc:1601
GpuTLB(const Params *p)
Definition: gpu_tlb.cc:63
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
Definition: bitfield.hh:96
T bits(T val, int first, int last)
Extract the bitfield from position &#39;first&#39; to &#39;last&#39; (inclusive) from &#39;val&#39; and right justify it...
Definition: bitfield.hh:72
bool FA
true if this is a fully-associative TLB
Definition: gpu_tlb.hh:142
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Definition: x86_traits.hh:93
Stats::Scalar localNumTLBAccesses
Definition: gpu_tlb.hh:194
Bitfield< 1 > x
Definition: types.hh:105
virtual void recvReqRetry()
Definition: gpu_tlb.cc:1593
Stats::Scalar globalNumTLBHits
Definition: gpu_tlb.hh:203
std::shared_ptr< FaultBase > Fault
Definition: types.hh:240
Bitfield< 3 > addr
Definition: types.hh:81
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:703
tlbOutcome outcome
outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
Definition: gpu_tlb.hh:390
std::vector< int > reqCnt
Definition: gpu_tlb.hh:352
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access="" latency>=""> c...
Definition: gpu_tlb.cc:1033
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss...
Definition: gpu_tlb.cc:652
static const Priority Maximum_Pri
Maximum priority.
Definition: eventq.hh:179
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:156
void updatePageFootprint(Addr virt_page_addr)
Definition: gpu_tlb.cc:1666

Generated on Fri Feb 28 2020 16:27:01 for gem5 by doxygen 1.8.13