gem5  v20.0.0.3
gpu_tlb.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Lisa Hsu
34  */
35 
36 #include "gpu-compute/gpu_tlb.hh"
37 
38 #include <cmath>
39 #include <cstring>
40 
41 #include "arch/x86/faults.hh"
43 #include "arch/x86/pagetable.hh"
45 #include "arch/x86/regs/misc.hh"
46 #include "arch/x86/regs/msr.hh"
47 #include "arch/x86/x86_traits.hh"
48 #include "base/bitfield.hh"
49 #include "base/logging.hh"
50 #include "base/output.hh"
51 #include "base/trace.hh"
52 #include "cpu/base.hh"
53 #include "cpu/thread_context.hh"
54 #include "debug/GPUPrefetch.hh"
55 #include "debug/GPUTLB.hh"
56 #include "mem/packet_access.hh"
57 #include "mem/page_table.hh"
58 #include "mem/request.hh"
59 #include "sim/process.hh"
60 #include "sim/pseudo_inst.hh"
61 
62 namespace X86ISA
63 {
64 
// GpuTLB constructor: sizes the set-associative TLB from the Python
// params, builds the per-set free/LRU lists, and instantiates the
// CPU-side (upper level) and memory-side (lower level) ports.
// NOTE(review): the doxygen extraction dropped several source lines here:
// the signature (orig. line 65), part of the cleanupEvent initializer
// (68), lines 93-100 (presumably a power-of-two check on numSets --
// TODO confirm against upstream), and line 107 (presumably
// "maxCoalescedReqs = assoc;" given the cprintf below). Restore from
// upstream gem5 before compiling.
66  : ClockedObject(p), configAddress(0), size(p->size),
67  cleanupEvent([this]{ cleanup(); }, name(), false,
69  exitEvent([this]{ exitCallback(); }, name())
70  {
71  assoc = p->assoc;
72  assert(assoc <= size);
73  numSets = size/assoc;
74  allocationPolicy = p->allocationPolicy;
75  hasMemSidePort = false;
76  accessDistance = p->accessDistance;
77  clock = p->clk_domain->clockPeriod();
78 
79  tlb.assign(size, TlbEntry());
80 
81  freeList.resize(numSets);
82  entryList.resize(numSets);
83 
// Pre-populate every set's free list with pointers to its ways.
84  for (int set = 0; set < numSets; ++set) {
85  for (int way = 0; way < assoc; ++way) {
86  int x = set * assoc + way;
87  freeList[set].push_back(&tlb.at(x));
88  }
89  }
90 
// Fully associative iff a single set holds all the ways.
91  FA = (size == assoc);
92 
// Set-index mask; only correct if numSets is a power of two.
101  setMask = numSets - 1;
102 
103  maxCoalescedReqs = p->maxOutstandingReqs;
104 
105  // Do not allow maxCoalescedReqs to be more than the TLB associativity
106  if (maxCoalescedReqs > assoc) {
// NOTE(review): the clamping assignment (orig. line 107) is missing here.
108  cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
109  }
110 
111  outstandingReqs = 0;
112  hitLatency = p->hitLatency;
113  missLatency1 = p->missLatency1;
114  missLatency2 = p->missLatency2;
115 
116  // create the slave ports based on the number of connected ports
117  for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
118  cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
119  name(), i), this, i));
120  }
121 
122  // create the master ports based on the number of connected ports
123  for (size_t i = 0; i < p->port_master_connection_count; ++i) {
124  memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
125  name(), i), this, i));
126  }
127  }
128 
// Destructor. Only sanity-checks that no translation-return events are
// still outstanding when the TLB is torn down.
// NOTE(review): the signature line (orig. 130, "GpuTLB::~GpuTLB()") was
// lost in extraction; only the body remains.
129  // fixme: this is never called?
131  {
132  // make sure all the hash-maps are empty
133  assert(translationReturnEvent.empty());
134  }
135 
136  Port &
137  GpuTLB::getPort(const std::string &if_name, PortID idx)
138  {
139  if (if_name == "slave") {
140  if (idx >= static_cast<PortID>(cpuSidePort.size())) {
141  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
142  }
143 
144  return *cpuSidePort[idx];
145  } else if (if_name == "master") {
146  if (idx >= static_cast<PortID>(memSidePort.size())) {
147  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
148  }
149 
150  hasMemSidePort = true;
151 
152  return *memSidePort[idx];
153  } else {
154  panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
155  }
156  }
157 
// Insert a translation for virtual page vpn, evicting the LRU entry of
// the target set if no free way remains. Returns the (re)used entry,
// placed at the MRU (front) position of the set's entry list.
// NOTE(review): the signature line (orig. 159) and comment lines 163-166
// were lost in extraction; parameter names below ("vpn", "entry") are
// inferred from the body -- restore the signature from upstream gem5.
158  TlbEntry*
160  {
161  TlbEntry *newEntry = nullptr;
162 
167  int set = (vpn >> TheISA::PageShift) & setMask;
168 
169  if (!freeList[set].empty()) {
170  newEntry = freeList[set].front();
171  freeList[set].pop_front();
172  } else {
// No free way: evict the LRU entry (back of the list).
173  newEntry = entryList[set].back();
174  entryList[set].pop_back();
175  }
176 
177  *newEntry = entry;
178  newEntry->vaddr = vpn;
179  entryList[set].push_front(newEntry);
180 
181  return newEntry;
182  }
183 
184  GpuTLB::EntryList::iterator
185  GpuTLB::lookupIt(Addr va, bool update_lru)
186  {
187  int set = (va >> TheISA::PageShift) & setMask;
188 
189  if (FA) {
190  assert(!set);
191  }
192 
193  auto entry = entryList[set].begin();
194  for (; entry != entryList[set].end(); ++entry) {
195  int page_size = (*entry)->size();
196 
197  if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
198  DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
199  "with size %#x.\n", va, (*entry)->vaddr, page_size);
200 
201  if (update_lru) {
202  entryList[set].push_front(*entry);
203  entryList[set].erase(entry);
204  entry = entryList[set].begin();
205  }
206 
207  break;
208  }
209  }
210 
211  return entry;
212  }
213 
214  TlbEntry*
215  GpuTLB::lookup(Addr va, bool update_lru)
216  {
217  int set = (va >> TheISA::PageShift) & setMask;
218 
219  auto entry = lookupIt(va, update_lru);
220 
221  if (entry == entryList[set].end())
222  return nullptr;
223  else
224  return *entry;
225  }
226 
// Flush every set: move all entries back onto their set's free list.
// NOTE(review): the signature line (orig. 228, "GpuTLB::invalidateAll()")
// was lost in extraction.
227  void
229  {
230  DPRINTF(GPUTLB, "Invalidating all entries.\n");
231 
232  for (int i = 0; i < numSets; ++i) {
233  while (!entryList[i].empty()) {
234  TlbEntry *entry = entryList[i].front();
235  entryList[i].pop_front();
236  freeList[i].push_back(entry);
237  }
238  }
239  }
240 
// NOTE(review): extraction dropped both the signature (orig. 242,
// presumably "GpuTLB::setConfigAddress(uint32_t addr)") and the body
// line (orig. 244, presumably "configAddress = addr;") -- restore from
// upstream gem5; as shown this is an empty fragment.
241  void
243  {
245  }
246 
// Flush every entry whose 'global' bit is clear (i.e., address-space
// local translations), returning them to the free lists.
// NOTE(review): the signature line (orig. 248,
// "GpuTLB::invalidateNonGlobal()") was lost in extraction.
247  void
249  {
250  DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
251 
252  for (int i = 0; i < numSets; ++i) {
253  for (auto entryIt = entryList[i].begin();
254  entryIt != entryList[i].end();) {
255  if (!(*entryIt)->global) {
256  freeList[i].push_back(*entryIt);
// Post-increment keeps the iterator valid across the erase.
257  entryList[i].erase(entryIt++);
258  } else {
259  ++entryIt;
260  }
261  }
262  }
263  }
264 
265  void
266  GpuTLB::demapPage(Addr va, uint64_t asn)
267  {
268 
269  int set = (va >> TheISA::PageShift) & setMask;
270  auto entry = lookupIt(va, false);
271 
272  if (entry != entryList[set].end()) {
273  freeList[set].push_back(*entry);
274  entryList[set].erase(entry);
275  }
276  }
277 
278 
279 
280  namespace
281  {
282 
283  Cycles
284  localMiscRegAccess(bool read, MiscRegIndex regNum,
285  ThreadContext *tc, PacketPtr pkt)
286  {
287  if (read) {
288  RegVal data = htole(tc->readMiscReg(regNum));
289  // Make sure we don't trot off the end of data.
290  pkt->setData((uint8_t *)&data);
291  } else {
292  RegVal data = htole(tc->readMiscRegNoEffect(regNum));
293  tc->setMiscReg(regNum, letoh(data));
294  }
295  return Cycles(1);
296  }
297 
298  } // anonymous namespace
299 
// Handle a request to the "internal memory" address space (CPUID space,
// MSR space, or x86 I/O-port space) instead of performing a normal
// virtual-to-physical translation.
300  Fault
301  GpuTLB::translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
302  {
303  DPRINTF(GPUTLB, "Addresses references internal memory.\n");
304  Addr vaddr = req->getVaddr();
305  Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
306 
307  if (prefix == IntAddrPrefixCPUID) {
308  panic("CPUID memory space not yet implemented!\n");
309  } else if (prefix == IntAddrPrefixMSR) {
// Recover the MSR number encoded in the internal address and map it to
// a misc register index; unknown MSRs raise #GP(0).
310  vaddr = (vaddr >> 3) & ~IntAddrPrefixMask;
311 
312  MiscRegIndex regNum;
313  if (!msrAddrToIndex(regNum, vaddr))
314  return std::make_shared<GeneralProtection>(0);
315 
// The actual register access happens later, when the packet reaches
// this local accessor.
316  req->setLocalAccessor(
317  [read,regNum](ThreadContext *tc, PacketPtr pkt)
318  {
319  return localMiscRegAccess(read, regNum, tc, pkt);
320  }
321  );
322 
323  return NoFault;
324  } else if (prefix == IntAddrPrefixIO) {
325  // TODO If CPL > IOPL or in virtual mode, check the I/O permission
326  // bitmap in the TSS.
327 
328  Addr IOPort = vaddr & ~IntAddrPrefixMask;
329  // Make sure the address fits in the expected 16 bit IO address
330  // space.
331  assert(!(IOPort & ~0xFFFF));
// Port 0xCF8 (dword): PCI config-address register, handled as a misc
// register access.
332  if (IOPort == 0xCF8 && req->getSize() == 4) {
333  req->setLocalAccessor(
334  [read](ThreadContext *tc, PacketPtr pkt)
335  {
336  return localMiscRegAccess(
337  read, MISCREG_PCI_CONFIG_ADDRESS, tc, pkt);
338  }
339  );
// Ports 0xCFC-0xCFF: PCI config-data window; route to PCI config space
// when the enable bit (31) of the config address is set.
// NOTE(review): comment lines 341-343 were dropped in extraction.
340  } else if ((IOPort & ~mask(2)) == 0xCFC) {
344  if (bits(configAddress, 31, 31)) {
345  req->setPaddr(PhysAddrPrefixPciConfig |
346  mbits(configAddress, 30, 2) |
347  (IOPort & mask(2)));
348  } else {
349  req->setPaddr(PhysAddrPrefixIO | IOPort);
350  }
351  } else {
// NOTE(review): comment line 352 was dropped in extraction.
353  req->setPaddr(PhysAddrPrefixIO | IOPort);
354  }
355  return NoFault;
356  } else {
357  panic("Access to unrecognized internal address space %#x.\n",
358  prefix);
359  }
360  }
361 
// Probe the TLB for the request's vaddr; returns true on a hit. A hit
// updates the LRU stack. When update_stats is false (functional/seeding
// accesses) the hit/miss counters are left untouched.
// NOTE(review): the first signature line (orig. 370, presumably
// "GpuTLB::tlbLookup(const RequestPtr &req,") was lost in extraction.
369  bool
371  ThreadContext *tc, bool update_stats)
372  {
373  bool tlb_hit = false;
// flags/seg are only needed by the assert below, so they are compiled
// out along with it in NDEBUG builds.
374  #ifndef NDEBUG
375  uint32_t flags = req->getFlags();
376  int seg = flags & SegmentFlagMask;
377  #endif
378 
379  assert(seg != SEGMENT_REG_MS);
380  Addr vaddr = req->getVaddr();
381  DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
382  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
383 
384  if (m5Reg.prot) {
385  DPRINTF(GPUTLB, "In protected mode.\n");
386  // make sure we are in 64-bit mode
387  assert(m5Reg.mode == LongMode);
388 
389  // If paging is enabled, do the translation.
390  if (m5Reg.paging) {
391  DPRINTF(GPUTLB, "Paging enabled.\n");
392  //update LRU stack on a hit
393  TlbEntry *entry = lookup(vaddr, true);
394 
395  if (entry)
396  tlb_hit = true;
397 
398  if (!update_stats) {
399  // functional tlb access for memory initialization
400  // i.e., memory seeding or instr. seeding -> don't update
401  // TLB and stats
402  return tlb_hit;
403  }
404 
// NOTE(review): orig. line 405 (presumably "localNumTLBAccesses++;")
// was dropped in extraction.
406 
407  if (!entry) {
// NOTE(review): orig. line 408 (presumably "localNumTLBMisses++;")
// was dropped in extraction.
409  } else {
410  localNumTLBHits++;
411  }
412  }
413  }
414 
415  return tlb_hit;
416  }
417 
// Core translation routine shared by the atomic and timing paths.
// Dispatches internal-memory requests to translateInt(); otherwise does
// segment limit/protection checks (non-long mode), the TLB lookup plus
// page-table walk on a miss (SE mode only), paging protection checks,
// and finally sets the request's physical address. 'latency' is filled
// with hit/miss latency when 'timing' is true; delayedResponse is
// always set to false here (this path never defers).
// NOTE(review): the first signature line (orig. 419, presumably
// "GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,") was
// lost in extraction.
418  Fault
420  Translation *translation, Mode mode,
421  bool &delayedResponse, bool timing, int &latency)
422  {
423  uint32_t flags = req->getFlags();
424  int seg = flags & SegmentFlagMask;
425  bool storeCheck = flags & (StoreCheck << FlagShift);
426 
427  // If this is true, we're dealing with a request
428  // to a non-memory address space.
429  if (seg == SEGMENT_REG_MS) {
430  return translateInt(mode == Mode::Read, req, tc);
431  }
432 
433  delayedResponse = false;
434  Addr vaddr = req->getVaddr();
435  DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
436 
437  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
438 
439  // If protected mode has been enabled...
440  if (m5Reg.prot) {
441  DPRINTF(GPUTLB, "In protected mode.\n");
442  // If we're not in 64-bit mode, do protection/limit checks
443  if (m5Reg.mode != LongMode) {
444  DPRINTF(GPUTLB, "Not in long mode. Checking segment "
445  "protection.\n");
446 
447  // Check for a null segment selector.
448  if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
449  seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
450  && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
451  return std::make_shared<GeneralProtection>(0);
452  }
453 
454  bool expandDown = false;
455  SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
456 
457  if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
458  if (!attr.writable && (mode == BaseTLB::Write ||
459  storeCheck))
460  return std::make_shared<GeneralProtection>(0);
461 
462  if (!attr.readable && mode == BaseTLB::Read)
463  return std::make_shared<GeneralProtection>(0);
464 
465  expandDown = attr.expandDown;
466 
467  }
468 
// NOTE(review): orig. lines 469-470 were dropped in extraction --
// presumably the reads of the segment 'base' and 'limit' misc regs
// that the limit checks below rely on. Restore from upstream gem5;
// as shown, 'base' and 'limit' are undeclared.
471  // This assumes we're not in 64 bit mode. If we were, the
472  // default address size is 64 bits, overridable to 32.
473  int size = 32;
474  bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
475  SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
476 
477  if ((csAttr.defaultSize && sizeOverride) ||
478  (!csAttr.defaultSize && !sizeOverride)) {
479  size = 16;
480  }
481 
482  Addr offset = bits(vaddr - base, size - 1, 0);
483  Addr endOffset = offset + req->getSize() - 1;
484 
485  if (expandDown) {
486  DPRINTF(GPUTLB, "Checking an expand down segment.\n");
487  warn_once("Expand down segments are untested.\n");
488 
489  if (offset <= limit || endOffset <= limit)
490  return std::make_shared<GeneralProtection>(0);
491  } else {
492  if (offset > limit || endOffset > limit)
493  return std::make_shared<GeneralProtection>(0);
494  }
495  }
496 
497  // If paging is enabled, do the translation.
498  if (m5Reg.paging) {
499  DPRINTF(GPUTLB, "Paging enabled.\n");
500  // The vaddr already has the segment base applied.
501  TlbEntry *entry = lookup(vaddr);
// NOTE(review): orig. line 502 was dropped in extraction (presumably
// "localNumTLBAccesses++;").
503 
504  if (!entry) {
// NOTE(review): orig. line 505 was dropped in extraction (presumably
// "localNumTLBMisses++;").
506  if (timing) {
507  latency = missLatency1;
508  }
509 
510  if (FullSystem) {
511  fatal("GpuTLB doesn't support full-system mode\n");
512  } else {
513  DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
514  "at pc %#x.\n", vaddr, tc->instAddr());
515 
516  Process *p = tc->getProcessPtr();
517  const EmulationPageTable::Entry *pte =
518  p->pTable->lookup(vaddr);
519 
520  if (!pte && mode != BaseTLB::Execute) {
521  // penalize a "page fault" more
522  if (timing)
523  latency += missLatency2;
524 
// Let the process grow mappings lazily (e.g. stack growth) before
// declaring a real page fault.
525  if (p->fixupFault(vaddr))
526  pte = p->pTable->lookup(vaddr);
527  }
528 
529  if (!pte) {
530  return std::make_shared<PageFault>(vaddr, true,
531  mode, true,
532  false);
533  } else {
534  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
535 
536  DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
537  alignedVaddr, pte->paddr);
538 
539  TlbEntry gpuEntry(p->pid(), alignedVaddr,
540  pte->paddr, false, false);
541  entry = insert(alignedVaddr, gpuEntry);
542  }
543 
544  DPRINTF(GPUTLB, "Miss was serviced.\n");
545  }
546  } else {
547  localNumTLBHits++;
548 
549  if (timing) {
550  latency = hitLatency;
551  }
552  }
553 
554  // Do paging protection checks.
555  bool inUser = (m5Reg.cpl == 3 &&
556  !(flags & (CPL0FlagBit << FlagShift)));
557 
558  CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
559  bool badWrite = (!entry->writable && (inUser || cr0.wp));
560 
561  if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
562  badWrite)) {
563  // The page must have been present to get into the TLB in
564  // the first place. We'll assume the reserved bits are
565  // fine even though we're not checking them.
566  return std::make_shared<PageFault>(vaddr, true, mode,
567  inUser, false);
568  }
569 
570  if (storeCheck && badWrite) {
571  // This would fault if this were a write, so return a page
572  // fault that reflects that happening.
573  return std::make_shared<PageFault>(vaddr, true,
// NOTE(review): orig. line 574 was dropped in extraction (presumably
// the "BaseTLB::Write," argument of this PageFault).
575  inUser, false);
576  }
577 
578 
579  DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
580  "checks.\n", entry->paddr);
581 
// Compose the physical address from the frame and the page offset.
582  int page_size = entry->size();
583  Addr paddr = entry->paddr | (vaddr & (page_size - 1));
584  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
585  req->setPaddr(paddr);
586 
587  if (entry->uncacheable)
588  req->setFlags(Request::UNCACHEABLE);
589  } else {
590  //Use the address which already has segmentation applied.
591  DPRINTF(GPUTLB, "Paging disabled.\n");
592  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
593  req->setPaddr(vaddr);
594  }
595  } else {
596  // Real mode
597  DPRINTF(GPUTLB, "In real mode.\n");
598  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
599  req->setPaddr(vaddr);
600  }
601 
602  // Check for an access to the local APIC
603  if (FullSystem) {
604  LocalApicBase localApicBase =
// NOTE(review): orig. line 605 was dropped in extraction (presumably
// "tc->readMiscRegNoEffect(MISCREG_APIC_BASE);").
606 
607  Addr baseAddr = localApicBase.base * PageBytes;
608  Addr paddr = req->getPaddr();
609 
610  if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
611  // Force the access to be uncacheable.
612  req->setFlags(Request::UNCACHEABLE);
613  req->setPaddr(x86LocalAPICAddress(tc->contextId(),
614  paddr - baseAddr));
615  }
616  }
617 
618  return NoFault;
619  };
620 
// Atomic-mode translation: thin wrapper around translate() with no
// Translation callback and timing disabled.
// NOTE(review): the first signature line (orig. 622, presumably
// "GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,")
// was lost in extraction.
621  Fault
623  Mode mode, int &latency)
624  {
625  bool delayedResponse;
626 
627  return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
628  latency);
629  }
630 
// Timing-mode translation: runs translate() with timing enabled and
// completes the caller's Translation immediately unless the response
// was delayed.
// NOTE(review): the first signature line (orig. 632, presumably
// "GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,")
// was lost in extraction.
631  void
633  Translation *translation, Mode mode, int &latency)
634  {
635  bool delayedResponse;
636  assert(translation);
637 
638  Fault fault = GpuTLB::translate(req, tc, translation, mode,
639  delayedResponse, true, latency);
640 
641  if (!delayedResponse)
642  translation->finish(fault, req, tc, mode);
643  }
644 
// Accessor for the page-table walker plus empty (un)serialize stubs:
// the GpuTLB carries no checkpointable state.
// NOTE(review): the signature lines (orig. 646 "GpuTLB::getWalker()",
// 653 serialize, 658 unserialize) were lost in extraction.
645  Walker*
647  {
648  return walker;
649  }
650 
651 
652  void
654  {
655  }
656 
657  void
659  {
660  }
661 
// Register this TLB's statistics. Local stats count coalesced requests
// at this level; global stats count the original (uncoalesced) reqs.
// NOTE(review): extraction dropped the signature (orig. 663/665) and,
// critically, every line naming the stat object each .name()/.desc()
// chain applies to (orig. 667, 672, 677, 682, 687, 692, 697, 699, 704,
// 709, 714, 716, 721, 726, 731, 736, 738, 742 -- including the derived
// miss-rate / latency formulas). Restore from upstream gem5.
662  void
664  {
666 
668  .name(name() + ".local_TLB_accesses")
669  .desc("Number of TLB accesses")
670  ;
671 
673  .name(name() + ".local_TLB_hits")
674  .desc("Number of TLB hits")
675  ;
676 
678  .name(name() + ".local_TLB_misses")
679  .desc("Number of TLB misses")
680  ;
681 
683  .name(name() + ".local_TLB_miss_rate")
684  .desc("TLB miss rate")
685  ;
686 
688  .name(name() + ".access_cycles")
689  .desc("Cycles spent accessing this TLB level")
690  ;
691 
693  .name(name() + ".page_table_cycles")
694  .desc("Cycles spent accessing the page table")
695  ;
696 
698 
700  .name(name() + ".unique_pages")
701  .desc("Number of unique pages touched")
702  ;
703 
705  .name(name() + ".local_cycles")
706  .desc("Number of cycles spent in queue for all incoming reqs")
707  ;
708 
710  .name(name() + ".local_latency")
711  .desc("Avg. latency over incoming coalesced reqs")
712  ;
713 
715 
717  .name(name() + ".global_TLB_accesses")
718  .desc("Number of TLB accesses")
719  ;
720 
722  .name(name() + ".global_TLB_hits")
723  .desc("Number of TLB hits")
724  ;
725 
727  .name(name() + ".global_TLB_misses")
728  .desc("Number of TLB misses")
729  ;
730 
732  .name(name() + ".global_TLB_miss_rate")
733  .desc("TLB miss rate")
734  ;
735 
737 
739  .name(name() + ".avg_reuse_distance")
740  .desc("avg. reuse distance over all pages (in ticks)")
741  ;
742 
743  }
744 
// Timing-path entry point: look up the packet's page in this TLB,
// record stats, and schedule a TLBEvent (after hitLatency) that will
// act on the hit/miss outcome.
// NOTE(review): extraction dropped orig. line 751 (the signature,
// presumably "GpuTLB::issueTLBLookup(PacketPtr pkt)"), 757 (presumably
// "TheISA::PageBytes);" completing the roundDown call), and 760
// (presumably the safe_cast of pkt->senderState to TranslationState*).
750  void
752  {
753  assert(pkt);
754  assert(pkt->senderState);
755 
756  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
758 
759  TranslationState *sender_state =
761 
762  bool update_stats = !sender_state->prefetch;
763  ThreadContext * tmp_tc = sender_state->tc;
764 
765  DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
766  virt_page_addr);
767 
768  int req_cnt = sender_state->reqCnt.back();
769 
770  if (update_stats) {
// Subtract the issue tick now; the completion path adds the finish
// tick, leaving the elapsed cycles in the stat.
771  accessCycles -= (curTick() * req_cnt);
772  localCycles -= curTick();
773  updatePageFootprint(virt_page_addr);
774  globalNumTLBAccesses += req_cnt;
775  }
776 
777  tlbOutcome lookup_outcome = TLB_MISS;
778  const RequestPtr &tmp_req = pkt->req;
779 
780  // Access the TLB and figure out if it's a hit or a miss.
781  bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
782 
783  if (success) {
784  lookup_outcome = TLB_HIT;
785  // Put the entry in SenderState
786  TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
787  assert(entry);
788 
// Hand the upper level a private copy of the entry; it owns (and must
// delete) this heap allocation.
789  auto p = sender_state->tc->getProcessPtr();
790  sender_state->tlbEntry =
791  new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
792  false, false);
793 
794  if (update_stats) {
795  // the reqCnt has an entry per level, so its size tells us
796  // which level we are in
797  sender_state->hitLevel = sender_state->reqCnt.size();
798  globalNumTLBHits += req_cnt;
799  }
800  } else {
801  if (update_stats)
802  globalNumTLBMisses += req_cnt;
803  }
804 
805  /*
806  * We now know the TLB lookup outcome (if it's a hit or a miss), as well
807  * as the TLB access latency.
808  *
809  * We create and schedule a new TLBEvent which will help us take the
810  * appropriate actions (e.g., update TLB on a hit, send request to lower
811  * level TLB on a miss, or start a page walk if this was the last-level
812  * TLB)
813  */
814  TLBEvent *tlb_event =
815  new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
816 
817  if (translationReturnEvent.count(virt_page_addr)) {
818  panic("Virtual Page Address %#x already has a return event\n",
819  virt_page_addr);
820  }
821 
822  translationReturnEvent[virt_page_addr] = tlb_event;
823  assert(tlb_event);
824 
825  DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
826  curTick() + this->ticks(hitLatency));
827 
828  schedule(tlb_event, curTick() + this->ticks(hitLatency));
829  }
830 
// TLBEvent constructor: records the owning TLB, the page address, the
// lookup outcome and the in-flight packet; runs at CPU-tick priority.
// NOTE(review): the first signature line (orig. 831, presumably
// "GpuTLB::TLBEvent::TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome
// tlb_outcome,") was lost in extraction.
832  PacketPtr _pkt)
833  : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
834  outcome(tlb_outcome), pkt(_pkt)
835  {
836  }
837 
// Replicate the paging protection checks of translate() for the timing
// path. Unlike translate(), a violation here panics instead of
// returning a PageFault.
// NOTE(review): the first signature line (orig. 843, presumably
// "GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,")
// was lost in extraction.
842  void
844  TlbEntry * tlb_entry, Mode mode)
845  {
846  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
847  uint32_t flags = pkt->req->getFlags();
848  bool storeCheck = flags & (StoreCheck << FlagShift);
849 
850  // Do paging protection checks.
851  bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
852  CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
853 
854  bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
855 
856  if ((inUser && !tlb_entry->user) ||
857  (mode == BaseTLB::Write && badWrite)) {
858  // The page must have been present to get into the TLB in
859  // the first place. We'll assume the reserved bits are
860  // fine even though we're not checking them.
861  panic("Page fault detected");
862  }
863 
864  if (storeCheck && badWrite) {
865  // This would fault if this were a write, so return a page
866  // fault that reflects that happening.
867  panic("Page fault detected");
868  }
869  }
870 
// Complete a timing translation: optionally allocate the returned entry
// (per allocationPolicy), run protection checks, fill in the paddr,
// convert the packet to a response, send it back up through the
// CPU-side port, and queue cleanup of the return-event bookkeeping.
876  void
877  GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
878  PacketPtr pkt)
879  {
880 
881  assert(pkt);
882  Addr vaddr = pkt->req->getVaddr();
883 
// NOTE(review): orig. line 885 (presumably the safe_cast of
// pkt->senderState to TranslationState*) was dropped in extraction.
884  TranslationState *sender_state =
886 
887  ThreadContext *tc = sender_state->tc;
888  Mode mode = sender_state->tlbMode;
889 
890  TlbEntry *local_entry, *new_entry;
891 
892  if (tlb_outcome == TLB_HIT) {
893  DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
894  local_entry = sender_state->tlbEntry;
895  } else {
896  DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
897  vaddr);
898 
899  // We are returning either from a page walk or from a hit at a lower
900  // TLB level. The senderState should be "carrying" a pointer to the
901  // correct TLBEntry.
902  new_entry = sender_state->tlbEntry;
903  assert(new_entry);
904  local_entry = new_entry;
905 
906  if (allocationPolicy) {
907  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
908  virt_page_addr);
909 
910  local_entry = insert(virt_page_addr, *new_entry);
911  }
912 
913  assert(local_entry);
914  }
915 
// NOTE(review): comment lines 916-920 were dropped in extraction.
921  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
922  "while paddr was %#x.\n", local_entry->vaddr,
923  local_entry->paddr);
924 
925  pagingProtectionChecks(tc, pkt, local_entry, mode);
926  int page_size = local_entry->size();
927  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
928  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
929 
930  // Since this packet will be sent through the cpu side slave port,
931  // it must be converted to a response pkt if it is not one already
932  if (pkt->isRequest()) {
933  pkt->makeTimingResponse();
934  }
935 
936  pkt->req->setPaddr(paddr);
937 
938  if (local_entry->uncacheable) {
939  pkt->req->setFlags(Request::UNCACHEABLE);
940  }
941 
942  //send packet back to coalescer
943  cpuSidePort[0]->sendTimingResp(pkt);
944  //schedule cleanup event
945  cleanupQueue.push(virt_page_addr);
946 
947  // schedule this only once per cycle.
948  // The check is required because we might have multiple translations
949  // returning the same cycle
950  // this is a maximum priority event and must be on the same cycle
951  // as the cleanup event in TLBCoalescer to avoid a race with
952  // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
953  if (!cleanupEvent.scheduled())
// NOTE(review): orig. line 954 (the schedule() call for cleanupEvent)
// was dropped in extraction.
955  }
956 
// Act on a TLBEvent's outcome: finish a hit; on a miss either forward
// the request to a lower TLB level (if one is connected) or reschedule
// this event as a PAGE_WALK after missLatency2; a PAGE_WALK consults
// the page table directly; MISS_RETURN completes a miss serviced below.
// NOTE(review): extraction dropped orig. line 962 (the signature,
// presumably "GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome
// outcome,"), 971 (safe_cast of pkt->senderState), comment lines
// 1002-1006, line 1024 (the lookup of tlb_event in
// translationReturnEvent), and comment lines 1065-1067.
961  void
963  PacketPtr pkt)
964  {
965  DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
966 
967  assert(translationReturnEvent[virtPageAddr]);
968  assert(pkt);
969 
970  TranslationState *tmp_sender_state =
972 
973  int req_cnt = tmp_sender_state->reqCnt.back();
974  bool update_stats = !tmp_sender_state->prefetch;
975 
976 
977  if (outcome == TLB_HIT) {
978  handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
979 
980  if (update_stats) {
981  accessCycles += (req_cnt * curTick());
982  localCycles += curTick();
983  }
984 
985  } else if (outcome == TLB_MISS) {
986 
987  DPRINTF(GPUTLB, "This is a TLB miss\n");
988  if (update_stats) {
989  accessCycles += (req_cnt*curTick());
990  localCycles += curTick();
991  }
992 
993  if (hasMemSidePort) {
994  // the one cyle added here represent the delay from when we get
995  // the reply back till when we propagate it to the coalescer
996  // above.
997  if (update_stats) {
998  accessCycles += (req_cnt * 1);
999  localCycles += 1;
1000  }
1001 
// If the lower level rejects the request, park the packet on the
// port's retry list; it is resent on recvReqRetry.
1007  if (!memSidePort[0]->sendTimingReq(pkt)) {
1008  DPRINTF(GPUTLB, "Failed sending translation request to "
1009  "lower level TLB for addr %#x\n", virtPageAddr);
1010 
1011  memSidePort[0]->retries.push_back(pkt);
1012  } else {
1013  DPRINTF(GPUTLB, "Sent translation request to lower level "
1014  "TLB for addr %#x\n", virtPageAddr);
1015  }
1016  } else {
1017  //this is the last level TLB. Start a page walk
1018  DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1019  "addr %#x\n", virtPageAddr);
1020 
1021  if (update_stats)
1022  pageTableCycles -= (req_cnt*curTick());
1023 
1025  assert(tlb_event);
1026  tlb_event->updateOutcome(PAGE_WALK);
1027  schedule(tlb_event, curTick() + ticks(missLatency2));
1028  }
1029  } else if (outcome == PAGE_WALK) {
1030  if (update_stats)
1031  pageTableCycles += (req_cnt*curTick());
1032 
1033  // Need to access the page table and update the TLB
1034  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1035  virtPageAddr);
1036 
// NOTE(review): orig. line 1038 (safe_cast of pkt->senderState) was
// dropped in extraction.
1037  TranslationState *sender_state =
1039 
1040  Process *p = sender_state->tc->getProcessPtr();
1041  Addr vaddr = pkt->req->getVaddr();
// NOTE(review): alignedVaddr is declared only under #ifndef NDEBUG but
// is referenced by the DPRINTFs below -- this would not compile with
// NDEBUG defined. Worth fixing upstream.
1042  #ifndef NDEBUG
1043  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1044  assert(alignedVaddr == virtPageAddr);
1045  #endif
1046  const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1047  if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1048  p->fixupFault(vaddr)) {
1049  pte = p->pTable->lookup(vaddr);
1050  }
1051 
1052  if (pte) {
1053  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1054  pte->paddr);
1055 
1056  sender_state->tlbEntry =
1057  new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1058  false);
1059  } else {
1060  sender_state->tlbEntry = nullptr;
1061  }
1062 
1063  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1064  } else if (outcome == MISS_RETURN) {
1068  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1069  } else {
1070  panic("Unexpected TLB outcome %d", outcome);
1071  }
1072  }
1073 
// TLBEvent helpers: process() (body dropped -- orig. line 1077
// presumably called tlb->translationReturn(virtPageAddr, outcome, pkt)),
// description(), updateOutcome() (used to morph a MISS event into a
// PAGE_WALK), and the virtual-page-address accessor.
// NOTE(review): the signature lines (orig. 1075 process, 1081
// description, 1087 updateOutcome, 1093 getTLBEventVaddr) were lost in
// extraction.
1074  void
1076  {
1078  }
1079 
1080  const char*
1082  {
1083  return "trigger translationDoneEvent";
1084  }
1085 
1086  void
1088  {
1089  outcome = _outcome;
1090  }
1091 
1092  Addr
1094  {
1095  return virtPageAddr;
1096  }
1097 
1098  /*
1099  * recvTiming receives a coalesced timing request from a TLBCoalescer
1100  * and it calls issueTLBLookup()
1101  * It only rejects the packet if we have exceeded the max
1102  * outstanding number of requests for the TLB
1103  */
// NOTE(review): extraction dropped orig. line 1105 (the signature,
// presumably "GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)") and
// line 1107 -- the guard, presumably
// "if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {". Restore from
// upstream gem5.
1104  bool
1106  {
1108  tlb->issueTLBLookup(pkt);
1109  // update number of outstanding translation requests
1110  tlb->outstandingReqs++;
1111  return true;
1112  } else {
1113  DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1114  tlb->outstandingReqs);
1115  return false;
1116  }
1117  }
1118 
// Functional counterpart of handleTranslationReturn(): optionally
// allocate the returned entry, run protection checks (skipped for
// prefetches or when no entry came back), and set the request's paddr.
// No packet is sent anywhere -- the caller owns the packet.
// NOTE(review): extraction dropped orig. line 1128 (the signature,
// presumably "GpuTLB::handleFuncTranslationReturn(PacketPtr pkt,
// tlbOutcome tlb_outcome)"), 1131 (safe_cast of pkt->senderState), and
// comment lines 1170-1181.
1127  void
1129  {
1130  TranslationState *sender_state =
1132 
1133  ThreadContext *tc = sender_state->tc;
1134  Mode mode = sender_state->tlbMode;
1135  Addr vaddr = pkt->req->getVaddr();
1136 
1137  TlbEntry *local_entry, *new_entry;
1138 
1139  if (tlb_outcome == TLB_HIT) {
1140  DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1141  "%#x\n", vaddr);
1142 
1143  local_entry = sender_state->tlbEntry;
1144  } else {
1145  DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1146  "%#x\n", vaddr);
1147 
1148  // We are returning either from a page walk or from a hit at a lower
1149  // TLB level. The senderState should be "carrying" a pointer to the
1150  // correct TLBEntry.
1151  new_entry = sender_state->tlbEntry;
1152  assert(new_entry);
1153  local_entry = new_entry;
1154 
1155  if (allocationPolicy) {
1156  Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1157 
1158  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1159  virt_page_addr);
1160 
1161  local_entry = insert(virt_page_addr, *new_entry);
1162  }
1163 
1164  assert(local_entry);
1165  }
1166 
1167  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1168  "while paddr was %#x.\n", local_entry->vaddr,
1169  local_entry->paddr);
1170 
1182  if (!sender_state->prefetch && sender_state->tlbEntry)
1183  pagingProtectionChecks(tc, pkt, local_entry, mode);
1184 
1185  int page_size = local_entry->size();
1186  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1187  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1188 
1189  pkt->req->setPaddr(paddr);
1190 
1191  if (local_entry->uncacheable)
1192  pkt->req->setFlags(Request::UNCACHEABLE);
1193  }
1194 
// Functional (atomic) translation path: look up the TLB in place; on a
// miss either forward functionally to the lower TLB level or walk the
// page table directly, then finish via handleFuncTranslationReturn().
// Failed prefetch translations return early without setting a paddr.
// NOTE(review): extraction dropped orig. line 1198 (the signature,
// presumably "GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)"),
// 1201 (safe_cast of pkt->senderState), 1207 ("TheISA::PageBytes);"
// completing the roundDown), 1219 (presumably the local/global access
// stat updates), and 1229 (presumably the miss stat update).
1195  // This is used for atomic translations. Need to
1196  // make it all happen during the same cycle.
1197  void
1199  {
1200  TranslationState *sender_state =
1202 
1203  ThreadContext *tc = sender_state->tc;
1204  bool update_stats = !sender_state->prefetch;
1205 
1206  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1208 
1209  if (update_stats)
1210  tlb->updatePageFootprint(virt_page_addr);
1211 
1212  // do the TLB lookup without updating the stats
1213  bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1214  tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1215 
1216  // functional mode means no coalescing
1217  // global metrics are the same as the local metrics
1218  if (update_stats) {
1220 
1221  if (success) {
1222  sender_state->hitLevel = sender_state->reqCnt.size();
1223  tlb->globalNumTLBHits++;
1224  }
1225  }
1226 
1227  if (!success) {
1228  if (update_stats)
1230  if (tlb->hasMemSidePort) {
1231  // there is a TLB below -> propagate down the TLB hierarchy
1232  tlb->memSidePort[0]->sendFunctional(pkt);
1233  // If no valid translation from a prefetch, then just return
1234  if (sender_state->prefetch && !pkt->req->hasPaddr())
1235  return;
1236  } else {
1237  // Need to access the page table and update the TLB
1238  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1239  virt_page_addr);
1240 
1241  Process *p = tc->getProcessPtr();
1242 
1243  Addr vaddr = pkt->req->getVaddr();
// NOTE(review): as in translationReturn(), alignedVaddr is declared
// under #ifndef NDEBUG yet used by the DPRINTFs below -- this would
// not compile with NDEBUG defined.
1244  #ifndef NDEBUG
1245  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1246  assert(alignedVaddr == virt_page_addr);
1247  #endif
1248 
1249  const EmulationPageTable::Entry *pte =
1250  p->pTable->lookup(vaddr);
1251  if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1252  p->fixupFault(vaddr)) {
1253  pte = p->pTable->lookup(vaddr);
1254  }
1255 
1256  if (!sender_state->prefetch) {
1257  // no PageFaults are permitted after
1258  // the second page table lookup
1259  assert(pte);
1260 
1261  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1262  pte->paddr);
1263 
1264  sender_state->tlbEntry =
1265  new TlbEntry(p->pid(), virt_page_addr,
1266  pte->paddr, false, false);
1267  } else {
1268  // If this was a prefetch, then do the normal thing if it
1269  // was a successful translation. Otherwise, send an empty
1270  // TLB entry back so that it can be figured out as empty and
1271  // handled accordingly.
1272  if (pte) {
1273  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1274  pte->paddr);
1275 
1276  sender_state->tlbEntry =
1277  new TlbEntry(p->pid(), virt_page_addr,
1278  pte->paddr, false, false);
1279  } else {
1280  DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1281  alignedVaddr);
1282 
1283  sender_state->tlbEntry = nullptr;
1284 
1285  return;
1286  }
1287  }
1288  }
1289  } else {
1290  DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1291  tlb->lookup(pkt->req->getVaddr()));
1292 
1293  TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1294  update_stats);
1295 
1296  assert(entry);
1297 
1298  auto p = sender_state->tc->getProcessPtr();
1299  sender_state->tlbEntry =
1300  new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1301  false, false);
1302  }
1303  // This is the function that would populate pkt->req with the paddr of
1304  // the translation. But if no translation happens (i.e Prefetch fails)
1305  // then the early returns in the above code wiill keep this function
1306  // from executing.
1307  tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1308  }
1309 
1310  void
// NOTE(review): per the index at the bottom of this listing, this is
// GpuTLB::CpuSidePort::recvReqRetry() (defined at gpu_tlb.cc:1311); the
// signature line was lost in extraction. Intentionally fatal: this port
// only ever sends replies, so a retry request can never legally arrive.
1312  {
1313  // The CPUSidePort never sends anything but replies. No retries
1314  // expected.
1315  panic("recvReqRetry called");
1316  }
1317 
// NOTE(review): per the index at the bottom of this listing, this is
// GpuTLB::CpuSidePort::getAddrRanges() (defined at gpu_tlb.cc:1319); the
// return-type and signature lines were lost in extraction. It
// deliberately returns an empty AddrRangeList: the master side does not
// inspect these ranges.
1320  {
1321  // currently not checked by the master
1322  AddrRangeList ranges;
1323 
1324  return ranges;
1325  }
1326 
1332  bool
// NOTE(review): per the index at the bottom of this listing, this is
// GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt) (defined at
// gpu_tlb.cc:1333); the signature line was lost in extraction.
// Handles a translation response returning from a lower-level TLB.
1334  {
1335  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
// NOTE(review): missing line 1336 -- the page-size alignment argument to
// roundDown (presumably TheISA::PageBytes) was dropped in extraction.
1337 
1338  DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1339  virt_page_addr);
1340 
// A TLBEvent must already have been registered for this page when the
// miss was sent downstream; reuse it to process the returning response.
1341  TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1342  assert(tlb_event);
1343  assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1344 
// Reschedule the event one TLB cycle out with the MISS_RETURN outcome.
1345  tlb_event->updateOutcome(MISS_RETURN);
1346  tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1347 
// Always accept the response; there is no retry path on this port.
1348  return true;
1349  }
1350 
1351  void
// NOTE(review): per the index at the bottom of this listing, this is
// GpuTLB::MemSidePort::recvReqRetry() (defined at gpu_tlb.cc:1352); the
// signature line was lost in extraction. Intentionally fatal: retries are
// supposed to terminate at the TLBCoalescer, never at the TLB itself.
1353  {
1354  // No retries should reach the TLB. The retries
1355  // should only reach the TLBCoalescer.
1356  panic("recvReqRetry called");
1357  }
1358 
1359  void
// NOTE(review): per the index at the bottom of this listing, this is
// GpuTLB::cleanup() (defined at gpu_tlb.cc:1360); the signature line was
// lost in extraction. Presumably run via the cleanupEvent wrapper after
// translations finish -- confirm against gem5.
1361  {
// Retire every completed translation: destroy its TLBEvent bookkeeping
// and free up an outstanding-request slot.
1362  while (!cleanupQueue.empty()) {
1363  Addr cleanup_addr = cleanupQueue.front();
1364  cleanupQueue.pop();
1365 
1366  // delete TLBEvent
1367  TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1368  delete old_tlb_event;
1369  translationReturnEvent.erase(cleanup_addr);
1370 
1371  // update number of outstanding requests
1372  outstandingReqs--;
1373  }
1374 
// NOTE(review): lines 1375-1377 (a comment block) were lost in
// extraction. Now that slots have been freed, prompt the CPU-side ports
// to retry any requests they previously had refused.
1378  for (int i = 0; i < cpuSidePort.size(); ++i) {
1379  cpuSidePort[i]->sendRetryReq();
1380  }
1381  }
1382 
1383  void
// NOTE(review): per the index at the bottom of this listing, this is
// GpuTLB::updatePageFootprint(Addr virt_page_addr) (defined at
// gpu_tlb.cc:1384); the signature line was lost in extraction.
// Records one access to the given virtual page in the TLBFootprint map
// (unique-page count, tick-based reuse distance, optional access trace).
1385  {
1386 
// NOTE(review): missing line 1387 -- presumably the declaration of
// `ret` as std::pair<AccessPatternTable::iterator, bool>; confirm
// against gem5.
1388 
// Candidate record for a first-touch insert; only kept if the page is
// not already tracked.
1389  AccessInfo tmp_access_info;
1390  tmp_access_info.lastTimeAccessed = 0;
1391  tmp_access_info.accessesPerPage = 0;
1392  tmp_access_info.totalReuseDistance = 0;
1393  tmp_access_info.sumDistance = 0;
1394  tmp_access_info.meanDistance = 0;
1395 
// insert() leaves the map untouched if the key exists; ret.second tells
// us whether this was the first access to this page.
1396  ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1397  tmp_access_info));
1398 
1399  bool first_page_access = ret.second;
1400 
1401  if (first_page_access) {
1402  numUniquePages++;
1403  } else {
// Reuse distance is measured in ticks since the previous access to
// this same page.
1404  int accessed_before;
1405  accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1406  ret.first->second.totalReuseDistance += accessed_before;
1407  }
1408 
1409  ret.first->second.accessesPerPage++;
1410  ret.first->second.lastTimeAccessed = curTick();
1411 
// Optionally log the running access number so access distances can be
// post-processed (see the exit-time statistics dump below).
1412  if (accessDistance) {
1413  ret.first->second.localTLBAccesses
1414  .push_back(localNumTLBAccesses.value());
1415  }
1416  }
1417 
1418  void
// NOTE(review): per the index at the bottom of this listing, this is
// GpuTLB::exitCallback() (defined at gpu_tlb.cc:1419); the signature line
// was lost in extraction. Computes and dumps per-page access statistics
// at end of simulation, then clears the footprint map.
1420  {
1421  std::ostream *page_stat_file = nullptr;
1422 
1423  if (accessDistance) {
1424 
1425  // print per page statistics to a separate file (.csv format)
1426  // simout is the gem5 output directory (default is m5out or the one
1427  // specified with -d)
1428  page_stat_file = simout.create(name().c_str())->stream();
1429 
1430  // print header
1431  *page_stat_file << "page,max_access_distance,mean_access_distance, "
1432  << "stddev_distance" << std::endl;
1433  }
1434 
1435  // update avg. reuse distance footprint
1436  AccessPatternTable::iterator iter, iter_begin, iter_end;
1437  unsigned int sum_avg_reuse_distance_per_page = 0;
1438 
1439  // iterate through all pages seen by this TLB
1440  for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
// Integer division: per-page average reuse distance in ticks.
1441  sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1442  iter->second.accessesPerPage;
1443 
1444  if (accessDistance) {
// localTLBAccesses holds absolute access numbers; this pass rewrites
// each element in place as the distance from the previous access.
1445  unsigned int tmp = iter->second.localTLBAccesses[0];
1446  unsigned int prev = tmp;
1447 
1448  for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1449  if (i) {
1450  tmp = prev + 1;
1451  }
1452 
1453  prev = iter->second.localTLBAccesses[i];
1454  // update the localTLBAccesses value
1455  // with the actual difference
1456  iter->second.localTLBAccesses[i] -= tmp;
1457  // compute the sum of AccessDistance per page
1458  // used later for mean
1459  iter->second.sumDistance +=
1460  iter->second.localTLBAccesses[i];
1461  }
1462 
1463  iter->second.meanDistance =
1464  iter->second.sumDistance / iter->second.accessesPerPage;
1465 
1466  // compute std_dev and max (we need a second round because we
1467  // need to know the mean value)
1468  unsigned int max_distance = 0;
1469  unsigned int stddev_distance = 0;
1470 
1471  for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1472  unsigned int tmp_access_distance =
1473  iter->second.localTLBAccesses[i];
1474 
1475  if (tmp_access_distance > max_distance) {
1476  max_distance = tmp_access_distance;
1477  }
1478 
// Unsigned subtraction wraps if the distance is below the mean,
// but squaring makes the accumulated value correct modulo 2^32.
1479  unsigned int diff =
1480  tmp_access_distance - iter->second.meanDistance;
1481  stddev_distance += pow(diff, 2);
1482 
1483  }
1484 
1485  stddev_distance =
1486  sqrt(stddev_distance/iter->second.accessesPerPage);
1487 
1488  if (page_stat_file) {
// one CSV row per page: page (hex), max, mean, stddev (decimal)
1489  *page_stat_file << std::hex << iter->first << ",";
1490  *page_stat_file << std::dec << max_distance << ",";
1491  *page_stat_file << std::dec << iter->second.meanDistance
1492  << ",";
1493  *page_stat_file << std::dec << stddev_distance;
1494  *page_stat_file << std::endl;
1495  }
1496 
1497  // erase the localTLBAccesses array
1498  iter->second.localTLBAccesses.clear();
1499  }
1500  }
1501 
1502  if (!TLBFootprint.empty()) {
// NOTE(review): missing line 1503 -- presumably `avgReuseDistance =`
// (the Stats::Scalar declared in gpu_tlb.hh:217); confirm against gem5.
1504  sum_avg_reuse_distance_per_page / TLBFootprint.size();
1505  }
1506 
1507  //clear the TLBFootprint map
1508  TLBFootprint.clear();
1509  }
1510 } // namespace X86ISA
1511 
// NOTE(review): the factory's return-type line (1512, presumably
// `X86ISA::GpuTLB *`) was lost in extraction. This is the standard gem5
// param-struct factory: it constructs the SimObject from its Python-side
// parameters.
1513 X86GPUTLBParams::create()
1514 {
1515  return new X86ISA::GpuTLB(this);
1516 }
1517 
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
AccessPatternTable TLBFootprint
Definition: gpu_tlb.hh:442
#define DPRINTF(x,...)
Definition: trace.hh:225
unsigned int accessesPerPage
Definition: gpu_tlb.hh:424
static const Priority Maximum_Pri
Maximum priority.
Definition: eventq.hh:235
const Addr PhysAddrPrefixPciConfig
Definition: x86_traits.hh:73
offset
Definition: misc.hh:1024
Stats::Formula globalTLBMissRate
Definition: gpu_tlb.hh:204
Ports are used to interface objects to each other.
Definition: port.hh:56
OutputDirectory simout
Definition: output.cc:61
virtual void setMiscReg(RegIndex misc_reg, RegVal val)=0
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: gpu_tlb.hh:147
Stats::Scalar localCycles
Definition: gpu_tlb.hh:212
const int FlagShift
Definition: ldstflags.hh:50
decltype(nullptr) constexpr NoFault
Definition: types.hh:243
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:81
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:171
Bitfield< 7 > i
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: gpu_tlb.cc:653
STL pair class.
Definition: stl.hh:58
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
Definition: output.cc:207
TLB TranslationState: this currently is a somewhat bastardization of the usage of SenderState...
Definition: gpu_tlb.hh:329
const Addr PageShift
Definition: isa_traits.hh:55
Stats::Scalar avgReuseDistance
Definition: gpu_tlb.hh:217
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
Definition: gpu_tlb.cc:962
void makeTimingResponse()
Definition: packet.hh:949
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: gpu_tlb.cc:1128
Stats::Scalar accessCycles
Definition: gpu_tlb.hh:207
Stats::Formula localTLBMissRate
Definition: gpu_tlb.hh:196
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: gpu_tlb.cc:843
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void invalidateAll()
Definition: gpu_tlb.cc:228
std::shared_ptr< Request > RequestPtr
Definition: request.hh:81
unsigned int meanDistance
Definition: gpu_tlb.hh:438
void exitCallback()
Definition: gpu_tlb.cc:1419
Walker * getWalker()
Definition: gpu_tlb.cc:646
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:185
std::vector< TlbEntry > tlb
Definition: gpu_tlb.hh:160
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
Definition: gpu_tlb.cc:622
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:132
virtual Process * getProcessPtr()=0
uint64_t RegVal
Definition: types.hh:166
MemSidePort is the TLB Port closer to the memory side If this is a last level TLB then this port will...
Definition: gpu_tlb.hh:285
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: gpu_tlb.hh:152
unsigned int totalReuseDistance
Definition: gpu_tlb.hh:426
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: gpu_tlb.cc:419
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: gpu_tlb.cc:137
const Addr IntAddrPrefixCPUID
Definition: x86_traits.hh:68
unsigned int lastTimeAccessed
Definition: gpu_tlb.hh:423
const Addr PageBytes
Definition: isa_traits.hh:51
T letoh(T value)
Definition: byteswap.hh:141
Bitfield< 14 > expandDown
Definition: misc.hh:996
Definition: cprintf.cc:40
unsigned int sumDistance
Definition: gpu_tlb.hh:437
Bitfield< 4, 0 > mode
Stats::Scalar localNumTLBMisses
Definition: gpu_tlb.hh:195
int maxCoalescedReqs
Definition: gpu_tlb.hh:365
ThreadContext is the external interface to all thread state for anything outside of the CPU...
enum BaseTLB::Mode Mode
Definition: gpu_tlb.hh:96
The request is to an uncacheable address.
Definition: request.hh:113
TlbEntry * insert(Addr vpn, TlbEntry &entry)
Definition: gpu_tlb.cc:159
MiscRegIndex
Definition: misc.hh:100
std::ostream * stream() const
Get the output underlying output stream.
Definition: output.hh:59
const Addr IntAddrPrefixMask
Definition: x86_traits.hh:67
RequestPtr req
A pointer to the original request.
Definition: packet.hh:321
Walker * walker
Definition: gpu_tlb.hh:124
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
Definition: gpu_tlb.cc:831
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: gpu_tlb.cc:1352
Stats::Scalar numUniquePages
Definition: gpu_tlb.hh:210
Stats::Scalar globalNumTLBMisses
Definition: gpu_tlb.hh:203
std::queue< Addr > cleanupQueue
Definition: gpu_tlb.hh:408
bool isRequest() const
Definition: packet.hh:525
std::vector< MemSidePort * > memSidePort
Definition: gpu_tlb.hh:308
T htole(T value)
Definition: byteswap.hh:140
bool accessDistance
Print out accessDistance stats.
Definition: gpu_tlb.hh:158
Addr pageAlign(Addr a)
Definition: page_table.hh:105
Tick curTick()
The current simulated tick.
Definition: core.hh:44
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: gpu_tlb.cc:632
Flags flags
Definition: eventq.hh:267
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itsle...
virtual const std::string name() const
Definition: eventq.cc:82
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:158
EventFunctionWrapper exitEvent
Definition: gpu_tlb.hh:447
static MiscRegIndex MISCREG_SEG_ATTR(int index)
Definition: misc.hh:533
Stats::Formula localLatency
Definition: gpu_tlb.hh:214
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
Definition: misc.hh:526
int outstandingReqs
Definition: gpu_tlb.hh:369
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it&#39;s used to guide replacement decisions...
Definition: gpu_tlb.hh:176
void invalidateNonGlobal()
Definition: gpu_tlb.cc:248
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
Definition: packet.hh:1152
uint64_t pid()
Definition: process.hh:84
void setConfigAddress(uint32_t addr)
Definition: gpu_tlb.cc:242
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
Definition: gpu_tlb.cc:1333
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
mask
Definition: misc.hh:796
EventFunctionWrapper cleanupEvent
Definition: gpu_tlb.hh:414
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: gpu_tlb.hh:421
X86GPUTLBParams Params
Definition: gpu_tlb.hh:92
void demapPage(Addr va, uint64_t asn)
Definition: gpu_tlb.cc:266
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
Definition: gpu_tlb.cc:1198
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: gpu_tlb.hh:404
void schedule(Event &event, Tick when)
Definition: eventq.hh:934
const Addr IntAddrPrefixMSR
Definition: x86_traits.hh:69
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:131
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
Definition: gpu_tlb.cc:877
std::vector< CpuSidePort * > cpuSidePort
Definition: gpu_tlb.hh:306
virtual Addr instAddr() const =0
TlbEntry * lookup(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:215
Stats::Scalar globalNumTLBAccesses
Definition: gpu_tlb.hh:201
void updateOutcome(tlbOutcome _outcome)
Definition: gpu_tlb.cc:1087
static MiscRegIndex MISCREG_SEG_SEL(int index)
Definition: misc.hh:505
Bitfield< 2, 0 > seg
Definition: types.hh:82
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
std::vector< EntryList > freeList
Definition: gpu_tlb.hh:167
const Request::FlagsType M5_VAR_USED SegmentFlagMask
Definition: ldstflags.hh:49
T safe_cast(U ptr)
Definition: cast.hh:59
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
Bitfield< 8 > va
#define warn_once(...)
Definition: logging.hh:212
const Addr IntAddrPrefixIO
Definition: x86_traits.hh:70
bool msrAddrToIndex(MiscRegIndex &regNum, Addr addr)
Find and return the misc reg corresponding to an MSR address.
Definition: msr.cc:147
void regStats() override
Callback to set stat parameters.
Definition: gpu_tlb.cc:663
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
Definition: gpu_tlb.cc:301
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:459
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: gpu_tlb.cc:658
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:276
virtual const std::string name() const
Definition: sim_object.hh:129
BitfieldType< SegDescriptorLimit > limit
Definition: misc.hh:924
uint32_t configAddress
Definition: gpu_tlb.hh:70
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
Definition: gpu_tlb.cc:1105
EmulationPageTable * pTable
Definition: process.hh:174
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
Declarations of a non-full system Page Table.
bool fixupFault(Addr vaddr)
Attempt to fix up a fault at vaddr by allocating a page on the stack.
Definition: process.cc:355
static MiscRegIndex MISCREG_SEG_BASE(int index)
Definition: misc.hh:512
std::ostream CheckpointOut
Definition: serialize.hh:63
This is exposed globally, independent of the ISA.
Definition: acpi.hh:55
const char * description() const
Return a C string describing the event.
Definition: gpu_tlb.cc:1081
int missLatency2
Definition: gpu_tlb.hh:189
SenderState * senderState
This packet&#39;s sender state.
Definition: packet.hh:474
Definition: eventq.hh:245
void cleanup()
Definition: gpu_tlb.cc:1360
int missLatency1
Definition: gpu_tlb.hh:188
virtual ContextID contextId() const =0
const Entry * lookup(Addr vaddr)
Lookup function.
Definition: page_table.cc:130
Stats::Scalar pageTableCycles
Definition: gpu_tlb.hh:209
Tick ticks(int numCycles) const
Definition: gpu_tlb.hh:84
Stats::Scalar localNumTLBHits
Definition: gpu_tlb.hh:194
const Addr PageBytes
Definition: isa_traits.hh:56
const Addr PhysAddrPrefixIO
Definition: x86_traits.hh:72
The request is required to be strictly ordered by CPU models and is non-speculative.
Definition: request.hh:123
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:309
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:235
Bitfield< 0 > p
Definition: pagetable.hh:151
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: gpu_tlb.cc:1319
GpuTLB(const Params *p)
Definition: gpu_tlb.cc:65
virtual void regStats()
Callback to set stat parameters.
Definition: group.cc:64
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
Definition: bitfield.hh:95
T bits(T val, int first, int last)
Extract the bitfield from position &#39;first&#39; to &#39;last&#39; (inclusive) from &#39;val&#39; and right justify it...
Definition: bitfield.hh:71
bool FA
true if this is a fully-associative TLB
Definition: gpu_tlb.hh:140
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Definition: x86_traits.hh:93
virtual RegVal readMiscReg(RegIndex misc_reg)=0
Stats::Scalar localNumTLBAccesses
Definition: gpu_tlb.hh:193
Bitfield< 1 > x
Definition: types.hh:103
virtual void recvReqRetry()
Definition: gpu_tlb.cc:1311
const char data[]
Stats::Scalar globalNumTLBHits
Definition: gpu_tlb.hh:202
std::shared_ptr< FaultBase > Fault
Definition: types.hh:238
Bitfield< 3 > addr
Definition: types.hh:79
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:700
tlbOutcome outcome
outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
Definition: gpu_tlb.hh:389
std::vector< int > reqCnt
Definition: gpu_tlb.hh:351
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access="" latency>=""> c...
Definition: gpu_tlb.cc:751
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss...
Definition: gpu_tlb.cc:370
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:152
void updatePageFootprint(Addr virt_page_addr)
Definition: gpu_tlb.cc:1384

Generated on Fri Jul 3 2020 15:53:03 for gem5 by doxygen 1.8.13