gem5  v20.1.0.0
gpu_tlb.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Lisa Hsu
34  */
35 
36 #include "gpu-compute/gpu_tlb.hh"
37 
38 #include <cmath>
39 #include <cstring>
40 
41 #include "arch/x86/faults.hh"
43 #include "arch/x86/isa_traits.hh"
44 #include "arch/x86/pagetable.hh"
46 #include "arch/x86/regs/misc.hh"
47 #include "arch/x86/regs/msr.hh"
48 #include "arch/x86/x86_traits.hh"
49 #include "base/bitfield.hh"
50 #include "base/logging.hh"
51 #include "base/output.hh"
52 #include "base/trace.hh"
53 #include "cpu/base.hh"
54 #include "cpu/thread_context.hh"
55 #include "debug/GPUPrefetch.hh"
56 #include "debug/GPUTLB.hh"
57 #include "mem/packet_access.hh"
58 #include "mem/page_table.hh"
59 #include "mem/request.hh"
60 #include "sim/process.hh"
61 #include "sim/pseudo_inst.hh"
62 
63 namespace X86ISA
64 {
65 
67  : ClockedObject(p), configAddress(0), size(p->size),
68  cleanupEvent([this]{ cleanup(); }, name(), false,
70  exitEvent([this]{ exitCallback(); }, name())
71  {
72  assoc = p->assoc;
73  assert(assoc <= size);
74  numSets = size/assoc;
75  allocationPolicy = p->allocationPolicy;
76  hasMemSidePort = false;
77  accessDistance = p->accessDistance;
78 
79  tlb.assign(size, TlbEntry());
80 
81  freeList.resize(numSets);
82  entryList.resize(numSets);
83 
84  for (int set = 0; set < numSets; ++set) {
85  for (int way = 0; way < assoc; ++way) {
86  int x = set * assoc + way;
87  freeList[set].push_back(&tlb.at(x));
88  }
89  }
90 
91  FA = (size == assoc);
92 
101  setMask = numSets - 1;
102 
103  maxCoalescedReqs = p->maxOutstandingReqs;
104 
105  // Do not allow maxCoalescedReqs to be more than the TLB associativity
106  if (maxCoalescedReqs > assoc) {
107  maxCoalescedReqs = assoc;
108  cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
109  }
110 
111  outstandingReqs = 0;
112  hitLatency = p->hitLatency;
113  missLatency1 = p->missLatency1;
114  missLatency2 = p->missLatency2;
115 
116  // create the response ports based on the number of connected ports
117  for (size_t i = 0; i < p->port_cpu_side_ports_connection_count; ++i) {
118  cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
119  name(), i), this, i));
120  }
121 
122  // create the request ports based on the number of connected ports
123  for (size_t i = 0; i < p->port_mem_side_ports_connection_count; ++i) {
124  memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
125  name(), i), this, i));
126  }
127  }
128 
// fixme: this is never called?
// Destructor body. NOTE(review): the declarator line is missing from
// this listing -- presumably GpuTLB::~GpuTLB(); confirm against the
// full source.
{
    // On teardown there must be no in-flight translations: every
    // scheduled TLBEvent should have been consumed and removed from
    // the per-virtual-page event map.
    // make sure all the hash-maps are empty
    assert(translationReturnEvent.empty());
}
135 
136  Port &
137  GpuTLB::getPort(const std::string &if_name, PortID idx)
138  {
139  if (if_name == "cpu_side_ports") {
140  if (idx >= static_cast<PortID>(cpuSidePort.size())) {
141  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
142  }
143 
144  return *cpuSidePort[idx];
145  } else if (if_name == "mem_side_ports") {
146  if (idx >= static_cast<PortID>(memSidePort.size())) {
147  panic("TLBCoalescer::getPort: unknown index %d\n", idx);
148  }
149 
150  hasMemSidePort = true;
151 
152  return *memSidePort[idx];
153  } else {
154  panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
155  }
156  }
157 
TlbEntry*
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::insert(Addr vpn, TlbEntry &entry); confirm
// against the full source.
{
    TlbEntry *newEntry = nullptr;

    // Set that this virtual page number maps to.
    int set = (vpn >> TheISA::PageShift) & setMask;

    if (!freeList[set].empty()) {
        // Grab an unused entry from the set's free list.
        newEntry = freeList[set].front();
        freeList[set].pop_front();
    } else {
        // Set is full: evict the LRU entry (back of the in-use list)
        // and reuse its storage.
        newEntry = entryList[set].back();
        entryList[set].pop_back();
    }

    // Copy the translation in, tag it with the page address, and place
    // it at the MRU (front) position of the set's list.
    *newEntry = entry;
    newEntry->vaddr = vpn;
    entryList[set].push_front(newEntry);

    return newEntry;
}
183 
GpuTLB::EntryList::iterator
// Search one set for an entry covering va; returns an iterator into
// the set's entry list, or end() on a miss. Optionally promotes the
// matched entry to MRU position.
GpuTLB::lookupIt(Addr va, bool update_lru)
{
    // Set index derived from the virtual page number.
    int set = (va >> TheISA::PageShift) & setMask;

    if (FA) {
        // Fully-associative configuration collapses to a single set 0.
        assert(!set);
    }

    // Walk the set's LRU-ordered list looking for an entry whose
    // [vaddr, vaddr + page_size) range covers va.
    auto entry = entryList[set].begin();
    for (; entry != entryList[set].end(); ++entry) {
        int page_size = (*entry)->size();

        if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
            DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                    "with size %#x.\n", va, (*entry)->vaddr, page_size);

            if (update_lru) {
                // Promote to MRU: copy the pointer to the front, erase
                // the old node, then re-point the iterator at the new
                // front so the caller still gets a valid iterator.
                entryList[set].push_front(*entry);
                entryList[set].erase(entry);
                entry = entryList[set].begin();
            }

            break;
        }
    }

    return entry;
}
213 
214  TlbEntry*
215  GpuTLB::lookup(Addr va, bool update_lru)
216  {
217  int set = (va >> TheISA::PageShift) & setMask;
218 
219  auto entry = lookupIt(va, update_lru);
220 
221  if (entry == entryList[set].end())
222  return nullptr;
223  else
224  return *entry;
225  }
226 
void
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::invalidateAll(); confirm against the full source.
{
    DPRINTF(GPUTLB, "Invalidating all entries.\n");

    // Flush every set: move each entry from the in-use (LRU) list back
    // onto the set's free list. Entry contents are left stale; list
    // membership alone determines validity.
    for (int i = 0; i < numSets; ++i) {
        while (!entryList[i].empty()) {
            TlbEntry *entry = entryList[i].front();
            entryList[i].pop_front();
            freeList[i].push_back(entry);
        }
    }
}
240 
241  void
243  {
245  }
246 
void
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::invalidateNonGlobal(); confirm against the full
// source.
{
    DPRINTF(GPUTLB, "Invalidating all non global entries.\n");

    for (int i = 0; i < numSets; ++i) {
        for (auto entryIt = entryList[i].begin();
             entryIt != entryList[i].end();) {
            if (!(*entryIt)->global) {
                // Recycle the non-global entry. The post-increment
                // inside erase() keeps the iterator valid across the
                // std::list removal.
                freeList[i].push_back(*entryIt);
                entryList[i].erase(entryIt++);
            } else {
                // Entries marked global are preserved.
                ++entryIt;
            }
        }
    }
}
264 
265  void
266  GpuTLB::demapPage(Addr va, uint64_t asn)
267  {
268 
269  int set = (va >> TheISA::PageShift) & setMask;
270  auto entry = lookupIt(va, false);
271 
272  if (entry != entryList[set].end()) {
273  freeList[set].push_back(*entry);
274  entryList[set].erase(entry);
275  }
276  }
277 
278 
279 
280  namespace
281  {
282 
283  Cycles
284  localMiscRegAccess(bool read, MiscRegIndex regNum,
285  ThreadContext *tc, PacketPtr pkt)
286  {
287  if (read) {
288  RegVal data = htole(tc->readMiscReg(regNum));
289  // Make sure we don't trot off the end of data.
290  pkt->setData((uint8_t *)&data);
291  } else {
292  RegVal data = htole(tc->readMiscRegNoEffect(regNum));
293  tc->setMiscReg(regNum, letoh(data));
294  }
295  return Cycles(1);
296  }
297 
298  } // anonymous namespace
299 
Fault
// Handle a request targeting an internal (non-memory) address space:
// CPUID, MSR, or x86 port I/O. Returns NoFault on success, or a
// GeneralProtection fault for an unrecognized MSR.
GpuTLB::translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
{
    DPRINTF(GPUTLB, "Addresses references internal memory.\n");
    Addr vaddr = req->getVaddr();
    // High-order bits of the (8-byte-stride) vaddr select which
    // internal address space is targeted.
    Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

    if (prefix == IntAddrPrefixCPUID) {
        panic("CPUID memory space not yet implemented!\n");
    } else if (prefix == IntAddrPrefixMSR) {
        // MSR space: the remaining bits encode the MSR address.
        vaddr = (vaddr >> 3) & ~IntAddrPrefixMask;

        MiscRegIndex regNum;
        if (!msrAddrToIndex(regNum, vaddr))
            // Unrecognized MSR -> #GP(0).
            return std::make_shared<GeneralProtection>(0);

        // Defer the actual register access until the packet is
        // serviced; the lambda runs localMiscRegAccess at that point.
        req->setLocalAccessor(
            [read,regNum](ThreadContext *tc, PacketPtr pkt)
            {
                return localMiscRegAccess(read, regNum, tc, pkt);
            }
        );

        return NoFault;
    } else if (prefix == IntAddrPrefixIO) {
        // TODO If CPL > IOPL or in virtual mode, check the I/O permission
        // bitmap in the TSS.

        Addr IOPort = vaddr & ~IntAddrPrefixMask;
        // Make sure the address fits in the expected 16 bit IO address
        // space.
        assert(!(IOPort & ~0xFFFF));
        if (IOPort == 0xCF8 && req->getSize() == 4) {
            // 32-bit access to the PCI configuration-address register:
            // serviced locally as a misc-reg access.
            req->setLocalAccessor(
                [read](ThreadContext *tc, PacketPtr pkt)
                {
                    return localMiscRegAccess(
                            read, MISCREG_PCI_CONFIG_ADDRESS, tc, pkt);
                }
            );
        } else if ((IOPort & ~mask(2)) == 0xCFC) {
            // PCI configuration-data window (0xCFC-0xCFF): when the
            // enable bit (bit 31) of the config-address register is
            // set, route to PCI config space using the bus/dev/func
            // bits; otherwise treat as ordinary port I/O.
            if (bits(configAddress, 31, 31)) {
                req->setPaddr(PhysAddrPrefixPciConfig |
                        mbits(configAddress, 30, 2) |
                        (IOPort & mask(2)));
            } else {
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
        } else {
            // Any other port: plain I/O-space access.
            req->setPaddr(PhysAddrPrefixIO | IOPort);
        }
        return NoFault;
    } else {
        panic("Access to unrecognized internal address space %#x.\n",
                prefix);
    }
}
361 
369  bool
371  ThreadContext *tc, bool update_stats)
372  {
373  bool tlb_hit = false;
374  #ifndef NDEBUG
375  uint32_t flags = req->getFlags();
376  int seg = flags & SegmentFlagMask;
377  #endif
378 
379  assert(seg != SEGMENT_REG_MS);
380  Addr vaddr = req->getVaddr();
381  DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
382  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
383 
384  if (m5Reg.prot) {
385  DPRINTF(GPUTLB, "In protected mode.\n");
386  // make sure we are in 64-bit mode
387  assert(m5Reg.mode == LongMode);
388 
389  // If paging is enabled, do the translation.
390  if (m5Reg.paging) {
391  DPRINTF(GPUTLB, "Paging enabled.\n");
392  //update LRU stack on a hit
393  TlbEntry *entry = lookup(vaddr, true);
394 
395  if (entry)
396  tlb_hit = true;
397 
398  if (!update_stats) {
399  // functional tlb access for memory initialization
400  // i.e., memory seeding or instr. seeding -> don't update
401  // TLB and stats
402  return tlb_hit;
403  }
404 
406 
407  if (!entry) {
409  } else {
410  localNumTLBHits++;
411  }
412  }
413  }
414 
415  return tlb_hit;
416  }
417 
418  Fault
420  Translation *translation, Mode mode,
421  bool &delayedResponse, bool timing, int &latency)
422  {
423  uint32_t flags = req->getFlags();
424  int seg = flags & SegmentFlagMask;
425  bool storeCheck = flags & (StoreCheck << FlagShift);
426 
427  // If this is true, we're dealing with a request
428  // to a non-memory address space.
429  if (seg == SEGMENT_REG_MS) {
430  return translateInt(mode == Mode::Read, req, tc);
431  }
432 
433  delayedResponse = false;
434  Addr vaddr = req->getVaddr();
435  DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
436 
437  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
438 
439  // If protected mode has been enabled...
440  if (m5Reg.prot) {
441  DPRINTF(GPUTLB, "In protected mode.\n");
442  // If we're not in 64-bit mode, do protection/limit checks
443  if (m5Reg.mode != LongMode) {
444  DPRINTF(GPUTLB, "Not in long mode. Checking segment "
445  "protection.\n");
446 
447  // Check for a null segment selector.
448  if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
451  return std::make_shared<GeneralProtection>(0);
452  }
453 
454  bool expandDown = false;
456 
457  if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
458  if (!attr.writable && (mode == BaseTLB::Write ||
459  storeCheck))
460  return std::make_shared<GeneralProtection>(0);
461 
462  if (!attr.readable && mode == BaseTLB::Read)
463  return std::make_shared<GeneralProtection>(0);
464 
465  expandDown = attr.expandDown;
466 
467  }
468 
471  // This assumes we're not in 64 bit mode. If we were, the
472  // default address size is 64 bits, overridable to 32.
473  int size = 32;
474  bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
475  SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
476 
477  if ((csAttr.defaultSize && sizeOverride) ||
478  (!csAttr.defaultSize && !sizeOverride)) {
479  size = 16;
480  }
481 
482  Addr offset = bits(vaddr - base, size - 1, 0);
483  Addr endOffset = offset + req->getSize() - 1;
484 
485  if (expandDown) {
486  DPRINTF(GPUTLB, "Checking an expand down segment.\n");
487  warn_once("Expand down segments are untested.\n");
488 
489  if (offset <= limit || endOffset <= limit)
490  return std::make_shared<GeneralProtection>(0);
491  } else {
492  if (offset > limit || endOffset > limit)
493  return std::make_shared<GeneralProtection>(0);
494  }
495  }
496 
497  // If paging is enabled, do the translation.
498  if (m5Reg.paging) {
499  DPRINTF(GPUTLB, "Paging enabled.\n");
500  // The vaddr already has the segment base applied.
501  TlbEntry *entry = lookup(vaddr);
503 
504  if (!entry) {
506  if (timing) {
507  latency = missLatency1;
508  }
509 
510  if (FullSystem) {
511  fatal("GpuTLB doesn't support full-system mode\n");
512  } else {
513  DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
514  "at pc %#x.\n", vaddr, tc->instAddr());
515 
516  Process *p = tc->getProcessPtr();
517  const EmulationPageTable::Entry *pte =
518  p->pTable->lookup(vaddr);
519 
520  if (!pte && mode != BaseTLB::Execute) {
521  // penalize a "page fault" more
522  if (timing)
523  latency += missLatency2;
524 
525  if (p->fixupFault(vaddr))
526  pte = p->pTable->lookup(vaddr);
527  }
528 
529  if (!pte) {
530  return std::make_shared<PageFault>(vaddr, true,
531  mode, true,
532  false);
533  } else {
534  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
535 
536  DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
537  alignedVaddr, pte->paddr);
538 
539  TlbEntry gpuEntry(p->pid(), alignedVaddr,
540  pte->paddr, false, false);
541  entry = insert(alignedVaddr, gpuEntry);
542  }
543 
544  DPRINTF(GPUTLB, "Miss was serviced.\n");
545  }
546  } else {
547  localNumTLBHits++;
548 
549  if (timing) {
550  latency = hitLatency;
551  }
552  }
553 
554  // Do paging protection checks.
555  bool inUser = (m5Reg.cpl == 3 &&
556  !(flags & (CPL0FlagBit << FlagShift)));
557 
558  CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
559  bool badWrite = (!entry->writable && (inUser || cr0.wp));
560 
561  if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
562  badWrite)) {
563  // The page must have been present to get into the TLB in
564  // the first place. We'll assume the reserved bits are
565  // fine even though we're not checking them.
566  return std::make_shared<PageFault>(vaddr, true, mode,
567  inUser, false);
568  }
569 
570  if (storeCheck && badWrite) {
571  // This would fault if this were a write, so return a page
572  // fault that reflects that happening.
573  return std::make_shared<PageFault>(vaddr, true,
575  inUser, false);
576  }
577 
578 
579  DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
580  "checks.\n", entry->paddr);
581 
582  int page_size = entry->size();
583  Addr paddr = entry->paddr | (vaddr & (page_size - 1));
584  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
585  req->setPaddr(paddr);
586 
587  if (entry->uncacheable)
588  req->setFlags(Request::UNCACHEABLE);
589  } else {
590  //Use the address which already has segmentation applied.
591  DPRINTF(GPUTLB, "Paging disabled.\n");
592  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
593  req->setPaddr(vaddr);
594  }
595  } else {
596  // Real mode
597  DPRINTF(GPUTLB, "In real mode.\n");
598  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
599  req->setPaddr(vaddr);
600  }
601 
602  // Check for an access to the local APIC
603  if (FullSystem) {
604  LocalApicBase localApicBase =
606 
607  Addr baseAddr = localApicBase.base * PageBytes;
608  Addr paddr = req->getPaddr();
609 
610  if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
611  // Force the access to be uncacheable.
612  req->setFlags(Request::UNCACHEABLE);
613  req->setPaddr(x86LocalAPICAddress(tc->contextId(),
614  paddr - baseAddr));
615  }
616  }
617 
618  return NoFault;
619  };
620 
Fault
// NOTE(review): the first declarator line is missing from this listing
// -- presumably GpuTLB::translateAtomic(const RequestPtr &req,
// ThreadContext *tc, ...); confirm against the full source.
        Mode mode, int &latency)
{
    bool delayedResponse;

    // Atomic translation: run the shared translate() path in
    // non-timing mode (no Translation callback); any delayed-response
    // indication is ignored because the result is needed immediately.
    return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse,
            false, latency);
}
630 
void
// NOTE(review): the first declarator line is missing from this listing
// -- presumably GpuTLB::translateTiming(const RequestPtr &req,
// ThreadContext *tc, ...); confirm against the full source.
        Translation *translation, Mode mode, int &latency)
{
    bool delayedResponse;
    // Timing-mode translations must supply a callback object.
    assert(translation);

    Fault fault = GpuTLB::translate(req, tc, translation, mode,
            delayedResponse, true, latency);

    // If the translation completed immediately, finish the callback
    // now; otherwise the timing machinery delivers it later.
    if (!delayedResponse)
        translation->finish(fault, req, tc, mode);
}
644 
Walker*
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::getWalker(); confirm against the full source.
{
    // Accessor for the page-table walker member of this TLB.
    return walker;
}
650 
651 
void
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::serialize(CheckpointOut &) const; confirm
// against the full source.
{
    // Intentionally empty: no state is checkpointed here.
}
656 
void
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::unserialize(CheckpointIn &); confirm against the
// full source.
{
    // Intentionally empty: no state is restored from a checkpoint.
}
661 
662  void
664  {
666 
668  .name(name() + ".local_TLB_accesses")
669  .desc("Number of TLB accesses")
670  ;
671 
673  .name(name() + ".local_TLB_hits")
674  .desc("Number of TLB hits")
675  ;
676 
678  .name(name() + ".local_TLB_misses")
679  .desc("Number of TLB misses")
680  ;
681 
683  .name(name() + ".local_TLB_miss_rate")
684  .desc("TLB miss rate")
685  ;
686 
688  .name(name() + ".access_cycles")
689  .desc("Cycles spent accessing this TLB level")
690  ;
691 
693  .name(name() + ".page_table_cycles")
694  .desc("Cycles spent accessing the page table")
695  ;
696 
698 
700  .name(name() + ".unique_pages")
701  .desc("Number of unique pages touched")
702  ;
703 
705  .name(name() + ".local_cycles")
706  .desc("Number of cycles spent in queue for all incoming reqs")
707  ;
708 
710  .name(name() + ".local_latency")
711  .desc("Avg. latency over incoming coalesced reqs")
712  ;
713 
715 
717  .name(name() + ".global_TLB_accesses")
718  .desc("Number of TLB accesses")
719  ;
720 
722  .name(name() + ".global_TLB_hits")
723  .desc("Number of TLB hits")
724  ;
725 
727  .name(name() + ".global_TLB_misses")
728  .desc("Number of TLB misses")
729  ;
730 
732  .name(name() + ".global_TLB_miss_rate")
733  .desc("TLB miss rate")
734  ;
735 
737 
739  .name(name() + ".avg_reuse_distance")
740  .desc("avg. reuse distance over all pages (in ticks)")
741  ;
742 
743  }
744 
750  void
752  {
753  assert(pkt);
754  assert(pkt->senderState);
755 
756  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
758 
759  TranslationState *sender_state =
760  safe_cast<TranslationState*>(pkt->senderState);
761 
762  bool update_stats = !sender_state->prefetch;
763  ThreadContext * tmp_tc = sender_state->tc;
764 
765  DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
766  virt_page_addr);
767 
768  int req_cnt = sender_state->reqCnt.back();
769 
770  if (update_stats) {
771  accessCycles -= (curTick() * req_cnt);
772  localCycles -= curTick();
773  updatePageFootprint(virt_page_addr);
774  globalNumTLBAccesses += req_cnt;
775  }
776 
777  tlbOutcome lookup_outcome = TLB_MISS;
778  const RequestPtr &tmp_req = pkt->req;
779 
780  // Access the TLB and figure out if it's a hit or a miss.
781  bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
782 
783  if (success) {
784  lookup_outcome = TLB_HIT;
785  // Put the entry in SenderState
786  TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
787  assert(entry);
788 
789  auto p = sender_state->tc->getProcessPtr();
790  sender_state->tlbEntry =
791  new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
792  false, false);
793 
794  if (update_stats) {
795  // the reqCnt has an entry per level, so its size tells us
796  // which level we are in
797  sender_state->hitLevel = sender_state->reqCnt.size();
798  globalNumTLBHits += req_cnt;
799  }
800  } else {
801  if (update_stats)
802  globalNumTLBMisses += req_cnt;
803  }
804 
805  /*
806  * We now know the TLB lookup outcome (if it's a hit or a miss), as
807  * well as the TLB access latency.
808  *
809  * We create and schedule a new TLBEvent which will help us take the
810  * appropriate actions (e.g., update TLB on a hit, send request to
811  * lower level TLB on a miss, or start a page walk if this was the
812  * last-level TLB)
813  */
814  TLBEvent *tlb_event =
815  new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
816 
817  if (translationReturnEvent.count(virt_page_addr)) {
818  panic("Virtual Page Address %#x already has a return event\n",
819  virt_page_addr);
820  }
821 
822  translationReturnEvent[virt_page_addr] = tlb_event;
823  assert(tlb_event);
824 
825  DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
827 
828  schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency)));
829  }
830 
// NOTE(review): the first declarator line is missing from this listing
// -- presumably GpuTLB::TLBEvent::TLBEvent(GpuTLB *_tlb, Addr _addr,
// ...); confirm against the full source.
        tlbOutcome tlb_outcome, PacketPtr _pkt)
    // Capture everything process() needs: the owning TLB, the virtual
    // page address, the lookup outcome, and the in-flight packet.
    : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
      outcome(tlb_outcome), pkt(_pkt)
{
}
837 
void
// NOTE(review): the first declarator line is missing from this listing
// -- presumably GpuTLB::pagingProtectionChecks(ThreadContext *tc,
// PacketPtr pkt, ...); confirm against the full source.
        TlbEntry * tlb_entry, Mode mode)
{
    HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
    uint32_t flags = pkt->req->getFlags();
    bool storeCheck = flags & (StoreCheck << FlagShift);

    // Do paging protection checks.
    // "User" only when CPL == 3 and the request does not carry the
    // CPL0 override flag.
    bool inUser
        = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
    CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

    // A write is disallowed when the page is read-only and either we
    // are in user mode or CR0.WP (supervisor write-protect) is set.
    bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

    if ((inUser && !tlb_entry->user) ||
        (mode == BaseTLB::Write && badWrite)) {
        // The page must have been present to get into the TLB in
        // the first place. We'll assume the reserved bits are
        // fine even though we're not checking them.
        panic("Page fault detected");
    }

    if (storeCheck && badWrite) {
        // This would fault if this were a write, so return a page
        // fault that reflects that happening.
        panic("Page fault detected");
    }
}
871 
877  void
879  tlbOutcome tlb_outcome, PacketPtr pkt)
880  {
881  assert(pkt);
882  Addr vaddr = pkt->req->getVaddr();
883 
884  TranslationState *sender_state =
885  safe_cast<TranslationState*>(pkt->senderState);
886 
887  ThreadContext *tc = sender_state->tc;
888  Mode mode = sender_state->tlbMode;
889 
890  TlbEntry *local_entry, *new_entry;
891 
892  if (tlb_outcome == TLB_HIT) {
893  DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
894  vaddr);
895  local_entry = sender_state->tlbEntry;
896  } else {
897  DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
898  vaddr);
899 
905  new_entry = sender_state->tlbEntry;
906  assert(new_entry);
907  local_entry = new_entry;
908 
909  if (allocationPolicy) {
910  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
911  virt_page_addr);
912 
913  local_entry = insert(virt_page_addr, *new_entry);
914  }
915 
916  assert(local_entry);
917  }
918 
924  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
925  "while paddr was %#x.\n", local_entry->vaddr,
926  local_entry->paddr);
927 
928  pagingProtectionChecks(tc, pkt, local_entry, mode);
929  int page_size = local_entry->size();
930  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
931  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
932 
933  // Since this packet will be sent through the cpu side port,
934  // it must be converted to a response pkt if it is not one already
935  if (pkt->isRequest()) {
936  pkt->makeTimingResponse();
937  }
938 
939  pkt->req->setPaddr(paddr);
940 
941  if (local_entry->uncacheable) {
942  pkt->req->setFlags(Request::UNCACHEABLE);
943  }
944 
945  //send packet back to coalescer
946  cpuSidePort[0]->sendTimingResp(pkt);
947  //schedule cleanup event
948  cleanupQueue.push(virt_page_addr);
949 
950  // schedule this only once per cycle.
951  // The check is required because we might have multiple translations
952  // returning the same cycle
953  // this is a maximum priority event and must be on the same cycle
954  // as the cleanup event in TLBCoalescer to avoid a race with
955  // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
956  if (!cleanupEvent.scheduled())
958  }
959 
964  void
966  PacketPtr pkt)
967  {
968  DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
969 
970  assert(translationReturnEvent[virtPageAddr]);
971  assert(pkt);
972 
973  TranslationState *tmp_sender_state =
974  safe_cast<TranslationState*>(pkt->senderState);
975 
976  int req_cnt = tmp_sender_state->reqCnt.back();
977  bool update_stats = !tmp_sender_state->prefetch;
978 
979 
980  if (outcome == TLB_HIT) {
981  handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
982 
983  if (update_stats) {
984  accessCycles += (req_cnt * curTick());
985  localCycles += curTick();
986  }
987 
988  } else if (outcome == TLB_MISS) {
989 
990  DPRINTF(GPUTLB, "This is a TLB miss\n");
991  if (update_stats) {
992  accessCycles += (req_cnt*curTick());
993  localCycles += curTick();
994  }
995 
996  if (hasMemSidePort) {
997  // the one cyle added here represent the delay from when we get
998  // the reply back till when we propagate it to the coalescer
999  // above.
1000  if (update_stats) {
1001  accessCycles += (req_cnt * 1);
1002  localCycles += 1;
1003  }
1004 
1010  if (!memSidePort[0]->sendTimingReq(pkt)) {
1011  DPRINTF(GPUTLB, "Failed sending translation request to "
1012  "lower level TLB for addr %#x\n", virtPageAddr);
1013 
1014  memSidePort[0]->retries.push_back(pkt);
1015  } else {
1016  DPRINTF(GPUTLB, "Sent translation request to lower level "
1017  "TLB for addr %#x\n", virtPageAddr);
1018  }
1019  } else {
1020  //this is the last level TLB. Start a page walk
1021  DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1022  "addr %#x\n", virtPageAddr);
1023 
1024  if (update_stats)
1025  pageTableCycles -= (req_cnt*curTick());
1026 
1027  TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1028  assert(tlb_event);
1029  tlb_event->updateOutcome(PAGE_WALK);
1030  schedule(tlb_event,
1032  }
1033  } else if (outcome == PAGE_WALK) {
1034  if (update_stats)
1035  pageTableCycles += (req_cnt*curTick());
1036 
1037  // Need to access the page table and update the TLB
1038  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1039  virtPageAddr);
1040 
1041  TranslationState *sender_state =
1042  safe_cast<TranslationState*>(pkt->senderState);
1043 
1044  Process *p = sender_state->tc->getProcessPtr();
1045  Addr vaddr = pkt->req->getVaddr();
1046  #ifndef NDEBUG
1047  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1048  assert(alignedVaddr == virtPageAddr);
1049  #endif
1050  const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1051  if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1052  p->fixupFault(vaddr)) {
1053  pte = p->pTable->lookup(vaddr);
1054  }
1055 
1056  if (pte) {
1057  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1058  pte->paddr);
1059 
1060  sender_state->tlbEntry =
1061  new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1062  false);
1063  } else {
1064  sender_state->tlbEntry = nullptr;
1065  }
1066 
1067  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1068  } else if (outcome == MISS_RETURN) {
1072  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1073  } else {
1074  panic("Unexpected TLB outcome %d", outcome);
1075  }
1076  }
1077 
void
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::TLBEvent::process(); confirm against the full
// source.
{
    // Hand the recorded outcome and packet back to the owning TLB.
    tlb->translationReturn(virtPageAddr, outcome, pkt);
}
1083 
const char*
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::TLBEvent::description() const; confirm against
// the full source.
{
    // Human-readable label used in event traces.
    return "trigger translationDoneEvent";
}
1089 
void
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome);
// confirm against the full source.
{
    // Re-label this pending event (e.g., a miss being converted into
    // a PAGE_WALK) before it is rescheduled.
    outcome = _outcome;
}
1095 
Addr
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::TLBEvent::getTLBEventVaddr(); confirm against
// the full source.
{
    // Virtual page address this event was created for.
    return virtPageAddr;
}
1101 
bool
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt);
// confirm against the full source.
{
    // Accept the request only while the TLB has room for another
    // outstanding coalesced translation; otherwise apply back-pressure
    // and let the sender retry.
    if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
        tlb->issueTLBLookup(pkt);
        // update number of outstanding translation requests
        tlb->outstandingReqs++;
        return true;
    } else {
        DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                tlb->outstandingReqs);
        return false;
    }
}
1122 
void
// NOTE(review): the declarator line is missing from this listing --
// presumably GpuTLB::handleFuncTranslationReturn(PacketPtr pkt,
// tlbOutcome tlb_outcome); confirm against the full source.
{
    TranslationState *sender_state =
        safe_cast<TranslationState*>(pkt->senderState);

    ThreadContext *tc = sender_state->tc;
    Mode mode = sender_state->tlbMode;
    Addr vaddr = pkt->req->getVaddr();

    TlbEntry *local_entry, *new_entry;

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                "%#x\n", vaddr);

        // On a hit the entry was already placed in the sender state.
        local_entry = sender_state->tlbEntry;
    } else {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                "%#x\n", vaddr);

        // On a miss, a filled-in entry must have been attached by the
        // lower level / page walk before this is called.
        new_entry = sender_state->tlbEntry;
        assert(new_entry);
        local_entry = new_entry;

        if (allocationPolicy) {
            // Allocate-on-miss: install the translation in this TLB.
            Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

            DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                    virt_page_addr);

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    // Skip protection checks for prefetches or when no entry was
    // delivered (failed prefetch).
    if (!sender_state->prefetch && sender_state->tlbEntry)
        pagingProtectionChecks(tc, pkt, local_entry, mode);

    // Combine the frame address with the page offset of the original
    // virtual address.
    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable)
        pkt->req->setFlags(Request::UNCACHEABLE);
}
1200 
1201  // This is used for atomic translations. Need to
1202  // make it all happen during the same cycle.
1203  void
1205  {
1206  TranslationState *sender_state =
1207  safe_cast<TranslationState*>(pkt->senderState);
1208 
1209  ThreadContext *tc = sender_state->tc;
1210  bool update_stats = !sender_state->prefetch;
1211 
1212  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1214 
1215  if (update_stats)
1216  tlb->updatePageFootprint(virt_page_addr);
1217 
1218  // do the TLB lookup without updating the stats
1219  bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1220  tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1221 
1222  // functional mode means no coalescing
1223  // global metrics are the same as the local metrics
1224  if (update_stats) {
1225  tlb->globalNumTLBAccesses++;
1226 
1227  if (success) {
1228  sender_state->hitLevel = sender_state->reqCnt.size();
1229  tlb->globalNumTLBHits++;
1230  }
1231  }
1232 
1233  if (!success) {
1234  if (update_stats)
1235  tlb->globalNumTLBMisses++;
1236  if (tlb->hasMemSidePort) {
1237  // there is a TLB below -> propagate down the TLB hierarchy
1238  tlb->memSidePort[0]->sendFunctional(pkt);
1239  // If no valid translation from a prefetch, then just return
1240  if (sender_state->prefetch && !pkt->req->hasPaddr())
1241  return;
1242  } else {
1243  // Need to access the page table and update the TLB
1244  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1245  virt_page_addr);
1246 
1247  Process *p = tc->getProcessPtr();
1248 
1249  Addr vaddr = pkt->req->getVaddr();
1250  #ifndef NDEBUG
1251  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1252  assert(alignedVaddr == virt_page_addr);
1253  #endif
1254 
1255  const EmulationPageTable::Entry *pte =
1256  p->pTable->lookup(vaddr);
1257  if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1258  p->fixupFault(vaddr)) {
1259  pte = p->pTable->lookup(vaddr);
1260  }
1261 
1262  if (!sender_state->prefetch) {
1263  // no PageFaults are permitted after
1264  // the second page table lookup
1265  assert(pte);
1266 
1267  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1268  pte->paddr);
1269 
1270  sender_state->tlbEntry =
1271  new TlbEntry(p->pid(), virt_page_addr,
1272  pte->paddr, false, false);
1273  } else {
1274  // If this was a prefetch, then do the normal thing if it
1275  // was a successful translation. Otherwise, send an empty
1276  // TLB entry back so that it can be figured out as empty
1277  // and handled accordingly.
1278  if (pte) {
1279  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1280  pte->paddr);
1281 
1282  sender_state->tlbEntry =
1283  new TlbEntry(p->pid(), virt_page_addr,
1284  pte->paddr, false, false);
1285  } else {
1286  DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1287  alignedVaddr);
1288 
1289  sender_state->tlbEntry = nullptr;
1290 
1291  return;
1292  }
1293  }
1294  }
1295  } else {
1296  DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1297  tlb->lookup(pkt->req->getVaddr()));
1298 
1299  TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1300  update_stats);
1301 
1302  assert(entry);
1303 
1304  auto p = sender_state->tc->getProcessPtr();
1305  sender_state->tlbEntry =
1306  new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1307  false, false);
1308  }
1309  // This is the function that would populate pkt->req with the paddr of
1310  // the translation. But if no translation happens (i.e Prefetch fails)
1311  // then the early returns in the above code wiill keep this function
1312  // from executing.
1313  tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1314  }
1315 
1316  void
1318  {
1319  // The CPUSidePort never sends anything but replies. No retries
1320  // expected.
1321  panic("recvReqRetry called");
1322  }
1323 
1326  {
1327  // currently not checked by the requestor
1328  AddrRangeList ranges;
1329 
1330  return ranges;
1331  }
1332 
1338  bool
1340  {
1341  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1343 
1344  DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1345  virt_page_addr);
1346 
1347  TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1348  assert(tlb_event);
1349  assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1350 
1351  tlb_event->updateOutcome(MISS_RETURN);
1352  tlb->schedule(tlb_event, curTick()+tlb->clockPeriod());
1353 
1354  return true;
1355  }
1356 
1357  void
1359  {
1360  // No retries should reach the TLB. The retries
1361  // should only reach the TLBCoalescer.
1362  panic("recvReqRetry called");
1363  }
1364 
1365  void
1367  {
1368  while (!cleanupQueue.empty()) {
1369  Addr cleanup_addr = cleanupQueue.front();
1370  cleanupQueue.pop();
1371 
1372  // delete TLBEvent
1373  TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1374  delete old_tlb_event;
1375  translationReturnEvent.erase(cleanup_addr);
1376 
1377  // update number of outstanding requests
1378  outstandingReqs--;
1379  }
1380 
1384  for (int i = 0; i < cpuSidePort.size(); ++i) {
1385  cpuSidePort[i]->sendRetryReq();
1386  }
1387  }
1388 
1389  void
1391  {
1392 
1394 
1395  AccessInfo tmp_access_info;
1396  tmp_access_info.lastTimeAccessed = 0;
1397  tmp_access_info.accessesPerPage = 0;
1398  tmp_access_info.totalReuseDistance = 0;
1399  tmp_access_info.sumDistance = 0;
1400  tmp_access_info.meanDistance = 0;
1401 
1402  ret = TLBFootprint.insert(
1403  AccessPatternTable::value_type(virt_page_addr, tmp_access_info));
1404 
1405  bool first_page_access = ret.second;
1406 
1407  if (first_page_access) {
1408  numUniquePages++;
1409  } else {
1410  int accessed_before;
1411  accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1412  ret.first->second.totalReuseDistance += accessed_before;
1413  }
1414 
1415  ret.first->second.accessesPerPage++;
1416  ret.first->second.lastTimeAccessed = curTick();
1417 
1418  if (accessDistance) {
1419  ret.first->second.localTLBAccesses
1420  .push_back(localNumTLBAccesses.value());
1421  }
1422  }
1423 
1424  void
1426  {
1427  std::ostream *page_stat_file = nullptr;
1428 
1429  if (accessDistance) {
1430 
1431  // print per page statistics to a separate file (.csv format)
1432  // simout is the gem5 output directory (default is m5out or the one
1433  // specified with -d
1434  page_stat_file = simout.create(name().c_str())->stream();
1435 
1436  // print header
1437  *page_stat_file
1438  << "page,max_access_distance,mean_access_distance, "
1439  << "stddev_distance" << std::endl;
1440  }
1441 
1442  // update avg. reuse distance footprint
1443  unsigned int sum_avg_reuse_distance_per_page = 0;
1444 
1445  // iterate through all pages seen by this TLB
1446  for (auto &iter : TLBFootprint) {
1447  sum_avg_reuse_distance_per_page += iter.second.totalReuseDistance /
1448  iter.second.accessesPerPage;
1449 
1450  if (accessDistance) {
1451  unsigned int tmp = iter.second.localTLBAccesses[0];
1452  unsigned int prev = tmp;
1453 
1454  for (int i = 0; i < iter.second.localTLBAccesses.size(); ++i) {
1455  if (i) {
1456  tmp = prev + 1;
1457  }
1458 
1459  prev = iter.second.localTLBAccesses[i];
1460  // update the localTLBAccesses value
1461  // with the actual differece
1462  iter.second.localTLBAccesses[i] -= tmp;
1463  // compute the sum of AccessDistance per page
1464  // used later for mean
1465  iter.second.sumDistance +=
1466  iter.second.localTLBAccesses[i];
1467  }
1468 
1469  iter.second.meanDistance =
1470  iter.second.sumDistance / iter.second.accessesPerPage;
1471 
1472  // compute std_dev and max (we need a second round because we
1473  // need to know the mean value
1474  unsigned int max_distance = 0;
1475  unsigned int stddev_distance = 0;
1476 
1477  for (int i = 0; i < iter.second.localTLBAccesses.size(); ++i) {
1478  unsigned int tmp_access_distance =
1479  iter.second.localTLBAccesses[i];
1480 
1481  if (tmp_access_distance > max_distance) {
1482  max_distance = tmp_access_distance;
1483  }
1484 
1485  unsigned int diff =
1486  tmp_access_distance - iter.second.meanDistance;
1487  stddev_distance += pow(diff, 2);
1488 
1489  }
1490 
1491  stddev_distance =
1492  sqrt(stddev_distance/iter.second.accessesPerPage);
1493 
1494  if (page_stat_file) {
1495  *page_stat_file << std::hex << iter.first << ",";
1496  *page_stat_file << std::dec << max_distance << ",";
1497  *page_stat_file << std::dec << iter.second.meanDistance
1498  << ",";
1499  *page_stat_file << std::dec << stddev_distance;
1500  *page_stat_file << std::endl;
1501  }
1502 
1503  // erase the localTLBAccesses array
1504  iter.second.localTLBAccesses.clear();
1505  }
1506  }
1507 
1508  if (!TLBFootprint.empty()) {
1510  sum_avg_reuse_distance_per_page / TLBFootprint.size();
1511  }
1512 
1513  //clear the TLBFootprint map
1514  TLBFootprint.clear();
1515  }
1516 } // namespace X86ISA
1517 
1519 X86GPUTLBParams::create()
1520 {
1521  return new X86ISA::GpuTLB(this);
1522 }
1523 
X86ISA::GpuTLB::MemSidePort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
Definition: gpu_tlb.cc:1339
X86ISA::GpuTLB::localNumTLBAccesses
Stats::Scalar localNumTLBAccesses
Definition: gpu_tlb.hh:174
X86ISA::MISCREG_M5_REG
@ MISCREG_M5_REG
Definition: misc.hh:137
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:183
pagetable.hh
X86ISA::GpuTLB::cpuSidePort
std::vector< CpuSidePort * > cpuSidePort
Definition: gpu_tlb.hh:287
ThreadContext::readMiscRegNoEffect
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
X86ISA::GpuTLB::localTLBMissRate
Stats::Formula localTLBMissRate
Definition: gpu_tlb.hh:177
X86ISA::GpuTLB::unserialize
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: gpu_tlb.cc:658
X86ISA::FlagShift
const int FlagShift
Definition: ldstflags.hh:50
Stats::Group::regStats
virtual void regStats()
Callback to set stat parameters.
Definition: group.cc:64
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:460
X86ISA::GpuTLB::cleanupQueue
std::queue< Addr > cleanupQueue
Definition: gpu_tlb.hh:389
x86_traits.hh
roundDown
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:150
X86ISA::GpuTLB::translateTiming
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: gpu_tlb.cc:632
EmulationPageTable::Entry
Definition: page_table.hh:51
Packet::makeTimingResponse
void makeTimingResponse()
Definition: packet.hh:1022
X86ISA::MISCREG_APIC_BASE
@ MISCREG_APIC_BASE
Definition: misc.hh:393
X86ISA::GpuTLB::globalNumTLBHits
Stats::Scalar globalNumTLBHits
Definition: gpu_tlb.hh:183
X86ISA::GpuTLB::TranslationState::hitLevel
int hitLevel
Definition: gpu_tlb.hh:334
BaseTLB::Read
@ Read
Definition: tlb.hh:57
data
const char data[]
Definition: circlebuf.test.cc:42
X86ISA::GpuTLB::tlbOutcome
tlbOutcome
Definition: gpu_tlb.hh:221
microldstop.hh
warn_once
#define warn_once(...)
Definition: logging.hh:243
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
Process
Definition: process.hh:65
X86ISA::PhysAddrPrefixPciConfig
const Addr PhysAddrPrefixPciConfig
Definition: x86_traits.hh:73
X86ISA::GpuTLB::numSets
int numSets
Definition: gpu_tlb.hh:116
X86ISA::SEGMENT_REG_TSG
@ SEGMENT_REG_TSG
Definition: segment.hh:53
X86ISA::GpuTLB::demapPage
void demapPage(Addr va, uint64_t asn)
Definition: gpu_tlb.cc:266
X86ISA::TlbEntry
Definition: pagetable.hh:65
htole
T htole(T value)
Definition: byteswap.hh:140
X86ISA::MISCREG_SEG_LIMIT
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
Definition: misc.hh:526
X86ISA::GpuTLB::AccessInfo
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: gpu_tlb.hh:402
X86ISA::GpuTLB::numUniquePages
Stats::Scalar numUniquePages
Definition: gpu_tlb.hh:191
X86ISA::PhysAddrPrefixIO
const Addr PhysAddrPrefixIO
Definition: x86_traits.hh:72
X86ISA::GpuTLB::getWalker
Walker * getWalker()
Definition: gpu_tlb.cc:646
OutputDirectory::create
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
Definition: output.cc:209
PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:237
X86ISA::GpuTLB::cleanupEvent
EventFunctionWrapper cleanupEvent
Definition: gpu_tlb.hh:395
pagetable_walker.hh
X86ISA::GpuTLB::localNumTLBMisses
Stats::Scalar localNumTLBMisses
Definition: gpu_tlb.hh:176
RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:82
X86ISA::base
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:340
X86ISA::GpuTLB::pagingProtectionChecks
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: gpu_tlb.cc:843
X86ISA::GpuTLB::TranslationState::prefetch
bool prefetch
Definition: gpu_tlb.hh:324
X86ISA::TlbEntry::vaddr
Addr vaddr
Definition: pagetable.hh:71
X86ISA::GpuTLB::accessDistance
bool accessDistance
Print out accessDistance stats.
Definition: gpu_tlb.hh:139
X86ISA::GpuTLB::getPort
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Definition: gpu_tlb.cc:137
X86ISA::SYS_SEGMENT_REG_IDTR
@ SYS_SEGMENT_REG_IDTR
Definition: segment.hh:60
FullSystem
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:132
ThreadContext::getProcessPtr
virtual Process * getProcessPtr()=0
X86ISA::GpuTLB::PAGE_WALK
@ PAGE_WALK
Definition: gpu_tlb.hh:221
mbits
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
Definition: bitfield.hh:104
X86ISA::TlbEntry::uncacheable
bool uncacheable
Definition: pagetable.hh:84
Packet::isRequest
bool isRequest() const
Definition: packet.hh:559
X86ISA::GpuTLB::walker
Walker * walker
Definition: gpu_tlb.hh:105
X86ISA::x
Bitfield< 1 > x
Definition: types.hh:103
X86ISA::GpuTLB::hitLatency
int hitLatency
Definition: gpu_tlb.hh:168
faults.hh
X86ISA::msrAddrToIndex
bool msrAddrToIndex(MiscRegIndex &regNum, Addr addr)
Find and return the misc reg corresponding to an MSR address.
Definition: msr.cc:147
output.hh
X86ISA::IntAddrPrefixIO
const Addr IntAddrPrefixIO
Definition: x86_traits.hh:70
X86ISA::SEGMENT_REG_LS
@ SEGMENT_REG_LS
Definition: segment.hh:54
request.hh
X86ISA::IntAddrPrefixMask
const Addr IntAddrPrefixMask
Definition: x86_traits.hh:67
X86ISA::GpuTLB::missLatency1
int missLatency1
Definition: gpu_tlb.hh:169
X86ISA::GpuTLB::AccessInfo::accessesPerPage
unsigned int accessesPerPage
Definition: gpu_tlb.hh:405
X86ISA::GpuTLB::setMask
Addr setMask
Definition: gpu_tlb.hh:122
X86ISA::GpuTLB::localCycles
Stats::Scalar localCycles
Definition: gpu_tlb.hh:193
Packet::setData
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
Definition: packet.hh:1225
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
X86ISA::GpuTLB::insert
TlbEntry * insert(Addr vpn, TlbEntry &entry)
Definition: gpu_tlb.cc:159
X86ISA::SEGMENT_REG_HS
@ SEGMENT_REG_HS
Definition: segment.hh:51
X86ISA::MISCREG_SEG_SEL
static MiscRegIndex MISCREG_SEG_SEL(int index)
Definition: misc.hh:505
X86ISA::GpuTLB::CpuSidePort::getAddrRanges
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: gpu_tlb.cc:1325
X86ISA::GpuTLB::globalNumTLBAccesses
Stats::Scalar globalNumTLBAccesses
Definition: gpu_tlb.hh:182
X86ISA::GpuTLB::TLBEvent::description
const char * description() const
Return a C string describing the event.
Definition: gpu_tlb.cc:1085
X86ISA::GpuTLB::translationReturnEvent
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: gpu_tlb.hh:385
X86ISA::GpuTLB::TranslationState::tlbEntry
TlbEntry * tlbEntry
Definition: gpu_tlb.hh:322
X86ISA::GpuTLB::translate
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: gpu_tlb.cc:419
pseudo_inst.hh
Request::STRICT_ORDER
@ STRICT_ORDER
The request is required to be strictly ordered by CPU models and is non-speculative.
Definition: request.hh:124
X86ISA::GpuTLB::handleTranslationReturn
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
Definition: gpu_tlb.cc:878
X86ISA::TlbEntry::paddr
Addr paddr
Definition: pagetable.hh:68
letoh
T letoh(T value)
Definition: byteswap.hh:141
X86ISA::expandDown
Bitfield< 14 > expandDown
Definition: misc.hh:996
X86ISA::GpuTLB::missLatency2
int missLatency2
Definition: gpu_tlb.hh:170
cp
Definition: cprintf.cc:40
X86ISA::GpuTLB::globalNumTLBMisses
Stats::Scalar globalNumTLBMisses
Definition: gpu_tlb.hh:184
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1005
X86ISA::GpuTLB::memSidePort
std::vector< MemSidePort * > memSidePort
Definition: gpu_tlb.hh:289
Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:224
ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:88
Stats::ScalarBase::value
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:698
bitfield.hh
X86ISA::SEGMENT_REG_MS
@ SEGMENT_REG_MS
Definition: segment.hh:55
X86ISA::GpuTLB::CpuSidePort::recvReqRetry
virtual void recvReqRetry()
Definition: gpu_tlb.cc:1317
X86ISA::GpuTLB::MemSidePort::recvReqRetry
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Definition: gpu_tlb.cc:1358
X86ISA::GpuTLB::translateInt
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
Definition: gpu_tlb.cc:301
Event
Definition: eventq.hh:246
X86ISA::GpuTLB::TLBEvent::process
void process()
Definition: gpu_tlb.cc:1079
X86ISA::GpuTLB::TranslationState::reqCnt
std::vector< int > reqCnt
Definition: gpu_tlb.hh:332
cprintf
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:152
OutputStream::stream
std::ostream * stream() const
Get the output underlying output stream.
Definition: output.hh:59
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
X86ISA::GpuTLB::FA
bool FA
true if this is a fully-associative TLB
Definition: gpu_tlb.hh:121
ArmISA::PageShift
const Addr PageShift
Definition: isa_traits.hh:51
Fault
std::shared_ptr< FaultBase > Fault
Definition: types.hh:240
MipsISA::vaddr
vaddr
Definition: pra_constants.hh:275
msr.hh
X86ISA::SegmentFlagMask
const Request::FlagsType M5_VAR_USED SegmentFlagMask
Definition: ldstflags.hh:49
X86ISA::GpuTLB::allocationPolicy
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: gpu_tlb.hh:128
X86ISA::GpuTLB::avgReuseDistance
Stats::Scalar avgReuseDistance
Definition: gpu_tlb.hh:198
X86ISA::GpuTLB::regStats
void regStats() override
Callback to set stat parameters.
Definition: gpu_tlb.cc:663
X86ISA::GpuTLB::cleanup
void cleanup()
Definition: gpu_tlb.cc:1366
X86ISA::GpuTLB
Definition: gpu_tlb.hh:63
Port
Ports are used to interface objects to each other.
Definition: port.hh:56
process.hh
X86ISA::TlbEntry::writable
bool writable
Definition: pagetable.hh:77
X86ISA::PageBytes
const Addr PageBytes
Definition: isa_traits.hh:48
ArmISA::mode
Bitfield< 4, 0 > mode
Definition: miscregs_types.hh:70
X86ISA::GpuTLB::invalidateAll
void invalidateAll()
Definition: gpu_tlb.cc:228
X86ISA::Walker
Definition: pagetable_walker.hh:56
X86ISA::GpuTLB::AccessInfo::totalReuseDistance
unsigned int totalReuseDistance
Definition: gpu_tlb.hh:407
X86ISA::GpuTLB::lookupIt
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:185
isa_traits.hh
ArmISA::attr
attr
Definition: miscregs_types.hh:649
X86ISA::GpuTLB::AccessInfo::lastTimeAccessed
unsigned int lastTimeAccessed
Definition: gpu_tlb.hh:404
X86ISA::GpuTLB::translateAtomic
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
Definition: gpu_tlb.cc:622
ThreadContext::contextId
virtual ContextID contextId() const =0
EventBase::Maximum_Pri
static const Priority Maximum_Pri
Maximum priority.
Definition: eventq.hh:236
X86ISA::MISCREG_SEG_ATTR
static MiscRegIndex MISCREG_SEG_ATTR(int index)
Definition: misc.hh:533
X86ISA::GpuTLB::MISS_RETURN
@ MISS_RETURN
Definition: gpu_tlb.hh:221
Request::UNCACHEABLE
@ UNCACHEABLE
The request is to an uncacheable address.
Definition: request.hh:114
X86ISA::GpuTLB::CpuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and it calls issueTLBLookup() It o...
Definition: gpu_tlb.cc:1109
X86ISA::GpuTLB::entryList
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
Definition: gpu_tlb.hh:157
X86ISA::GpuTLB::~GpuTLB
~GpuTLB()
Definition: gpu_tlb.cc:130
std::pair
STL pair class.
Definition: stl.hh:58
gpu_tlb.hh
NoFault
constexpr decltype(nullptr) NoFault
Definition: types.hh:245
X86ISA::MISCREG_PCI_CONFIG_ADDRESS
@ MISCREG_PCI_CONFIG_ADDRESS
Definition: misc.hh:396
X86ISA::GpuTLB::localNumTLBHits
Stats::Scalar localNumTLBHits
Definition: gpu_tlb.hh:175
X86ISA::GpuTLB::setConfigAddress
void setConfigAddress(uint32_t addr)
Definition: gpu_tlb.cc:242
X86ISA
This is exposed globally, independent of the ISA.
Definition: acpi.hh:55
X86ISA::GpuTLB::TLB_HIT
@ TLB_HIT
Definition: gpu_tlb.hh:221
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
X86ISA::GpuTLB::tlbLookup
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss.
Definition: gpu_tlb.cc:370
Stats::DataWrap::name
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:274
X86ISA::MISCREG_CS_ATTR
@ MISCREG_CS_ATTR
Definition: misc.hh:363
X86ISA::GpuTLB::exitCallback
void exitCallback()
Definition: gpu_tlb.cc:1425
name
const std::string & name()
Definition: trace.cc:50
packet_access.hh
X86ISA::GpuTLB::AccessInfo::meanDistance
unsigned int meanDistance
Definition: gpu_tlb.hh:419
X86ISA::x86LocalAPICAddress
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Definition: x86_traits.hh:93
X86ISA::offset
offset
Definition: misc.hh:1024
X86ISA::GpuTLB::TLBFootprint
AccessPatternTable TLBFootprint
Definition: gpu_tlb.hh:423
X86ISA::IntAddrPrefixMSR
const Addr IntAddrPrefixMSR
Definition: x86_traits.hh:69
X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:79
SimObject::name
virtual const std::string name() const
Definition: sim_object.hh:133
BaseTLB::Write
@ Write
Definition: tlb.hh:57
X86ISA::GpuTLB::pageTableCycles
Stats::Scalar pageTableCycles
Definition: gpu_tlb.hh:190
ArmISA::PageBytes
const Addr PageBytes
Definition: isa_traits.hh:52
X86ISA::GpuTLB::TLBEvent::getTLBEventVaddr
Addr getTLBEventVaddr()
Definition: gpu_tlb.cc:1097
X86ISA::GpuTLB::TLBEvent
Definition: gpu_tlb.hh:362
X86ISA::StoreCheck
@ StoreCheck
Definition: ldstflags.hh:54
base.hh
X86ISA::p
Bitfield< 0 > p
Definition: pagetable.hh:151
X86ISA::GpuTLB::TLB_MISS
@ TLB_MISS
Definition: gpu_tlb.hh:221
X86ISA::GpuTLB::size
int size
Definition: gpu_tlb.hh:114
X86ISA::GpuTLB::hasMemSidePort
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: gpu_tlb.hh:133
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:257
X86ISA::GpuTLB::handleFuncTranslationReturn
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: gpu_tlb.cc:1132
ThreadContext::readMiscReg
virtual RegVal readMiscReg(RegIndex misc_reg)=0
EmulationPageTable::Entry::paddr
Addr paddr
Definition: page_table.hh:53
X86ISA::MISCREG_CR0
@ MISCREG_CR0
Definition: misc.hh:105
ThreadContext::setMiscReg
virtual void setMiscReg(RegIndex misc_reg, RegVal val)=0
X86ISA::GpuTLB::Translation::finish
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itsle...
X86ISA::GpuTLB::localLatency
Stats::Formula localLatency
Definition: gpu_tlb.hh:195
X86ISA::SEGMENT_REG_ES
@ SEGMENT_REG_ES
Definition: segment.hh:45
logging.hh
X86ISA::GpuTLB::freeList
std::vector< EntryList > freeList
Definition: gpu_tlb.hh:148
Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
X86ISA::GpuTLB::issueTLBLookup
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
Definition: gpu_tlb.cc:751
X86ISA::GpuTLB::TLBEvent::TLBEvent
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
Definition: gpu_tlb.cc:831
X86ISA::limit
BitfieldType< SegDescriptorLimit > limit
Definition: misc.hh:924
CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:63
X86ISA::mask
mask
Definition: misc.hh:796
X86ISA::AddrSizeFlagBit
@ AddrSizeFlagBit
Definition: ldstflags.hh:53
X86ISA::GpuTLB::updatePageFootprint
void updatePageFootprint(Addr virt_page_addr)
Definition: gpu_tlb.cc:1390
X86ISA::MISCREG_SEG_BASE
static MiscRegIndex MISCREG_SEG_BASE(int index)
Definition: misc.hh:512
trace.hh
X86ISA::GpuTLB::CpuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
Definition: gpu_tlb.cc:1204
Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:508
simout
OutputDirectory simout
Definition: output.cc:61
X86ISA::TlbEntry::size
int size()
Definition: pagetable.hh:112
std::list< AddrRange >
X86ISA::GpuTLB::accessCycles
Stats::Scalar accessCycles
Definition: gpu_tlb.hh:188
page_table.hh
X86ISA::GpuTLB::Translation
Definition: gpu_tlb.hh:79
CheckpointIn
Definition: serialize.hh:67
X86ISA::GpuTLB::configAddress
uint32_t configAddress
Definition: gpu_tlb.hh:70
X86ISA::GpuTLB::outstandingReqs
int outstandingReqs
Definition: gpu_tlb.hh:350
BaseTLB::Execute
@ Execute
Definition: tlb.hh:57
Stats::DataWrap::desc
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:307
X86ISA::GpuTLB::serialize
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: gpu_tlb.cc:653
ThreadContext::instAddr
virtual Addr instAddr() const =0
ArmISA::tlb
Bitfield< 59, 56 > tlb
Definition: miscregs_types.hh:88
misc.hh
csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:158
X86ISA::GpuTLB::TLBEvent::updateOutcome
void updateOutcome(tlbOutcome _outcome)
Definition: gpu_tlb.cc:1091
X86ISA::GpuTLB::translationReturn
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
Definition: gpu_tlb.cc:965
X86ISA::GpuTLB::Mode
enum BaseTLB::Mode Mode
Definition: gpu_tlb.hh:77
thread_context.hh
X86ISA::GpuTLB::GpuTLB
GpuTLB(const Params *p)
Definition: gpu_tlb.cc:66
X86ISA::GpuTLB::lookup
TlbEntry * lookup(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:215
X86ISA::CPL0FlagBit
@ CPL0FlagBit
Definition: ldstflags.hh:52
X86ISA::GpuTLB::TranslationState::tlbMode
Mode tlbMode
Definition: gpu_tlb.hh:313
X86ISA::seg
Bitfield< 2, 0 > seg
Definition: types.hh:82
RegVal
uint64_t RegVal
Definition: types.hh:168
X86ISA::GpuTLB::Params
X86GPUTLBParams Params
Definition: gpu_tlb.hh:73
ArmISA::va
Bitfield< 8 > va
Definition: miscregs_types.hh:272
X86ISA::MiscRegIndex
MiscRegIndex
Definition: misc.hh:100
X86ISA::GpuTLB::AccessInfo::sumDistance
unsigned int sumDistance
Definition: gpu_tlb.hh:418
X86ISA::GpuTLB::globalTLBMissRate
Stats::Formula globalTLBMissRate
Definition: gpu_tlb.hh:185
X86ISA::GpuTLB::tlb
std::vector< TlbEntry > tlb
Definition: gpu_tlb.hh:141
X86ISA::GpuTLB::TranslationState::tc
ThreadContext * tc
Definition: gpu_tlb.hh:315
X86ISA::TlbEntry::user
bool user
Definition: pagetable.hh:79
X86ISA::GpuTLB::invalidateNonGlobal
void invalidateNonGlobal()
Definition: gpu_tlb.cc:248
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171
X86ISA::IntAddrPrefixCPUID
const Addr IntAddrPrefixCPUID
Definition: x86_traits.hh:68
curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:45
X86ISA::GpuTLB::TranslationState
TLB TranslationState: this currently is a somewhat bastardization of the usage of SenderState,...
Definition: gpu_tlb.hh:310
bits
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:75

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17