Go to the documentation of this file.
55 #include "debug/GPUPrefetch.hh"
56 #include "debug/GPUTLB.hh"
70 exitEvent([
this]{ exitCallback(); },
name())
73 assert(assoc <= size);
75 allocationPolicy =
p->allocationPolicy;
76 hasMemSidePort =
false;
77 accessDistance =
p->accessDistance;
79 tlb.assign(size, TlbEntry());
81 freeList.resize(numSets);
82 entryList.resize(numSets);
84 for (
int set = 0; set < numSets; ++set) {
85 for (
int way = 0; way < assoc; ++way) {
86 int x = set * assoc + way;
87 freeList[set].push_back(&
tlb.at(
x));
101 setMask = numSets - 1;
103 maxCoalescedReqs =
p->maxOutstandingReqs;
106 if (maxCoalescedReqs > assoc) {
107 maxCoalescedReqs = assoc;
108 cprintf(
"Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
112 hitLatency =
p->hitLatency;
113 missLatency1 =
p->missLatency1;
114 missLatency2 =
p->missLatency2;
117 for (
size_t i = 0;
i <
p->port_cpu_side_ports_connection_count; ++
i) {
118 cpuSidePort.push_back(
new CpuSidePort(
csprintf(
"%s-port%d",
123 for (
size_t i = 0;
i <
p->port_mem_side_ports_connection_count; ++
i) {
124 memSidePort.push_back(
new MemSidePort(
csprintf(
"%s-port%d",
139 if (if_name ==
"cpu_side_ports") {
141 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
145 }
else if (if_name ==
"mem_side_ports") {
147 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
154 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
178 newEntry->
vaddr = vpn;
184 GpuTLB::EntryList::iterator
194 for (; entry !=
entryList[set].end(); ++entry) {
195 int page_size = (*entry)->size();
197 if ((*entry)->vaddr <=
va && (*entry)->vaddr + page_size >
va) {
198 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x "
199 "with size %#x.\n",
va, (*entry)->vaddr, page_size);
230 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
250 DPRINTF(GPUTLB,
"Invalidating all non global entries.\n");
255 if (!(*entryIt)->global) {
303 DPRINTF(GPUTLB,
"Addresses references internal memory.\n");
308 panic(
"CPUID memory space not yet implemented!\n");
314 return std::make_shared<GeneralProtection>(0);
316 req->setLocalAccessor(
319 return localMiscRegAccess(read, regNum, tc, pkt);
331 assert(!(IOPort & ~0xFFFF));
332 if (IOPort == 0xCF8 && req->getSize() == 4) {
333 req->setLocalAccessor(
336 return localMiscRegAccess(
340 }
else if ((IOPort & ~
mask(2)) == 0xCFC) {
357 panic(
"Access to unrecognized internal address space %#x.\n",
373 bool tlb_hit =
false;
375 uint32_t flags = req->getFlags();
385 DPRINTF(GPUTLB,
"In protected mode.\n");
387 assert(m5Reg.mode == LongMode);
391 DPRINTF(GPUTLB,
"Paging enabled.\n");
421 bool &delayedResponse,
bool timing,
int &latency)
423 uint32_t flags = req->getFlags();
433 delayedResponse =
false;
441 DPRINTF(GPUTLB,
"In protected mode.\n");
443 if (m5Reg.mode != LongMode) {
444 DPRINTF(GPUTLB,
"Not in long mode. Checking segment "
451 return std::make_shared<GeneralProtection>(0);
460 return std::make_shared<GeneralProtection>(0);
463 return std::make_shared<GeneralProtection>(0);
477 if ((csAttr.defaultSize && sizeOverride) ||
478 (!csAttr.defaultSize && !sizeOverride)) {
486 DPRINTF(GPUTLB,
"Checking an expand down segment.\n");
487 warn_once(
"Expand down segments are untested.\n");
490 return std::make_shared<GeneralProtection>(0);
493 return std::make_shared<GeneralProtection>(0);
499 DPRINTF(GPUTLB,
"Paging enabled.\n");
511 fatal(
"GpuTLB doesn't support full-system mode\n");
513 DPRINTF(GPUTLB,
"Handling a TLB miss for address %#x "
526 pte =
p->pTable->lookup(
vaddr);
530 return std::make_shared<PageFault>(
vaddr,
true,
534 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
536 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n",
537 alignedVaddr, pte->
paddr);
539 TlbEntry gpuEntry(
p->pid(), alignedVaddr,
540 pte->
paddr,
false,
false);
541 entry =
insert(alignedVaddr, gpuEntry);
544 DPRINTF(GPUTLB,
"Miss was serviced.\n");
555 bool inUser = (m5Reg.cpl == 3 &&
559 bool badWrite = (!entry->
writable && (inUser || cr0.wp));
566 return std::make_shared<PageFault>(
vaddr,
true,
mode,
570 if (storeCheck && badWrite) {
573 return std::make_shared<PageFault>(
vaddr,
true,
579 DPRINTF(GPUTLB,
"Entry found with paddr %#x, doing protection "
580 "checks.\n", entry->
paddr);
582 int page_size = entry->
size();
584 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
585 req->setPaddr(paddr);
591 DPRINTF(GPUTLB,
"Paging disabled.\n");
593 req->setPaddr(
vaddr);
597 DPRINTF(GPUTLB,
"In real mode.\n");
599 req->setPaddr(
vaddr);
604 LocalApicBase localApicBase =
608 Addr paddr = req->getPaddr();
610 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
625 bool delayedResponse;
635 bool delayedResponse;
639 delayedResponse,
true, latency);
641 if (!delayedResponse)
668 .
name(
name() +
".local_TLB_accesses")
669 .
desc(
"Number of TLB accesses")
674 .
desc(
"Number of TLB hits")
679 .
desc(
"Number of TLB misses")
683 .
name(
name() +
".local_TLB_miss_rate")
684 .
desc(
"TLB miss rate")
689 .
desc(
"Cycles spent accessing this TLB level")
693 .
name(
name() +
".page_table_cycles")
694 .
desc(
"Cycles spent accessing the page table")
701 .
desc(
"Number of unique pages touched")
706 .
desc(
"Number of cycles spent in queue for all incoming reqs")
711 .
desc(
"Avg. latency over incoming coalesced reqs")
717 .
name(
name() +
".global_TLB_accesses")
718 .
desc(
"Number of TLB accesses")
723 .
desc(
"Number of TLB hits")
727 .
name(
name() +
".global_TLB_misses")
728 .
desc(
"Number of TLB misses")
732 .
name(
name() +
".global_TLB_miss_rate")
733 .
desc(
"TLB miss rate")
739 .
name(
name() +
".avg_reuse_distance")
740 .
desc(
"avg. reuse distance over all pages (in ticks)")
762 bool update_stats = !sender_state->
prefetch;
765 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
768 int req_cnt = sender_state->
reqCnt.back();
781 bool success =
tlbLookup(tmp_req, tmp_tc, update_stats);
815 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
818 panic(
"Virtual Page Address %#x already has a return event\n",
825 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
833 :
Event(CPU_Tick_Pri),
tlb(_tlb), virtPageAddr(_addr),
834 outcome(tlb_outcome), pkt(_pkt)
847 uint32_t flags = pkt->
req->getFlags();
855 bool badWrite = (!tlb_entry->
writable && (inUser || cr0.wp));
857 if ((inUser && !tlb_entry->
user) ||
862 panic(
"Page fault detected");
865 if (storeCheck && badWrite) {
868 panic(
"Page fault detected");
893 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n",
895 local_entry = sender_state->
tlbEntry;
897 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
907 local_entry = new_entry;
910 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
913 local_entry =
insert(virt_page_addr, *new_entry);
924 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
925 "while paddr was %#x.\n", local_entry->
vaddr,
929 int page_size = local_entry->
size();
931 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
939 pkt->
req->setPaddr(paddr);
968 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
976 int req_cnt = tmp_sender_state->
reqCnt.back();
977 bool update_stats = !tmp_sender_state->
prefetch;
990 DPRINTF(GPUTLB,
"This is a TLB miss\n");
1011 DPRINTF(GPUTLB,
"Failed sending translation request to "
1012 "lower level TLB for addr %#x\n", virtPageAddr);
1016 DPRINTF(GPUTLB,
"Sent translation request to lower level "
1017 "TLB for addr %#x\n", virtPageAddr);
1021 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for "
1022 "addr %#x\n", virtPageAddr);
1038 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1047 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
1048 assert(alignedVaddr == virtPageAddr);
1053 pte =
p->pTable->lookup(
vaddr);
1057 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1074 panic(
"Unexpected TLB outcome %d", outcome);
1081 tlb->translationReturn(virtPageAddr, outcome, pkt);
1087 return "trigger translationDoneEvent";
1099 return virtPageAddr;
1111 if (
tlb->outstandingReqs <
tlb->maxCoalescedReqs) {
1112 tlb->issueTLBLookup(pkt);
1114 tlb->outstandingReqs++;
1117 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
1118 tlb->outstandingReqs);
1144 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr "
1147 local_entry = sender_state->
tlbEntry;
1149 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr "
1157 new_entry = sender_state->
tlbEntry;
1159 local_entry = new_entry;
1164 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1167 local_entry =
insert(virt_page_addr, *new_entry);
1170 assert(local_entry);
1173 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
1174 "while paddr was %#x.\n", local_entry->
vaddr,
1175 local_entry->
paddr);
1191 int page_size = local_entry->
size();
1193 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
1195 pkt->
req->setPaddr(paddr);
1210 bool update_stats = !sender_state->
prefetch;
1216 tlb->updatePageFootprint(virt_page_addr);
1219 bool success =
tlb->tlbLookup(pkt->
req, tc, update_stats);
1225 tlb->globalNumTLBAccesses++;
1229 tlb->globalNumTLBHits++;
1235 tlb->globalNumTLBMisses++;
1236 if (
tlb->hasMemSidePort) {
1238 tlb->memSidePort[0]->sendFunctional(pkt);
1240 if (sender_state->
prefetch && !pkt->
req->hasPaddr())
1244 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1251 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
1252 assert(alignedVaddr == virt_page_addr);
1256 p->pTable->lookup(
vaddr);
1259 pte =
p->pTable->lookup(
vaddr);
1267 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1272 pte->
paddr,
false,
false);
1279 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1284 pte->
paddr,
false,
false);
1286 DPRINTF(GPUPrefetch,
"Prefetch failed %#x\n",
1296 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
1297 tlb->lookup(pkt->
req->getVaddr()));
1313 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1321 panic(
"recvReqRetry called");
1344 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
1347 TLBEvent *tlb_event =
tlb->translationReturnEvent[virt_page_addr];
1362 panic(
"recvReqRetry called");
1374 delete old_tlb_event;
1403 AccessPatternTable::value_type(virt_page_addr, tmp_access_info));
1405 bool first_page_access = ret.second;
1407 if (first_page_access) {
1410 int accessed_before;
1411 accessed_before =
curTick() - ret.first->second.lastTimeAccessed;
1412 ret.first->second.totalReuseDistance += accessed_before;
1415 ret.first->second.accessesPerPage++;
1416 ret.first->second.lastTimeAccessed =
curTick();
1419 ret.first->second.localTLBAccesses
1427 std::ostream *page_stat_file =
nullptr;
1438 <<
"page,max_access_distance,mean_access_distance, "
1439 <<
"stddev_distance" << std::endl;
1443 unsigned int sum_avg_reuse_distance_per_page = 0;
1447 sum_avg_reuse_distance_per_page += iter.second.totalReuseDistance /
1448 iter.second.accessesPerPage;
1451 unsigned int tmp = iter.second.localTLBAccesses[0];
1452 unsigned int prev = tmp;
1454 for (
int i = 0;
i < iter.second.localTLBAccesses.size(); ++
i) {
1459 prev = iter.second.localTLBAccesses[
i];
1462 iter.second.localTLBAccesses[
i] -= tmp;
1465 iter.second.sumDistance +=
1466 iter.second.localTLBAccesses[
i];
1469 iter.second.meanDistance =
1470 iter.second.sumDistance / iter.second.accessesPerPage;
1474 unsigned int max_distance = 0;
1475 unsigned int stddev_distance = 0;
1477 for (
int i = 0;
i < iter.second.localTLBAccesses.size(); ++
i) {
1478 unsigned int tmp_access_distance =
1479 iter.second.localTLBAccesses[
i];
1481 if (tmp_access_distance > max_distance) {
1482 max_distance = tmp_access_distance;
1486 tmp_access_distance - iter.second.meanDistance;
1487 stddev_distance += pow(diff, 2);
1492 sqrt(stddev_distance/iter.second.accessesPerPage);
1494 if (page_stat_file) {
1495 *page_stat_file << std::hex << iter.first <<
",";
1496 *page_stat_file << std::dec << max_distance <<
",";
1497 *page_stat_file << std::dec << iter.second.meanDistance
1499 *page_stat_file << std::dec << stddev_distance;
1500 *page_stat_file << std::endl;
1504 iter.second.localTLBAccesses.clear();
1519 X86GPUTLBParams::create()
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
Stats::Scalar localNumTLBAccesses
#define fatal(...)
This implements a cprintf based fatal() function.
std::vector< CpuSidePort * > cpuSidePort
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
Stats::Formula localTLBMissRate
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
virtual void regStats()
Callback to set stat parameters.
bool scheduled() const
Determine if the current event is scheduled.
std::queue< Addr > cleanupQueue
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
void makeTimingResponse()
Stats::Scalar globalNumTLBHits
const Addr PhysAddrPrefixPciConfig
void demapPage(Addr va, uint64_t asn)
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Stats::Scalar numUniquePages
const Addr PhysAddrPrefixIO
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
EventFunctionWrapper cleanupEvent
Stats::Scalar localNumTLBMisses
std::shared_ptr< Request > RequestPtr
RequestPtr req
A pointer to the original request.
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
bool accessDistance
Print out accessDistance stats.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
virtual Process * getProcessPtr()=0
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
bool msrAddrToIndex(MiscRegIndex &regNum, Addr addr)
Find and return the misc reg corresponding to an MSR address.
const Addr IntAddrPrefixIO
const Addr IntAddrPrefixMask
unsigned int accessesPerPage
Stats::Scalar localCycles
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
TlbEntry * insert(Addr vpn, TlbEntry &entry)
static MiscRegIndex MISCREG_SEG_SEL(int index)
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Stats::Scalar globalNumTLBAccesses
const char * description() const
Return a C string describing the event.
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
@ STRICT_ORDER
The request is required to be strictly ordered by CPU models and is non-speculative.
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
Bitfield< 14 > expandDown
Stats::Scalar globalNumTLBMisses
void schedule(Event &event, Tick when)
std::vector< MemSidePort * > memSidePort
Tick cyclesToTicks(Cycles c) const
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Counter value() const
Return the current value of this stat as its base type.
virtual void recvReqRetry()
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
std::vector< int > reqCnt
void cprintf(const char *format, const Args &...args)
std::ostream * stream() const
Get the underlying output stream.
bool FA
true if this is a fully-associative TLB
std::shared_ptr< FaultBase > Fault
const Request::FlagsType M5_VAR_USED SegmentFlagMask
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Stats::Scalar avgReuseDistance
void regStats() override
Callback to set stat parameters.
Ports are used to interface objects to each other.
unsigned int totalReuseDistance
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
unsigned int lastTimeAccessed
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
virtual ContextID contextId() const =0
static const Priority Maximum_Pri
Maximum priority.
static MiscRegIndex MISCREG_SEG_ATTR(int index)
@ UNCACHEABLE
The request is to an uncacheable address.
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and calls issueTLBLookup(). It o...
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
constexpr decltype(nullptr) NoFault
@ MISCREG_PCI_CONFIG_ADDRESS
Stats::Scalar localNumTLBHits
void setConfigAddress(uint32_t addr)
This is exposed globally, independent of the ISA.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss.
Derived & name(const std::string &name)
Set the name and mark this stat to print at the end of simulation.
const std::string & name()
unsigned int meanDistance
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
AccessPatternTable TLBFootprint
const Addr IntAddrPrefixMSR
virtual const std::string name() const
Stats::Scalar pageTableCycles
bool hasMemSidePort
if true, then this is not the last level TLB
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
virtual RegVal readMiscReg(RegIndex misc_reg)=0
virtual void setMiscReg(RegIndex misc_reg, RegVal val)=0
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...
Stats::Formula localLatency
std::vector< EntryList > freeList
Cycles is a wrapper class for representing cycle counts, i.e.
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
BitfieldType< SegDescriptorLimit > limit
std::ostream CheckpointOut
void updatePageFootprint(Addr virt_page_addr)
static MiscRegIndex MISCREG_SEG_BASE(int index)
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
SenderState * senderState
This packet's sender state.
Stats::Scalar accessCycles
Derived & desc(const std::string &_desc)
Set the description and mark this stat to print at the end of simulation.
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
virtual Addr instAddr() const =0
std::string csprintf(const char *format, const Args &...args)
void updateOutcome(tlbOutcome _outcome)
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
TlbEntry * lookup(Addr va, bool update_lru=true)
Stats::Formula globalTLBMissRate
std::vector< TlbEntry > tlb
void invalidateNonGlobal()
#define panic(...)
This implements a cprintf based panic() function.
const Addr IntAddrPrefixCPUID
Tick curTick()
The current simulated tick.
TLB TranslationState: this is currently something of a bastardization of the usage of SenderState,...
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17