Go to the documentation of this file.
55 #include "debug/GPUPrefetch.hh"
56 #include "debug/GPUTLB.hh"
70 exitEvent([
this]{ exitCallback(); },
name()), stats(
this)
84 for (
int set = 0; set <
numSets; ++set) {
85 for (
int way = 0; way <
assoc; ++way) {
108 cprintf(
"Forcing maxCoalescedReqs to %d (TLB assoc.) \n",
assoc);
117 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
123 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
139 if (if_name ==
"cpu_side_ports") {
141 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
145 }
else if (if_name ==
"mem_side_ports") {
147 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
154 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
178 newEntry->
vaddr = vpn;
184 GpuTLB::EntryList::iterator
194 for (; entry !=
entryList[set].end(); ++entry) {
195 int page_size = (*entry)->size();
197 if ((*entry)->vaddr <=
va && (*entry)->vaddr + page_size >
va) {
198 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x "
199 "with size %#x.\n",
va, (*entry)->vaddr, page_size);
230 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
250 DPRINTF(GPUTLB,
"Invalidating all non global entries.\n");
255 if (!(*entryIt)->global) {
303 DPRINTF(GPUTLB,
"Addresses references internal memory.\n");
308 panic(
"CPUID memory space not yet implemented!\n");
314 return std::make_shared<GeneralProtection>(0);
316 req->setLocalAccessor(
319 return localMiscRegAccess(read, regNum, tc, pkt);
331 assert(!(IOPort & ~0xFFFF));
332 if (IOPort == 0xCF8 && req->getSize() == 4) {
333 req->setLocalAccessor(
336 return localMiscRegAccess(
340 }
else if ((IOPort & ~
mask(2)) == 0xCFC) {
357 panic(
"Access to unrecognized internal address space %#x.\n",
373 bool tlb_hit =
false;
375 uint32_t flags = req->getFlags();
385 DPRINTF(GPUTLB,
"In protected mode.\n");
387 assert(m5Reg.mode == LongMode);
391 DPRINTF(GPUTLB,
"Paging enabled.\n");
421 bool &delayedResponse,
bool timing,
int &latency)
423 uint32_t flags = req->getFlags();
433 delayedResponse =
false;
441 DPRINTF(GPUTLB,
"In protected mode.\n");
443 if (m5Reg.mode != LongMode) {
444 DPRINTF(GPUTLB,
"Not in long mode. Checking segment "
451 return std::make_shared<GeneralProtection>(0);
460 return std::make_shared<GeneralProtection>(0);
463 return std::make_shared<GeneralProtection>(0);
477 if ((csAttr.defaultSize && sizeOverride) ||
478 (!csAttr.defaultSize && !sizeOverride)) {
486 DPRINTF(GPUTLB,
"Checking an expand down segment.\n");
487 warn_once(
"Expand down segments are untested.\n");
490 return std::make_shared<GeneralProtection>(0);
493 return std::make_shared<GeneralProtection>(0);
499 DPRINTF(GPUTLB,
"Paging enabled.\n");
511 fatal(
"GpuTLB doesn't support full-system mode\n");
513 DPRINTF(GPUTLB,
"Handling a TLB miss for address %#x "
526 pte =
p->pTable->lookup(
vaddr);
530 return std::make_shared<PageFault>(
vaddr,
true,
534 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
536 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n",
537 alignedVaddr, pte->
paddr);
539 TlbEntry gpuEntry(
p->pid(), alignedVaddr,
540 pte->
paddr,
false,
false);
541 entry =
insert(alignedVaddr, gpuEntry);
544 DPRINTF(GPUTLB,
"Miss was serviced.\n");
555 bool inUser = (m5Reg.cpl == 3 &&
559 bool badWrite = (!entry->
writable && (inUser || cr0.wp));
566 return std::make_shared<PageFault>(
vaddr,
true,
mode,
570 if (storeCheck && badWrite) {
573 return std::make_shared<PageFault>(
vaddr,
true,
579 DPRINTF(GPUTLB,
"Entry found with paddr %#x, doing protection "
580 "checks.\n", entry->
paddr);
582 int page_size = entry->
size();
584 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
585 req->setPaddr(paddr);
591 DPRINTF(GPUTLB,
"Paging disabled.\n");
593 req->setPaddr(
vaddr);
597 DPRINTF(GPUTLB,
"In real mode.\n");
599 req->setPaddr(
vaddr);
604 LocalApicBase localApicBase =
608 Addr paddr = req->getPaddr();
610 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
625 bool delayedResponse;
635 bool delayedResponse;
639 delayedResponse,
true, latency);
641 if (!delayedResponse)
679 bool update_stats = !sender_state->
prefetch;
682 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
685 int req_cnt = sender_state->
reqCnt.back();
698 bool success =
tlbLookup(tmp_req, tmp_tc, update_stats);
732 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
735 panic(
"Virtual Page Address %#x already has a return event\n",
742 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
750 :
Event(CPU_Tick_Pri),
tlb(_tlb), virtPageAddr(_addr),
751 outcome(tlb_outcome), pkt(_pkt)
764 uint32_t flags = pkt->
req->getFlags();
772 bool badWrite = (!tlb_entry->
writable && (inUser || cr0.wp));
774 if ((inUser && !tlb_entry->
user) ||
779 panic(
"Page fault detected");
782 if (storeCheck && badWrite) {
785 panic(
"Page fault detected");
810 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n",
812 local_entry = sender_state->
tlbEntry;
814 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
824 local_entry = new_entry;
827 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
830 local_entry =
insert(virt_page_addr, *new_entry);
841 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
842 "while paddr was %#x.\n", local_entry->
vaddr,
846 int page_size = local_entry->
size();
848 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
856 pkt->
req->setPaddr(paddr);
885 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
893 int req_cnt = tmp_sender_state->
reqCnt.back();
894 bool update_stats = !tmp_sender_state->
prefetch;
907 DPRINTF(GPUTLB,
"This is a TLB miss\n");
928 DPRINTF(GPUTLB,
"Failed sending translation request to "
929 "lower level TLB for addr %#x\n", virtPageAddr);
933 DPRINTF(GPUTLB,
"Sent translation request to lower level "
934 "TLB for addr %#x\n", virtPageAddr);
938 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for "
939 "addr %#x\n", virtPageAddr);
955 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
964 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
965 assert(alignedVaddr == virtPageAddr);
970 pte =
p->pTable->lookup(
vaddr);
974 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
991 panic(
"Unexpected TLB outcome %d", outcome);
998 tlb->translationReturn(virtPageAddr, outcome, pkt);
1004 return "trigger translationDoneEvent";
1016 return virtPageAddr;
1028 if (
tlb->outstandingReqs <
tlb->maxCoalescedReqs) {
1029 tlb->issueTLBLookup(pkt);
1031 tlb->outstandingReqs++;
1034 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
1035 tlb->outstandingReqs);
1061 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr "
1064 local_entry = sender_state->
tlbEntry;
1066 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr "
1074 new_entry = sender_state->
tlbEntry;
1076 local_entry = new_entry;
1081 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1084 local_entry =
insert(virt_page_addr, *new_entry);
1087 assert(local_entry);
1090 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
1091 "while paddr was %#x.\n", local_entry->
vaddr,
1092 local_entry->
paddr);
1108 int page_size = local_entry->
size();
1110 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
1112 pkt->
req->setPaddr(paddr);
1127 bool update_stats = !sender_state->
prefetch;
1133 tlb->updatePageFootprint(virt_page_addr);
1136 bool success =
tlb->tlbLookup(pkt->
req, tc, update_stats);
1142 tlb->stats.globalNumTLBAccesses++;
1146 tlb->stats.globalNumTLBHits++;
1152 tlb->stats.globalNumTLBMisses++;
1153 if (
tlb->hasMemSidePort) {
1155 tlb->memSidePort[0]->sendFunctional(pkt);
1157 if (sender_state->
prefetch && !pkt->
req->hasPaddr())
1161 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1168 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
1169 assert(alignedVaddr == virt_page_addr);
1173 p->pTable->lookup(
vaddr);
1176 pte =
p->pTable->lookup(
vaddr);
1184 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1189 pte->
paddr,
false,
false);
1196 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1201 pte->
paddr,
false,
false);
1203 DPRINTF(GPUPrefetch,
"Prefetch failed %#x\n",
1213 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
1214 tlb->lookup(pkt->
req->getVaddr()));
1230 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1238 panic(
"recvReqRetry called");
1261 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
1264 TLBEvent *tlb_event =
tlb->translationReturnEvent[virt_page_addr];
1279 panic(
"recvReqRetry called");
1291 delete old_tlb_event;
1320 AccessPatternTable::value_type(virt_page_addr, tmp_access_info));
1322 bool first_page_access = ret.second;
1324 if (first_page_access) {
1327 int accessed_before;
1328 accessed_before =
curTick() - ret.first->second.lastTimeAccessed;
1329 ret.first->second.totalReuseDistance += accessed_before;
1332 ret.first->second.accessesPerPage++;
1333 ret.first->second.lastTimeAccessed =
curTick();
1336 ret.first->second.localTLBAccesses
1344 std::ostream *page_stat_file =
nullptr;
1355 <<
"page,max_access_distance,mean_access_distance, "
1356 <<
"stddev_distance" << std::endl;
1360 unsigned int sum_avg_reuse_distance_per_page = 0;
1364 sum_avg_reuse_distance_per_page += iter.second.totalReuseDistance /
1365 iter.second.accessesPerPage;
1368 unsigned int tmp = iter.second.localTLBAccesses[0];
1369 unsigned int prev = tmp;
1371 for (
int i = 0;
i < iter.second.localTLBAccesses.size(); ++
i) {
1376 prev = iter.second.localTLBAccesses[
i];
1379 iter.second.localTLBAccesses[
i] -= tmp;
1382 iter.second.sumDistance +=
1383 iter.second.localTLBAccesses[
i];
1386 iter.second.meanDistance =
1387 iter.second.sumDistance / iter.second.accessesPerPage;
1391 unsigned int max_distance = 0;
1392 unsigned int stddev_distance = 0;
1394 for (
int i = 0;
i < iter.second.localTLBAccesses.size(); ++
i) {
1395 unsigned int tmp_access_distance =
1396 iter.second.localTLBAccesses[
i];
1398 if (tmp_access_distance > max_distance) {
1399 max_distance = tmp_access_distance;
1403 tmp_access_distance - iter.second.meanDistance;
1404 stddev_distance += pow(diff, 2);
1409 sqrt(stddev_distance/iter.second.accessesPerPage);
1411 if (page_stat_file) {
1412 *page_stat_file << std::hex << iter.first <<
",";
1413 *page_stat_file << std::dec << max_distance <<
",";
1414 *page_stat_file << std::dec << iter.second.meanDistance
1416 *page_stat_file << std::dec << stddev_distance;
1417 *page_stat_file << std::endl;
1421 iter.second.localTLBAccesses.clear();
1436 ADD_STAT(localNumTLBAccesses,
"Number of TLB accesses"),
1437 ADD_STAT(localNumTLBHits,
"Number of TLB hits"),
1438 ADD_STAT(localNumTLBMisses,
"Number of TLB misses"),
1439 ADD_STAT(localTLBMissRate,
"TLB miss rate"),
1440 ADD_STAT(globalNumTLBAccesses,
"Number of TLB accesses"),
1441 ADD_STAT(globalNumTLBHits,
"Number of TLB hits"),
1442 ADD_STAT(globalNumTLBMisses,
"Number of TLB misses"),
1443 ADD_STAT(globalTLBMissRate,
"TLB miss rate"),
1444 ADD_STAT(accessCycles,
"Cycles spent accessing this TLB level"),
1445 ADD_STAT(pageTableCycles,
"Cycles spent accessing the page table"),
1446 ADD_STAT(numUniquePages,
"Number of unique pages touched"),
1447 ADD_STAT(localCycles,
"Number of cycles spent in queue for all "
1449 ADD_STAT(localLatency,
"Avg. latency over incoming coalesced reqs"),
1450 ADD_STAT(avgReuseDistance,
"avg. reuse distance over all pages (in "
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
#define fatal(...)
This implements a cprintf based fatal() function.
std::vector< CpuSidePort * > cpuSidePort
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Stats::Formula localTLBMissRate
bool scheduled() const
Determine if the current event is scheduled.
std::queue< Addr > cleanupQueue
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Stats::Scalar avgReuseDistance
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
void makeTimingResponse()
const Addr PhysAddrPrefixPciConfig
Stats::Formula localLatency
void demapPage(Addr va, uint64_t asn)
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
This hash map will use the virtual page address as a key and will keep track of total number of acces...
const Addr PhysAddrPrefixIO
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
EventFunctionWrapper cleanupEvent
std::shared_ptr< Request > RequestPtr
RequestPtr req
A pointer to the original request.
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
const M5_VAR_USED Request::FlagsType SegmentFlagMask
bool accessDistance
Print out accessDistance stats.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
virtual Process * getProcessPtr()=0
Stats::Scalar pageTableCycles
bool msrAddrToIndex(MiscRegIndex &regNum, Addr addr)
Find and return the misc reg corresponding to an MSR address.
const Addr IntAddrPrefixIO
const Addr IntAddrPrefixMask
unsigned int accessesPerPage
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
TlbEntry * insert(Addr vpn, TlbEntry &entry)
static MiscRegIndex MISCREG_SEG_SEL(int index)
Stats::Scalar localCycles
Stats::Scalar numUniquePages
X86ISA::GpuTLB::GpuTLBStats stats
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
const char * description() const
Return a C string describing the event.
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
Bitfield< 14 > expandDown
Stats::Scalar localNumTLBAccesses
void schedule(Event &event, Tick when)
std::vector< MemSidePort * > memSidePort
Tick cyclesToTicks(Cycles c) const
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Counter value() const
Return the current value of this stat as its base type.
virtual void recvReqRetry()
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
std::vector< int > reqCnt
void cprintf(const char *format, const Args &...args)
std::ostream * stream() const
Get the underlying output stream.
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
bool FA
true if this is a fully-associative TLB
std::shared_ptr< FaultBase > Fault
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Ports are used to interface objects to each other.
@ STRICT_ORDER
The request is required to be strictly ordered by CPU models and is non-speculative.
unsigned int totalReuseDistance
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
unsigned int lastTimeAccessed
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
virtual ContextID contextId() const =0
static const Priority Maximum_Pri
Maximum priority.
static MiscRegIndex MISCREG_SEG_ATTR(int index)
constexpr T mbits(T val, unsigned first, unsigned last)
Mask off the given bits in place like bits() but without shifting.
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and it calls issueTLBLookup(). It o...
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
constexpr decltype(nullptr) NoFault
@ MISCREG_PCI_CONFIG_ADDRESS
void setConfigAddress(uint32_t addr)
This is exposed globally, independent of the ISA.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
tlbLookup will only perform a TLB lookup, returning true on a TLB hit and false on a TLB miss.
const std::string & name()
Stats::Scalar accessCycles
unsigned int meanDistance
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
AccessPatternTable TLBFootprint
Stats::Scalar globalNumTLBMisses
const Addr IntAddrPrefixMSR
Stats::Scalar localNumTLBMisses
GpuTLBStats(Stats::Group *parent)
virtual const std::string name() const
bool hasMemSidePort
if true, then this is not the last level TLB
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
@ UNCACHEABLE
The request is to an uncacheable address.
virtual RegVal readMiscReg(RegIndex misc_reg)=0
virtual void setMiscReg(RegIndex misc_reg, RegVal val)=0
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...
std::vector< EntryList > freeList
Cycles is a wrapper class for representing cycle counts, i.e.
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
BitfieldType< SegDescriptorLimit > limit
std::ostream CheckpointOut
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
void updatePageFootprint(Addr virt_page_addr)
static MiscRegIndex MISCREG_SEG_BASE(int index)
Tick curTick()
The universal simulation clock.
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
SenderState * senderState
This packet's sender state.
Stats::Scalar globalNumTLBHits
Stats::Scalar globalNumTLBAccesses
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
virtual Addr instAddr() const =0
std::string csprintf(const char *format, const Args &...args)
void updateOutcome(tlbOutcome _outcome)
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
TlbEntry * lookup(Addr va, bool update_lru=true)
Stats::Scalar localNumTLBHits
std::vector< TlbEntry > tlb
void invalidateNonGlobal()
#define panic(...)
This implements a cprintf based panic() function.
const Addr IntAddrPrefixCPUID
Stats::Formula globalTLBMissRate
TLB TranslationState: this currently is somewhat of a bastardization of the usage of SenderState,...
Generated on Tue Jun 22 2021 15:28:28 for gem5 by doxygen 1.8.17