54 #include "debug/GPUPrefetch.hh" 55 #include "debug/GPUTLB.hh" 77 clock =
p->clk_domain->clockPeriod();
84 for (
int set = 0;
set <
numSets; ++
set) {
85 for (
int way = 0; way <
assoc; ++way) {
86 int x =
set * assoc + way;
108 cprintf(
"Forcing maxCoalescedReqs to %d (TLB assoc.) \n",
assoc);
117 for (
size_t i = 0;
i <
p->port_slave_connection_count; ++
i) {
123 for (
size_t i = 0;
i <
p->port_master_connection_count; ++
i) {
139 if (if_name ==
"slave") {
140 if (idx >= static_cast<PortID>(
cpuSidePort.size())) {
141 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
145 }
else if (if_name ==
"master") {
146 if (idx >= static_cast<PortID>(
memSidePort.size())) {
147 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
154 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
178 newEntry->
vaddr = vpn;
184 GpuTLB::EntryList::iterator
194 for (; entry !=
entryList[
set].end(); ++entry) {
195 int page_size = (*entry)->size();
197 if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
198 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x " 199 "with size %#x.\n", va, (*entry)->vaddr, page_size);
219 auto entry =
lookupIt(va, update_lru);
230 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
250 DPRINTF(GPUTLB,
"Invalidating all non global entries.\n");
255 if (!(*entryIt)->global) {
290 pkt->
setData((uint8_t *)&data);
303 DPRINTF(GPUTLB,
"Addresses references internal memory.\n");
308 panic(
"CPUID memory space not yet implemented!\n");
314 return std::make_shared<GeneralProtection>(0);
316 req->setLocalAccessor(
319 return localMiscRegAccess(read, regNum, tc, pkt);
331 assert(!(IOPort & ~0xFFFF));
332 if (IOPort == 0xCF8 && req->getSize() == 4) {
333 req->setLocalAccessor(
336 return localMiscRegAccess(
340 }
else if ((IOPort & ~
mask(2)) == 0xCFC) {
344 if (
bits(configAddress, 31, 31)) {
346 mbits(configAddress, 30, 2) |
357 panic(
"Access to unrecognized internal address space %#x.\n",
373 bool tlb_hit =
false;
375 uint32_t flags = req->getFlags();
381 DPRINTF(GPUTLB,
"TLB Lookup for vaddr %#x.\n", vaddr);
385 DPRINTF(GPUTLB,
"In protected mode.\n");
387 assert(m5Reg.mode == LongMode);
391 DPRINTF(GPUTLB,
"Paging enabled.\n");
421 bool &delayedResponse,
bool timing,
int &latency)
423 uint32_t flags = req->getFlags();
433 delayedResponse =
false;
435 DPRINTF(GPUTLB,
"Translating vaddr %#x.\n", vaddr);
441 DPRINTF(GPUTLB,
"In protected mode.\n");
443 if (m5Reg.mode != LongMode) {
444 DPRINTF(GPUTLB,
"Not in long mode. Checking segment " 451 return std::make_shared<GeneralProtection>(0);
460 return std::make_shared<GeneralProtection>(0);
463 return std::make_shared<GeneralProtection>(0);
465 expandDown = attr.expandDown;
477 if ((csAttr.defaultSize && sizeOverride) ||
478 (!csAttr.defaultSize && !sizeOverride)) {
483 Addr endOffset = offset + req->getSize() - 1;
486 DPRINTF(GPUTLB,
"Checking an expand down segment.\n");
487 warn_once(
"Expand down segments are untested.\n");
489 if (offset <= limit || endOffset <= limit)
490 return std::make_shared<GeneralProtection>(0);
492 if (offset > limit || endOffset > limit)
493 return std::make_shared<GeneralProtection>(0);
499 DPRINTF(GPUTLB,
"Paging enabled.\n");
511 fatal(
"GpuTLB doesn't support full-system mode\n");
513 DPRINTF(GPUTLB,
"Handling a TLB miss for address %#x " 514 "at pc %#x.\n", vaddr, tc->
instAddr());
518 p->pTable->lookup(vaddr);
525 if (p->fixupFault(vaddr))
526 pte = p->pTable->lookup(vaddr);
530 return std::make_shared<PageFault>(
vaddr,
true,
534 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
536 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n",
537 alignedVaddr, pte->
paddr);
539 TlbEntry gpuEntry(p->pid(), alignedVaddr,
540 pte->
paddr,
false,
false);
541 entry =
insert(alignedVaddr, gpuEntry);
544 DPRINTF(GPUTLB,
"Miss was serviced.\n");
555 bool inUser = (m5Reg.cpl == 3 &&
559 bool badWrite = (!entry->
writable && (inUser || cr0.wp));
566 return std::make_shared<PageFault>(
vaddr,
true,
mode,
570 if (storeCheck && badWrite) {
573 return std::make_shared<PageFault>(
vaddr,
true,
579 DPRINTF(GPUTLB,
"Entry found with paddr %#x, doing protection " 580 "checks.\n", entry->
paddr);
582 int page_size = entry->
size();
583 Addr paddr = entry->
paddr | (vaddr & (page_size - 1));
584 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
585 req->setPaddr(paddr);
591 DPRINTF(GPUTLB,
"Paging disabled.\n");
592 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, vaddr);
593 req->setPaddr(vaddr);
597 DPRINTF(GPUTLB,
"In real mode.\n");
598 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, vaddr);
599 req->setPaddr(vaddr);
604 LocalApicBase localApicBase =
608 Addr paddr = req->getPaddr();
610 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
625 bool delayedResponse;
635 bool delayedResponse;
639 delayedResponse,
true, latency);
641 if (!delayedResponse)
642 translation->
finish(fault, req, tc, mode);
668 .
name(
name() +
".local_TLB_accesses")
669 .
desc(
"Number of TLB accesses")
674 .
desc(
"Number of TLB hits")
679 .
desc(
"Number of TLB misses")
683 .
name(
name() +
".local_TLB_miss_rate")
684 .
desc(
"TLB miss rate")
689 .
desc(
"Cycles spent accessing this TLB level")
693 .
name(
name() +
".page_table_cycles")
694 .
desc(
"Cycles spent accessing the page table")
701 .
desc(
"Number of unique pages touched")
706 .
desc(
"Number of cycles spent in queue for all incoming reqs")
711 .
desc(
"Avg. latency over incoming coalesced reqs")
717 .
name(
name() +
".global_TLB_accesses")
718 .
desc(
"Number of TLB accesses")
723 .
desc(
"Number of TLB hits")
727 .
name(
name() +
".global_TLB_misses")
728 .
desc(
"Number of TLB misses")
732 .
name(
name() +
".global_TLB_miss_rate")
733 .
desc(
"TLB miss rate")
739 .
name(
name() +
".avg_reuse_distance")
740 .
desc(
"avg. reuse distance over all pages (in ticks)")
762 bool update_stats = !sender_state->
prefetch;
765 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
768 int req_cnt = sender_state->reqCnt.back();
781 bool success =
tlbLookup(tmp_req, tmp_tc, update_stats);
789 auto p = sender_state->tc->getProcessPtr();
790 sender_state->tlbEntry =
797 sender_state->hitLevel = sender_state->reqCnt.
size();
815 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
818 panic(
"Virtual Page Address %#x already has a return event\n",
825 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
833 :
Event(CPU_Tick_Pri),
tlb(_tlb), virtPageAddr(_addr),
834 outcome(tlb_outcome), pkt(_pkt)
847 uint32_t
flags = pkt->
req->getFlags();
854 bool badWrite = (!tlb_entry->
writable && (inUser || cr0.wp));
856 if ((inUser && !tlb_entry->
user) ||
861 panic(
"Page fault detected");
864 if (storeCheck && badWrite) {
867 panic(
"Page fault detected");
893 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n", vaddr);
894 local_entry = sender_state->
tlbEntry;
896 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
904 local_entry = new_entry;
907 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
910 local_entry =
insert(virt_page_addr, *new_entry);
921 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks " 922 "while paddr was %#x.\n", local_entry->
vaddr,
926 int page_size = local_entry->
size();
927 Addr paddr = local_entry->
paddr | (vaddr & (page_size - 1));
928 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
936 pkt->
req->setPaddr(paddr);
965 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
973 int req_cnt = tmp_sender_state->
reqCnt.back();
974 bool update_stats = !tmp_sender_state->
prefetch;
987 DPRINTF(GPUTLB,
"This is a TLB miss\n");
1008 DPRINTF(GPUTLB,
"Failed sending translation request to " 1009 "lower level TLB for addr %#x\n", virtPageAddr);
1013 DPRINTF(GPUTLB,
"Sent translation request to lower level " 1014 "TLB for addr %#x\n", virtPageAddr);
1018 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for " 1019 "addr %#x\n", virtPageAddr);
1034 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1043 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1044 assert(alignedVaddr == virtPageAddr);
1048 p->fixupFault(vaddr)) {
1049 pte = p->pTable->lookup(vaddr);
1053 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1070 panic(
"Unexpected TLB outcome %d", outcome);
1083 return "trigger translationDoneEvent";
1113 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
1140 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr " 1143 local_entry = sender_state->
tlbEntry;
1145 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr " 1151 new_entry = sender_state->
tlbEntry;
1153 local_entry = new_entry;
1158 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1161 local_entry =
insert(virt_page_addr, *new_entry);
1164 assert(local_entry);
1167 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks " 1168 "while paddr was %#x.\n", local_entry->
vaddr,
1169 local_entry->
paddr);
1185 int page_size = local_entry->
size();
1186 Addr paddr = local_entry->
paddr | (vaddr & (page_size - 1));
1187 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
1189 pkt->
req->setPaddr(paddr);
1204 bool update_stats = !sender_state->
prefetch;
1234 if (sender_state->
prefetch && !pkt->
req->hasPaddr())
1238 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1246 assert(alignedVaddr == virt_page_addr);
1261 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1266 pte->
paddr,
false,
false);
1273 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1278 pte->
paddr,
false,
false);
1280 DPRINTF(GPUPrefetch,
"Prefetch failed %#x\n",
1290 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
1315 panic(
"recvReqRetry called");
1338 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
1356 panic(
"recvReqRetry called");
1368 delete old_tlb_event;
1396 ret =
TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1399 bool first_page_access = ret.second;
1401 if (first_page_access) {
1404 int accessed_before;
1405 accessed_before =
curTick() - ret.first->second.lastTimeAccessed;
1406 ret.first->second.totalReuseDistance += accessed_before;
1409 ret.first->second.accessesPerPage++;
1410 ret.first->second.lastTimeAccessed =
curTick();
1413 ret.first->second.localTLBAccesses
1421 std::ostream *page_stat_file =
nullptr;
1431 *page_stat_file <<
"page,max_access_distance,mean_access_distance, " 1432 <<
"stddev_distance" << std::endl;
1436 AccessPatternTable::iterator iter, iter_begin, iter_end;
1437 unsigned int sum_avg_reuse_distance_per_page = 0;
1441 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1442 iter->second.accessesPerPage;
1445 unsigned int tmp = iter->second.localTLBAccesses[0];
1446 unsigned int prev = tmp;
1448 for (
int i = 0;
i < iter->second.localTLBAccesses.size(); ++
i) {
1453 prev = iter->second.localTLBAccesses[
i];
1456 iter->second.localTLBAccesses[
i] -= tmp;
1459 iter->second.sumDistance +=
1460 iter->second.localTLBAccesses[
i];
1463 iter->second.meanDistance =
1464 iter->second.sumDistance / iter->second.accessesPerPage;
1468 unsigned int max_distance = 0;
1469 unsigned int stddev_distance = 0;
1471 for (
int i = 0;
i < iter->second.localTLBAccesses.size(); ++
i) {
1472 unsigned int tmp_access_distance =
1473 iter->second.localTLBAccesses[
i];
1475 if (tmp_access_distance > max_distance) {
1476 max_distance = tmp_access_distance;
1480 tmp_access_distance - iter->second.meanDistance;
1481 stddev_distance += pow(diff, 2);
1486 sqrt(stddev_distance/iter->second.accessesPerPage);
1488 if (page_stat_file) {
1489 *page_stat_file << std::hex << iter->first <<
",";
1490 *page_stat_file << std::dec << max_distance <<
",";
1491 *page_stat_file << std::dec << iter->second.meanDistance
1493 *page_stat_file << std::dec << stddev_distance;
1494 *page_stat_file << std::endl;
1498 iter->second.localTLBAccesses.clear();
1513 X86GPUTLBParams::create()
#define panic(...)
This implements a cprintf based panic() function.
AccessPatternTable TLBFootprint
unsigned int accessesPerPage
static const Priority Maximum_Pri
Maximum priority.
const Addr PhysAddrPrefixPciConfig
Stats::Formula globalTLBMissRate
Ports are used to interface objects to each other.
virtual void setMiscReg(RegIndex misc_reg, RegVal val)=0
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Stats::Scalar localCycles
decltype(nullptr) constexpr NoFault
Cycles is a wrapper class for representing cycle counts, i.e.
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
TLB TranslationState: this currently is a somewhat bastardization of the usage of SenderState...
Stats::Scalar avgReuseDistance
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
void makeTimingResponse()
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Stats::Scalar accessCycles
Stats::Formula localTLBMissRate
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
std::shared_ptr< Request > RequestPtr
unsigned int meanDistance
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
std::vector< TlbEntry > tlb
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
virtual Process * getProcessPtr()=0
MemSidePort is the TLB Port closer to the memory side. If this is a last level TLB then this port will...
bool hasMemSidePort
if true, then this is not the last level TLB
unsigned int totalReuseDistance
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
const Addr IntAddrPrefixCPUID
unsigned int lastTimeAccessed
Bitfield< 14 > expandDown
Stats::Scalar localNumTLBMisses
ThreadContext is the external interface to all thread state for anything outside of the CPU...
The request is to an uncacheable address.
TlbEntry * insert(Addr vpn, TlbEntry &entry)
std::ostream * stream() const
Get the output underlying output stream.
const Addr IntAddrPrefixMask
RequestPtr req
A pointer to the original request.
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Stats::Scalar numUniquePages
Stats::Scalar globalNumTLBMisses
std::queue< Addr > cleanupQueue
std::vector< MemSidePort * > memSidePort
bool accessDistance
Print out accessDistance stats.
Tick curTick()
The current simulated tick.
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...
virtual const std::string name() const
std::string csprintf(const char *format, const Args &...args)
EventFunctionWrapper exitEvent
static MiscRegIndex MISCREG_SEG_ATTR(int index)
Stats::Formula localLatency
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions...
void invalidateNonGlobal()
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
void setConfigAddress(uint32_t addr)
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
EventFunctionWrapper cleanupEvent
This hash map will use the virtual page address as a key and will keep track of total number of acces...
void demapPage(Addr va, uint64_t asn)
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
void schedule(Event &event, Tick when)
const Addr IntAddrPrefixMSR
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
std::vector< CpuSidePort * > cpuSidePort
virtual Addr instAddr() const =0
TlbEntry * lookup(Addr va, bool update_lru=true)
Stats::Scalar globalNumTLBAccesses
void updateOutcome(tlbOutcome _outcome)
static MiscRegIndex MISCREG_SEG_SEL(int index)
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
std::vector< EntryList > freeList
const Request::FlagsType M5_VAR_USED SegmentFlagMask
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
const Addr IntAddrPrefixIO
bool msrAddrToIndex(MiscRegIndex &regNum, Addr addr)
Find and return the misc reg corresponding to an MSR address.
void regStats() override
Callback to set stat parameters.
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
bool scheduled() const
Determine if the current event is scheduled.
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
virtual const std::string name() const
BitfieldType< SegDescriptorLimit > limit
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
EmulationPageTable * pTable
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
Declarations of a non-full system Page Table.
bool fixupFault(Addr vaddr)
Attempt to fix up a fault at vaddr by allocating a page on the stack.
static MiscRegIndex MISCREG_SEG_BASE(int index)
std::ostream CheckpointOut
This is exposed globally, independent of the ISA.
const char * description() const
Return a C string describing the event.
SenderState * senderState
This packet's sender state.
virtual ContextID contextId() const =0
const Entry * lookup(Addr vaddr)
Lookup function.
Stats::Scalar pageTableCycles
Tick ticks(int numCycles) const
Stats::Scalar localNumTLBHits
const Addr PhysAddrPrefixIO
The request is required to be strictly ordered by CPU models and is non-speculative.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
virtual void regStats()
Callback to set stat parameters.
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it...
bool FA
true if this is a fully-associative TLB
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
virtual RegVal readMiscReg(RegIndex misc_reg)=0
Stats::Scalar localNumTLBAccesses
virtual void recvReqRetry()
Stats::Scalar globalNumTLBHits
std::shared_ptr< FaultBase > Fault
Counter value() const
Return the current value of this stat as its base type.
tlbOutcome outcome
outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
std::vector< int > reqCnt
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles later.
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss...
void cprintf(const char *format, const Args &...args)
void updatePageFootprint(Addr virt_page_addr)