53 #include "debug/GPUPrefetch.hh" 54 #include "debug/GPUTLB.hh" 75 clock =
p->clk_domain->clockPeriod();
82 for (
int set = 0;
set <
numSets; ++
set) {
83 for (
int way = 0; way <
assoc; ++way) {
84 int x =
set * assoc + way;
106 cprintf(
"Forcing maxCoalescedReqs to %d (TLB assoc.) \n",
assoc);
115 for (
size_t i = 0;
i <
p->port_slave_connection_count; ++
i) {
121 for (
size_t i = 0;
i <
p->port_master_connection_count; ++
i) {
137 if (if_name ==
"slave") {
138 if (idx >= static_cast<PortID>(
cpuSidePort.size())) {
139 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
143 }
else if (if_name ==
"master") {
144 if (idx >= static_cast<PortID>(
memSidePort.size())) {
145 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
152 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
176 newEntry->
vaddr = vpn;
182 GpuTLB::EntryList::iterator
192 for (; entry !=
entryList[
set].end(); ++entry) {
193 int page_size = (*entry)->size();
195 if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
196 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x " 197 "with size %#x.\n", va, (*entry)->vaddr, page_size);
217 auto entry =
lookupIt(va, update_lru);
228 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
248 DPRINTF(GPUTLB,
"Invalidating all non global entries.\n");
253 if (!(*entryIt)->global) {
279 DPRINTF(GPUTLB,
"Addresses references internal memory.\n");
284 panic(
"CPUID memory space not yet implemented!\n");
601 return std::make_shared<GeneralProtection>(0);
606 req->setPaddr(regNum *
sizeof(
RegVal));
615 assert(!(IOPort & ~0xFFFF));
617 if (IOPort == 0xCF8 && req->getSize() == 4) {
620 }
else if ((IOPort & ~
mask(2)) == 0xCFC) {
626 if (
bits(configAddress, 31, 31)) {
628 mbits(configAddress, 30, 2) |
639 panic(
"Access to unrecognized internal address space %#x.\n",
655 bool tlb_hit =
false;
657 uint32_t flags = req->getFlags();
663 DPRINTF(GPUTLB,
"TLB Lookup for vaddr %#x.\n", vaddr);
667 DPRINTF(GPUTLB,
"In protected mode.\n");
669 assert(m5Reg.mode == LongMode);
673 DPRINTF(GPUTLB,
"Paging enabled.\n");
703 bool &delayedResponse,
bool timing,
int &latency)
705 uint32_t flags = req->getFlags();
715 delayedResponse =
false;
717 DPRINTF(GPUTLB,
"Translating vaddr %#x.\n", vaddr);
723 DPRINTF(GPUTLB,
"In protected mode.\n");
725 if (m5Reg.mode != LongMode) {
726 DPRINTF(GPUTLB,
"Not in long mode. Checking segment " 733 return std::make_shared<GeneralProtection>(0);
742 return std::make_shared<GeneralProtection>(0);
745 return std::make_shared<GeneralProtection>(0);
747 expandDown = attr.expandDown;
759 if ((csAttr.defaultSize && sizeOverride) ||
760 (!csAttr.defaultSize && !sizeOverride)) {
765 Addr endOffset = offset + req->getSize() - 1;
768 DPRINTF(GPUTLB,
"Checking an expand down segment.\n");
769 warn_once(
"Expand down segments are untested.\n");
771 if (offset <= limit || endOffset <= limit)
772 return std::make_shared<GeneralProtection>(0);
774 if (offset > limit || endOffset > limit)
775 return std::make_shared<GeneralProtection>(0);
781 DPRINTF(GPUTLB,
"Paging enabled.\n");
793 fatal(
"GpuTLB doesn't support full-system mode\n");
795 DPRINTF(GPUTLB,
"Handling a TLB miss for address %#x " 796 "at pc %#x.\n", vaddr, tc->
instAddr());
800 p->pTable->lookup(vaddr);
807 if (p->fixupStackFault(vaddr))
808 pte = p->pTable->lookup(vaddr);
812 return std::make_shared<PageFault>(
vaddr,
true,
816 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
818 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n",
819 alignedVaddr, pte->
paddr);
821 TlbEntry gpuEntry(p->pid(), alignedVaddr,
822 pte->
paddr,
false,
false);
823 entry =
insert(alignedVaddr, gpuEntry);
826 DPRINTF(GPUTLB,
"Miss was serviced.\n");
837 bool inUser = (m5Reg.cpl == 3 &&
841 bool badWrite = (!entry->
writable && (inUser || cr0.wp));
848 return std::make_shared<PageFault>(
vaddr,
true,
mode,
852 if (storeCheck && badWrite) {
855 return std::make_shared<PageFault>(
vaddr,
true,
861 DPRINTF(GPUTLB,
"Entry found with paddr %#x, doing protection " 862 "checks.\n", entry->
paddr);
864 int page_size = entry->
size();
865 Addr paddr = entry->
paddr | (vaddr & (page_size - 1));
866 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
867 req->setPaddr(paddr);
873 DPRINTF(GPUTLB,
"Paging disabled.\n");
874 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, vaddr);
875 req->setPaddr(vaddr);
879 DPRINTF(GPUTLB,
"In real mode.\n");
880 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, vaddr);
881 req->setPaddr(vaddr);
886 LocalApicBase localApicBase =
890 Addr paddr = req->getPaddr();
892 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
907 bool delayedResponse;
917 bool delayedResponse;
921 delayedResponse,
true, latency);
923 if (!delayedResponse)
924 translation->
finish(fault, req, tc, mode);
950 .
name(
name() +
".local_TLB_accesses")
951 .
desc(
"Number of TLB accesses")
956 .
desc(
"Number of TLB hits")
961 .
desc(
"Number of TLB misses")
965 .
name(
name() +
".local_TLB_miss_rate")
966 .
desc(
"TLB miss rate")
971 .
desc(
"Cycles spent accessing this TLB level")
975 .
name(
name() +
".page_table_cycles")
976 .
desc(
"Cycles spent accessing the page table")
983 .
desc(
"Number of unique pages touched")
988 .
desc(
"Number of cycles spent in queue for all incoming reqs")
993 .
desc(
"Avg. latency over incoming coalesced reqs")
999 .
name(
name() +
".global_TLB_accesses")
1000 .
desc(
"Number of TLB accesses")
1005 .
desc(
"Number of TLB hits")
1009 .
name(
name() +
".global_TLB_misses")
1010 .
desc(
"Number of TLB misses")
1014 .
name(
name() +
".global_TLB_miss_rate")
1015 .
desc(
"TLB miss rate")
1021 .
name(
name() +
".avg_reuse_distance")
1022 .
desc(
"avg. reuse distance over all pages (in ticks)")
1044 bool update_stats = !sender_state->
prefetch;
1047 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
1050 int req_cnt = sender_state->reqCnt.back();
1063 bool success =
tlbLookup(tmp_req, tmp_tc, update_stats);
1071 auto p = sender_state->tc->getProcessPtr();
1072 sender_state->tlbEntry =
1079 sender_state->hitLevel = sender_state->reqCnt.
size();
1097 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
1100 panic(
"Virtual Page Address %#x already has a return event\n",
1107 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
1115 :
Event(CPU_Tick_Pri),
tlb(_tlb), virtPageAddr(_addr),
1116 outcome(tlb_outcome), pkt(_pkt)
1129 uint32_t
flags = pkt->
req->getFlags();
1136 bool badWrite = (!tlb_entry->
writable && (inUser || cr0.wp));
1138 if ((inUser && !tlb_entry->
user) ||
1143 panic(
"Page fault detected");
1146 if (storeCheck && badWrite) {
1149 panic(
"Page fault detected");
1175 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n", vaddr);
1176 local_entry = sender_state->
tlbEntry;
1178 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
1184 new_entry = sender_state->
tlbEntry;
1186 local_entry = new_entry;
1189 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1192 local_entry =
insert(virt_page_addr, *new_entry);
1195 assert(local_entry);
1203 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks " 1204 "while paddr was %#x.\n", local_entry->
vaddr,
1205 local_entry->
paddr);
1208 int page_size = local_entry->
size();
1209 Addr paddr = local_entry->
paddr | (vaddr & (page_size - 1));
1210 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
1218 pkt->
req->setPaddr(paddr);
1247 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
1255 int req_cnt = tmp_sender_state->
reqCnt.back();
1256 bool update_stats = !tmp_sender_state->
prefetch;
1269 DPRINTF(GPUTLB,
"This is a TLB miss\n");
1290 DPRINTF(GPUTLB,
"Failed sending translation request to " 1291 "lower level TLB for addr %#x\n", virtPageAddr);
1295 DPRINTF(GPUTLB,
"Sent translation request to lower level " 1296 "TLB for addr %#x\n", virtPageAddr);
1300 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for " 1301 "addr %#x\n", virtPageAddr);
1316 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1325 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1326 assert(alignedVaddr == virtPageAddr);
1330 p->fixupStackFault(vaddr)) {
1331 pte = p->pTable->lookup(vaddr);
1335 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1352 panic(
"Unexpected TLB outcome %d", outcome);
1365 return "trigger translationDoneEvent";
1395 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
1422 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr " 1425 local_entry = sender_state->
tlbEntry;
1427 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr " 1433 new_entry = sender_state->
tlbEntry;
1435 local_entry = new_entry;
1440 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1443 local_entry =
insert(virt_page_addr, *new_entry);
1446 assert(local_entry);
1449 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks " 1450 "while paddr was %#x.\n", local_entry->
vaddr,
1451 local_entry->
paddr);
1467 int page_size = local_entry->
size();
1468 Addr paddr = local_entry->
paddr | (vaddr & (page_size - 1));
1469 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
1471 pkt->
req->setPaddr(paddr);
1486 bool update_stats = !sender_state->
prefetch;
1516 if (sender_state->
prefetch && !pkt->
req->hasPaddr())
1520 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1528 assert(alignedVaddr == virt_page_addr);
1543 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1548 pte->
paddr,
false,
false);
1555 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1560 pte->
paddr,
false,
false);
1562 DPRINTF(GPUPrefetch,
"Prefetch failed %#x\n",
1572 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
1597 panic(
"recvReqRetry called");
1620 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
1638 panic(
"recvReqRetry called");
1650 delete old_tlb_event;
1678 ret =
TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1681 bool first_page_access = ret.second;
1683 if (first_page_access) {
1686 int accessed_before;
1687 accessed_before =
curTick() - ret.first->second.lastTimeAccessed;
1688 ret.first->second.totalReuseDistance += accessed_before;
1691 ret.first->second.accessesPerPage++;
1692 ret.first->second.lastTimeAccessed =
curTick();
1695 ret.first->second.localTLBAccesses
1703 std::ostream *page_stat_file =
nullptr;
1713 *page_stat_file <<
"page,max_access_distance,mean_access_distance, " 1714 <<
"stddev_distance" << std::endl;
1718 AccessPatternTable::iterator iter, iter_begin, iter_end;
1719 unsigned int sum_avg_reuse_distance_per_page = 0;
1723 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1724 iter->second.accessesPerPage;
1727 unsigned int tmp = iter->second.localTLBAccesses[0];
1728 unsigned int prev = tmp;
1730 for (
int i = 0;
i < iter->second.localTLBAccesses.size(); ++
i) {
1735 prev = iter->second.localTLBAccesses[
i];
1738 iter->second.localTLBAccesses[
i] -= tmp;
1741 iter->second.sumDistance +=
1742 iter->second.localTLBAccesses[
i];
1745 iter->second.meanDistance =
1746 iter->second.sumDistance / iter->second.accessesPerPage;
1750 unsigned int max_distance = 0;
1751 unsigned int stddev_distance = 0;
1753 for (
int i = 0;
i < iter->second.localTLBAccesses.size(); ++
i) {
1754 unsigned int tmp_access_distance =
1755 iter->second.localTLBAccesses[
i];
1757 if (tmp_access_distance > max_distance) {
1758 max_distance = tmp_access_distance;
1762 tmp_access_distance - iter->second.meanDistance;
1763 stddev_distance += pow(diff, 2);
1768 sqrt(stddev_distance/iter->second.accessesPerPage);
1770 if (page_stat_file) {
1771 *page_stat_file << std::hex << iter->first <<
",";
1772 *page_stat_file << std::dec << max_distance <<
",";
1773 *page_stat_file << std::dec << iter->second.meanDistance
1775 *page_stat_file << std::dec << stddev_distance;
1776 *page_stat_file << std::endl;
1780 iter->second.localTLBAccesses.clear();
1795 X86GPUTLBParams::create()
#define panic(...)
This implements a cprintf based panic() function.
AccessPatternTable TLBFootprint
unsigned int accessesPerPage
const Addr PhysAddrPrefixPciConfig
Stats::Formula globalTLBMissRate
The request is to an uncacheable address.
Ports are used to interface objects to each other.
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Stats::Scalar localCycles
decltype(nullptr) constexpr NoFault
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
TLB TranslationState: this is currently somewhat of a bastardization of the usage of SenderState...
Stats::Scalar avgReuseDistance
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
void makeTimingResponse()
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Stats::Scalar accessCycles
Stats::Formula localTLBMissRate
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
std::shared_ptr< Request > RequestPtr
unsigned int meanDistance
virtual const std::string name() const
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
std::vector< TlbEntry > tlb
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
virtual Process * getProcessPtr()=0
MemSidePort is the TLB Port closer to the memory side. If this is a last level TLB then this port will...
virtual void regStats()
Callback to set stat parameters.
bool hasMemSidePort
if true, then this is not the last level TLB
unsigned int totalReuseDistance
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
const Addr IntAddrPrefixCPUID
unsigned int lastTimeAccessed
Bitfield< 14 > expandDown
Fault translateInt(const RequestPtr &req, ThreadContext *tc)
Stats::Scalar localNumTLBMisses
ThreadContext is the external interface to all thread state for anything outside of the CPU...
TlbEntry * insert(Addr vpn, TlbEntry &entry)
std::ostream * stream() const
Get the underlying output stream.
const Addr IntAddrPrefixMask
RequestPtr req
A pointer to the original request.
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
Stats::Scalar numUniquePages
Stats::Scalar globalNumTLBMisses
std::queue< Addr > cleanupQueue
std::vector< MemSidePort * > memSidePort
bool accessDistance
Print out accessDistance stats.
Tick curTick()
The current simulated tick.
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...
std::string csprintf(const char *format, const Args &...args)
EventFunctionWrapper exitEvent
bool scheduled() const
Determine if the current event is scheduled.
static MiscRegIndex MISCREG_SEG_ATTR(int index)
Stats::Formula localLatency
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions...
void invalidateNonGlobal()
void setConfigAddress(uint32_t addr)
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
EventFunctionWrapper cleanupEvent
This hash map will use the virtual page address as a key and will keep track of total number of acces...
void demapPage(Addr va, uint64_t asn)
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
const Addr IntAddrPrefixMSR
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
This request is to a memory mapped register.
std::vector< CpuSidePort * > cpuSidePort
virtual Addr instAddr() const =0
TlbEntry * lookup(Addr va, bool update_lru=true)
Stats::Scalar globalNumTLBAccesses
void updateOutcome(tlbOutcome _outcome)
static MiscRegIndex MISCREG_SEG_SEL(int index)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
std::vector< EntryList > freeList
virtual const std::string name() const
const Request::FlagsType M5_VAR_USED SegmentFlagMask
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
const Addr IntAddrPrefixIO
void regStats() override
Callback to set stat parameters.
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
Derived & name(const std::string &name)
Set the name and mark this stat to print at the end of simulation.
bool fixupStackFault(Addr vaddr)
Attempt to fix up a fault at vaddr by allocating a page on the stack.
BitfieldType< SegDescriptorLimit > limit
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
EmulationPageTable * pTable
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
Declarations of a non-full system Page Table.
static MiscRegIndex MISCREG_SEG_BASE(int index)
std::ostream CheckpointOut
This is exposed globally, independent of the ISA.
const char * description() const
Return a C string describing the event.
SenderState * senderState
This packet's sender state.
virtual ContextID contextId() const =0
const Entry * lookup(Addr vaddr)
Lookup function.
void schedule(Event &event, Tick when)
Stats::Scalar pageTableCycles
Tick ticks(int numCycles) const
Stats::Scalar localNumTLBHits
const Addr PhysAddrPrefixIO
Derived & desc(const std::string &_desc)
Set the description and mark this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it...
bool FA
true if this is a fully-associative TLB
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Stats::Scalar localNumTLBAccesses
virtual void recvReqRetry()
Stats::Scalar globalNumTLBHits
std::shared_ptr< FaultBase > Fault
Counter value() const
Return the current value of this stat as its base type.
tlbOutcome outcome
outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
std::vector< int > reqCnt
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> c...
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
tlbLookup will only perform a TLB lookup, returning true on a TLB hit and false on a TLB miss...
static const Priority Maximum_Pri
Maximum priority.
void cprintf(const char *format, const Args &...args)
void updatePageFootprint(Addr virt_page_addr)