54#include "debug/GPUPrefetch.hh"
55#include "debug/GPUTLB.hh"
71 exitEvent([
this]{ exitCallback(); },
name()), stats(
this)
86 for (
int way = 0; way <
assoc; ++way) {
109 cprintf(
"Forcing maxCoalescedReqs to %d (TLB assoc.) \n",
assoc);
118 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
124 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
140 if (if_name ==
"cpu_side_ports") {
142 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
146 }
else if (if_name ==
"mem_side_ports") {
148 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
155 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
179 newEntry->
vaddr = vpn;
185 GpuTLB::EntryList::iterator
196 int page_size = (*entry)->size();
198 if ((*entry)->vaddr <=
va && (*entry)->vaddr + page_size >
va) {
199 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x "
200 "with size %#x.\n",
va, (*entry)->vaddr, page_size);
231 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
251 DPRINTF(GPUTLB,
"Invalidating all non global entries.\n");
256 if (!(*entryIt)->global) {
285 localMiscRegAccess(
bool read,
RegIndex regNum,
304 DPRINTF(GPUTLB,
"Addresses references internal memory.\n");
309 panic(
"CPUID memory space not yet implemented!\n");
315 return std::make_shared<GeneralProtection>(0);
317 req->setLocalAccessor(
320 return localMiscRegAccess(read, regNum, tc, pkt);
329 Addr IOPort =
vaddr & ~IntAddrPrefixMask;
332 assert(!(IOPort & ~0xFFFF));
333 if (IOPort == 0xCF8 && req->getSize() == 4) {
334 req->setLocalAccessor(
337 return localMiscRegAccess(
341 }
else if ((IOPort & ~
mask(2)) == 0xCFC) {
358 panic(
"Access to unrecognized internal address space %#x.\n",
374 bool tlb_hit =
false;
376 uint32_t
flags = req->getFlags();
386 DPRINTF(GPUTLB,
"In protected mode.\n");
388 assert(m5Reg.mode == LongMode);
392 DPRINTF(GPUTLB,
"Paging enabled.\n");
422 bool &delayedResponse,
bool timing,
int &latency)
424 uint32_t
flags = req->getFlags();
434 delayedResponse =
false;
442 DPRINTF(GPUTLB,
"In protected mode.\n");
444 if (m5Reg.mode != LongMode) {
445 DPRINTF(GPUTLB,
"Not in long mode. Checking segment "
452 return std::make_shared<GeneralProtection>(0);
461 return std::make_shared<GeneralProtection>(0);
464 return std::make_shared<GeneralProtection>(0);
473 int size = 8 << logSize;
479 DPRINTF(GPUTLB,
"Checking an expand down segment.\n");
480 warn_once(
"Expand down segments are untested.\n");
483 return std::make_shared<GeneralProtection>(0);
486 return std::make_shared<GeneralProtection>(0);
492 DPRINTF(GPUTLB,
"Paging enabled.\n");
504 fatal(
"GpuTLB doesn't support full-system mode\n");
506 DPRINTF(GPUTLB,
"Handling a TLB miss for address %#x "
507 "at pc %#x.\n",
vaddr,
520 pte =
p->pTable->lookup(
vaddr);
524 return std::make_shared<PageFault>(
vaddr,
true,
528 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
530 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n",
531 alignedVaddr, pte->
paddr);
533 TlbEntry gpuEntry(
p->pid(), alignedVaddr,
534 pte->
paddr,
false,
false);
535 entry =
insert(alignedVaddr, gpuEntry);
538 DPRINTF(GPUTLB,
"Miss was serviced.\n");
552 bool badWrite = (!entry->
writable && (inUser || cr0.wp));
559 return std::make_shared<PageFault>(
vaddr,
true,
mode,
563 if (storeCheck && badWrite) {
566 return std::make_shared<PageFault>(
vaddr,
true,
572 DPRINTF(GPUTLB,
"Entry found with paddr %#x, doing protection "
573 "checks.\n", entry->
paddr);
575 int page_size = entry->
size();
577 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
578 req->setPaddr(paddr);
584 DPRINTF(GPUTLB,
"Paging disabled.\n");
586 req->setPaddr(
vaddr);
590 DPRINTF(GPUTLB,
"In real mode.\n");
592 req->setPaddr(
vaddr);
597 LocalApicBase localApicBase =
601 Addr paddr = req->getPaddr();
603 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
618 bool delayedResponse;
628 bool delayedResponse;
632 delayedResponse,
true, latency);
634 if (!delayedResponse)
672 bool update_stats = !sender_state->
isPrefetch;
675 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
678 int req_cnt = sender_state->
reqCnt.back();
691 bool success =
tlbLookup(tmp_req, tmp_tc, update_stats);
725 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
728 panic(
"Virtual Page Address %#x already has a return event\n",
735 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
743 :
Event(CPU_Tick_Pri),
tlb(_tlb), virtPageAddr(_addr),
744 outcome(tlb_outcome), pkt(_pkt)
757 uint32_t
flags = pkt->
req->getFlags();
764 bool badWrite = (!tlb_entry->
writable && (inUser || cr0.wp));
766 if ((inUser && !tlb_entry->
user) ||
771 panic(
"Page fault detected");
774 if (storeCheck && badWrite) {
777 panic(
"Page fault detected");
802 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n",
806 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
816 local_entry = new_entry;
819 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
822 local_entry =
insert(virt_page_addr, *new_entry);
833 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
834 "while paddr was %#x.\n", local_entry->
vaddr,
838 int page_size = local_entry->
size();
840 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
848 pkt->
req->setPaddr(paddr);
877 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
885 int req_cnt = tmp_sender_state->
reqCnt.back();
886 bool update_stats = !tmp_sender_state->
isPrefetch;
899 DPRINTF(GPUTLB,
"This is a TLB miss\n");
920 DPRINTF(GPUTLB,
"Failed sending translation request to "
921 "lower level TLB for addr %#x\n", virtPageAddr);
925 DPRINTF(GPUTLB,
"Sent translation request to lower level "
926 "TLB for addr %#x\n", virtPageAddr);
930 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for "
931 "addr %#x\n", virtPageAddr);
947 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
956 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
957 assert(alignedVaddr == virtPageAddr);
962 pte =
p->pTable->lookup(
vaddr);
966 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
983 panic(
"Unexpected TLB outcome %d", outcome);
990 tlb->translationReturn(virtPageAddr, outcome, pkt);
996 return "trigger translationDoneEvent";
1008 return virtPageAddr;
1020 if (
tlb->outstandingReqs <
tlb->maxCoalescedReqs) {
1021 tlb->issueTLBLookup(pkt);
1023 tlb->outstandingReqs++;
1026 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
1027 tlb->outstandingReqs);
1053 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr "
1058 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr "
1068 local_entry = new_entry;
1073 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1076 local_entry =
insert(virt_page_addr, *new_entry);
1079 assert(local_entry);
1082 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
1083 "while paddr was %#x.\n", local_entry->
vaddr,
1084 local_entry->
paddr);
1100 int page_size = local_entry->
size();
1102 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
1104 pkt->
req->setPaddr(paddr);
1119 bool update_stats = !sender_state->
isPrefetch;
1125 tlb->updatePageFootprint(virt_page_addr);
1128 bool success =
tlb->tlbLookup(pkt->
req, tc, update_stats);
1134 tlb->stats.globalNumTLBAccesses++;
1138 tlb->stats.globalNumTLBHits++;
1144 tlb->stats.globalNumTLBMisses++;
1145 if (
tlb->hasMemSidePort) {
1147 tlb->memSidePort[0]->sendFunctional(pkt);
1153 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1160 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
1161 assert(alignedVaddr == virt_page_addr);
1164 p->pTable->lookup(
vaddr);
1167 pte =
p->pTable->lookup(
vaddr);
1175 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1180 pte->
paddr,
false,
false);
1187 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1192 pte->
paddr,
false,
false);
1194 DPRINTF(GPUPrefetch,
"Prefetch failed %#x\n",
1204 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
1205 tlb->lookup(pkt->
req->getVaddr()));
1221 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1229 panic(
"recvReqRetry called");
1252 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
1255 TLBEvent *tlb_event =
tlb->translationReturnEvent[virt_page_addr];
1270 panic(
"recvReqRetry called");
1282 delete old_tlb_event;
1311 AccessPatternTable::value_type(virt_page_addr, tmp_access_info));
1313 bool first_page_access = ret.second;
1315 if (first_page_access) {
1318 int accessed_before;
1319 accessed_before =
curTick() - ret.first->second.lastTimeAccessed;
1320 ret.first->second.totalReuseDistance += accessed_before;
1323 ret.first->second.accessesPerPage++;
1324 ret.first->second.lastTimeAccessed =
curTick();
1327 ret.first->second.localTLBAccesses
1335 std::ostream *page_stat_file =
nullptr;
1346 <<
"page,max_access_distance,mean_access_distance, "
1347 <<
"stddev_distance" << std::endl;
1351 unsigned int sum_avg_reuse_distance_per_page = 0;
1355 sum_avg_reuse_distance_per_page += iter.second.totalReuseDistance /
1356 iter.second.accessesPerPage;
1359 unsigned int tmp = iter.second.localTLBAccesses[0];
1360 unsigned int prev = tmp;
1362 for (
int i = 0;
i < iter.second.localTLBAccesses.size(); ++
i) {
1367 prev = iter.second.localTLBAccesses[
i];
1370 iter.second.localTLBAccesses[
i] -= tmp;
1373 iter.second.sumDistance +=
1374 iter.second.localTLBAccesses[
i];
1377 iter.second.meanDistance =
1378 iter.second.sumDistance / iter.second.accessesPerPage;
1382 unsigned int max_distance = 0;
1383 unsigned int stddev_distance = 0;
1385 for (
int i = 0;
i < iter.second.localTLBAccesses.size(); ++
i) {
1386 unsigned int tmp_access_distance =
1387 iter.second.localTLBAccesses[
i];
1389 if (tmp_access_distance > max_distance) {
1390 max_distance = tmp_access_distance;
1394 tmp_access_distance - iter.second.meanDistance;
1395 stddev_distance += pow(diff, 2);
1400 sqrt(stddev_distance/iter.second.accessesPerPage);
1402 if (page_stat_file) {
1403 *page_stat_file << std::hex << iter.first <<
",";
1404 *page_stat_file << std::dec << max_distance <<
",";
1405 *page_stat_file << std::dec << iter.second.meanDistance
1407 *page_stat_file << std::dec << stddev_distance;
1408 *page_stat_file << std::endl;
1412 iter.second.localTLBAccesses.clear();
1426 : statistics::
Group(parent),
1427 ADD_STAT(localNumTLBAccesses,
"Number of TLB accesses"),
1428 ADD_STAT(localNumTLBHits,
"Number of TLB hits"),
1429 ADD_STAT(localNumTLBMisses,
"Number of TLB misses"),
1430 ADD_STAT(localTLBMissRate,
"TLB miss rate"),
1431 ADD_STAT(globalNumTLBAccesses,
"Number of TLB accesses"),
1432 ADD_STAT(globalNumTLBHits,
"Number of TLB hits"),
1433 ADD_STAT(globalNumTLBMisses,
"Number of TLB misses"),
1434 ADD_STAT(globalTLBMissRate,
"TLB miss rate"),
1435 ADD_STAT(accessCycles,
"Cycles spent accessing this TLB level"),
1436 ADD_STAT(pageTableCycles,
"Cycles spent accessing the page table"),
1437 ADD_STAT(numUniquePages,
"Number of unique pages touched"),
1438 ADD_STAT(localCycles,
"Number of cycles spent in queue for all "
1440 ADD_STAT(localLatency,
"Avg. latency over incoming coalesced reqs"),
1441 ADD_STAT(avgReuseDistance,
"avg. reuse distance over all pages (in "
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e.
virtual std::string name() const
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
std::ostream * stream() const
Get the underlying output stream.
Addr instAddr() const
Returns the memory address of the instruction this PC points to.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
RequestPtr req
A pointer to the original request.
Ports are used to interface objects to each other.
@ STRICT_ORDER
The request is required to be strictly ordered by CPU models and is non-speculative.
@ UNCACHEABLE
The request is to an uncacheable address.
@ READ_MODIFY_WRITE
This request is a read which will be followed by a write.
ThreadContext is the external interface to all thread state for anything outside of the CPU.
virtual RegVal readMiscReg(RegIndex misc_reg)=0
virtual void setMiscReg(RegIndex misc_reg, RegVal val)=0
virtual const PCStateBase & pcState() const =0
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
virtual Process * getProcessPtr()=0
virtual ContextID contextId() const =0
virtual void recvReqRetry()
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and it calls issueTLBLookup(). It o...
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
void updateOutcome(tlbOutcome _outcome)
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
const char * description() const
Return a C string describing the event.
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
void invalidateNonGlobal()
EventFunctionWrapper cleanupEvent
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
AccessPatternTable TLBFootprint
void demapPage(Addr va, uint64_t asn)
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
TlbEntry * lookup(Addr va, bool update_lru=true)
std::vector< TlbEntry > tlb
std::vector< MemSidePort * > memSidePort
bool accessDistance
Print out accessDistance stats.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::queue< Addr > cleanupQueue
bool FA
true if this is a fully-associative TLB
TlbEntry * insert(Addr vpn, TlbEntry &entry)
gem5::X86ISA::GpuTLB::GpuTLBStats stats
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
std::vector< EntryList > freeList
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
void setConfigAddress(uint32_t addr)
bool hasMemSidePort
if true, then this is not the last level TLB
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
void updatePageFootprint(Addr virt_page_addr)
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
tlbLookup only performs a TLB lookup, returning true on a TLB hit and false on a TLB miss.
std::vector< CpuSidePort * > cpuSidePort
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Counter value() const
Return the current value of this stat as its base type.
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr T mbits(T val, unsigned first, unsigned last)
Mask off the given bits in place like bits() but without shifting.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
static RegIndex segSel(int index)
static RegIndex segAttr(int index)
static RegIndex segBase(int index)
static RegIndex segLimit(int index)
constexpr auto AddrSizeFlagMask
const Addr PhysAddrPrefixPciConfig
Bitfield< 14 > expandDown
const Addr IntAddrPrefixIO
constexpr auto AddrSizeFlagShift
constexpr Request::FlagsType SegmentFlagMask
BitfieldType< SegDescriptorLimit > limit
const Addr PhysAddrPrefixIO
constexpr auto CPL0FlagBit
const Addr IntAddrPrefixCPUID
const Addr IntAddrPrefixMSR
const Addr IntAddrPrefixMask
bool msrAddrToIndex(RegIndex &reg_num, Addr addr)
Find and return the misc reg corresponding to an MSR address.
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< FaultBase > Fault
T safe_cast(U &&ref_or_ptr)
std::shared_ptr< Request > RequestPtr
void cprintf(const char *format, const Args &...args)
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
std::string csprintf(const char *format, const Args &...args)
constexpr decltype(nullptr) NoFault
Declarations of a non-full system Page Table.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
GPU TranslationState: this is currently somewhat of a bastardization of the usage of SenderState,...
std::vector< int > reqCnt
This hash map will use the virtual page address as a key and will keep track of total number of acces...
unsigned int accessesPerPage
unsigned int totalReuseDistance
unsigned int lastTimeAccessed
unsigned int meanDistance
GpuTLBStats(statistics::Group *parent)
statistics::Formula localTLBMissRate
statistics::Scalar globalNumTLBMisses
statistics::Scalar localCycles
statistics::Formula globalTLBMissRate
statistics::Scalar localNumTLBHits
statistics::Scalar numUniquePages
statistics::Scalar avgReuseDistance
statistics::Scalar globalNumTLBHits
statistics::Formula localLatency
statistics::Scalar localNumTLBMisses
statistics::Scalar globalNumTLBAccesses
statistics::Scalar accessCycles
statistics::Scalar localNumTLBAccesses
statistics::Scalar pageTableCycles
const std::string & name()