54#include "debug/GPUPrefetch.hh"
55#include "debug/GPUTLB.hh"
71 exitEvent([
this]{ exitCallback(); },
name()), stats(
this)
86 for (
int way = 0; way <
assoc; ++way) {
109 cprintf(
"Forcing maxCoalescedReqs to %d (TLB assoc.) \n",
assoc);
118 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
124 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
140 if (if_name ==
"cpu_side_ports") {
142 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
146 }
else if (if_name ==
"mem_side_ports") {
148 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
155 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
179 newEntry->
vaddr = vpn;
185 GpuTLB::EntryList::iterator
196 int page_size = (*entry)->size();
198 if ((*entry)->vaddr <=
va && (*entry)->vaddr + page_size >
va) {
199 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x "
200 "with size %#x.\n",
va, (*entry)->vaddr, page_size);
231 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
251 DPRINTF(GPUTLB,
"Invalidating all non global entries.\n");
256 if (!(*entryIt)->global) {
285 localMiscRegAccess(
bool read,
RegIndex regNum,
304 DPRINTF(GPUTLB,
"Addresses references internal memory.\n");
309 panic(
"CPUID memory space not yet implemented!\n");
315 return std::make_shared<GeneralProtection>(0);
317 req->setLocalAccessor(
320 return localMiscRegAccess(read, regNum, tc, pkt);
329 Addr IOPort =
vaddr & ~IntAddrPrefixMask;
332 assert(!(IOPort & ~0xFFFF));
333 if (IOPort == 0xCF8 && req->getSize() == 4) {
334 req->setLocalAccessor(
337 return localMiscRegAccess(
341 }
else if ((IOPort & ~
mask(2)) == 0xCFC) {
358 panic(
"Access to unrecognized internal address space %#x.\n",
374 bool tlb_hit =
false;
376 uint32_t
flags = req->getFlags();
382 if (req->hasNoAddr()) {
390 DPRINTF(GPUTLB,
"In protected mode.\n");
392 assert(m5Reg.mode == LongMode);
396 DPRINTF(GPUTLB,
"Paging enabled.\n");
426 bool &delayedResponse,
bool timing,
int &latency)
428 uint32_t
flags = req->getFlags();
438 delayedResponse =
false;
446 DPRINTF(GPUTLB,
"In protected mode.\n");
448 if (m5Reg.mode != LongMode) {
449 DPRINTF(GPUTLB,
"Not in long mode. Checking segment "
456 return std::make_shared<GeneralProtection>(0);
465 return std::make_shared<GeneralProtection>(0);
468 return std::make_shared<GeneralProtection>(0);
477 int size = 8 << logSize;
483 DPRINTF(GPUTLB,
"Checking an expand down segment.\n");
484 warn_once(
"Expand down segments are untested.\n");
487 return std::make_shared<GeneralProtection>(0);
490 return std::make_shared<GeneralProtection>(0);
496 DPRINTF(GPUTLB,
"Paging enabled.\n");
508 fatal(
"GpuTLB doesn't support full-system mode\n");
510 DPRINTF(GPUTLB,
"Handling a TLB miss for address %#x "
511 "at pc %#x.\n",
vaddr,
524 pte =
p->pTable->lookup(
vaddr);
528 return std::make_shared<PageFault>(
vaddr,
true,
532 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
534 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n",
535 alignedVaddr, pte->
paddr);
537 TlbEntry gpuEntry(
p->pid(), alignedVaddr,
538 pte->
paddr,
false,
false);
539 entry =
insert(alignedVaddr, gpuEntry);
542 DPRINTF(GPUTLB,
"Miss was serviced.\n");
556 bool badWrite = (!entry->
writable && (inUser || cr0.wp));
563 return std::make_shared<PageFault>(
vaddr,
true,
mode,
567 if (storeCheck && badWrite) {
570 return std::make_shared<PageFault>(
vaddr,
true,
576 DPRINTF(GPUTLB,
"Entry found with paddr %#x, doing protection "
577 "checks.\n", entry->
paddr);
579 int page_size = entry->
size();
581 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
582 req->setPaddr(paddr);
588 DPRINTF(GPUTLB,
"Paging disabled.\n");
590 req->setPaddr(
vaddr);
594 DPRINTF(GPUTLB,
"In real mode.\n");
596 req->setPaddr(
vaddr);
601 LocalApicBase localApicBase =
605 Addr paddr = req->getPaddr();
607 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
622 bool delayedResponse;
632 bool delayedResponse;
636 delayedResponse,
true, latency);
638 if (!delayedResponse)
676 bool update_stats = !sender_state->
isPrefetch;
679 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
682 int req_cnt = sender_state->
reqCnt.back();
695 bool success =
tlbLookup(tmp_req, tmp_tc, update_stats);
701 if (pkt->
req->hasNoAddr()) {
735 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
738 panic(
"Virtual Page Address %#x already has a return event\n",
745 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
753 :
Event(CPU_Tick_Pri),
tlb(_tlb), virtPageAddr(_addr),
754 outcome(tlb_outcome), pkt(_pkt)
767 uint32_t
flags = pkt->
req->getFlags();
774 bool badWrite = (!tlb_entry->
writable && (inUser || cr0.wp));
776 if ((inUser && !tlb_entry->
user) ||
781 panic(
"Page fault detected");
784 if (storeCheck && badWrite) {
787 panic(
"Page fault detected");
812 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n",
814 local_entry = safe_cast<TlbEntry *>(sender_state->
tlbEntry);
816 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
824 new_entry = safe_cast<TlbEntry *>(sender_state->
tlbEntry);
826 local_entry = new_entry;
829 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
832 local_entry =
insert(virt_page_addr, *new_entry);
843 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
844 "while paddr was %#x.\n", local_entry->
vaddr,
848 int page_size = local_entry->
size();
850 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
858 pkt->
req->setPaddr(paddr);
887 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
895 int req_cnt = tmp_sender_state->
reqCnt.back();
896 bool update_stats = !tmp_sender_state->
isPrefetch;
909 DPRINTF(GPUTLB,
"This is a TLB miss\n");
930 DPRINTF(GPUTLB,
"Failed sending translation request to "
931 "lower level TLB for addr %#x\n", virtPageAddr);
935 DPRINTF(GPUTLB,
"Sent translation request to lower level "
936 "TLB for addr %#x\n", virtPageAddr);
940 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for "
941 "addr %#x\n", virtPageAddr);
957 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
966 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
967 assert(alignedVaddr == virtPageAddr);
972 pte =
p->pTable->lookup(
vaddr);
976 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
993 panic(
"Unexpected TLB outcome %d", outcome);
1000 tlb->translationReturn(virtPageAddr, outcome, pkt);
1006 return "trigger translationDoneEvent";
1018 return virtPageAddr;
1030 if (
tlb->outstandingReqs <
tlb->maxCoalescedReqs) {
1031 tlb->issueTLBLookup(pkt);
1033 tlb->outstandingReqs++;
1036 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
1037 tlb->outstandingReqs);
1054 safe_cast<GpuTranslationState*>(pkt->
senderState);
1063 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr "
1066 local_entry = safe_cast<TlbEntry *>(sender_state->
tlbEntry);
1068 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr "
1076 new_entry = safe_cast<TlbEntry *>(sender_state->
tlbEntry);
1078 local_entry = new_entry;
1083 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1086 local_entry =
insert(virt_page_addr, *new_entry);
1089 assert(local_entry);
1092 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
1093 "while paddr was %#x.\n", local_entry->
vaddr,
1094 local_entry->
paddr);
1110 int page_size = local_entry->
size();
1112 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
1114 pkt->
req->setPaddr(paddr);
1126 safe_cast<GpuTranslationState*>(pkt->
senderState);
1129 bool update_stats = !sender_state->
isPrefetch;
1135 tlb->updatePageFootprint(virt_page_addr);
1138 bool success =
tlb->tlbLookup(pkt->
req, tc, update_stats);
1144 tlb->stats.globalNumTLBAccesses++;
1148 tlb->stats.globalNumTLBHits++;
1154 tlb->stats.globalNumTLBMisses++;
1155 if (
tlb->hasMemSidePort) {
1157 tlb->memSidePort[0]->sendFunctional(pkt);
1163 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1170 Addr alignedVaddr =
p->pTable->pageAlign(
vaddr);
1171 assert(alignedVaddr == virt_page_addr);
1174 p->pTable->lookup(
vaddr);
1177 pte =
p->pTable->lookup(
vaddr);
1185 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1190 pte->
paddr,
false,
false);
1197 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1202 pte->
paddr,
false,
false);
1204 DPRINTF(GPUPrefetch,
"Prefetch failed %#x\n",
1214 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
1215 tlb->lookup(pkt->
req->getVaddr()));
1231 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1239 panic(
"recvReqRetry called");
1262 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
1265 TLBEvent *tlb_event =
tlb->translationReturnEvent[virt_page_addr];
1280 panic(
"recvReqRetry called");
1292 delete old_tlb_event;
1321 AccessPatternTable::value_type(virt_page_addr, tmp_access_info));
1323 bool first_page_access = ret.second;
1325 if (first_page_access) {
1328 int accessed_before;
1329 accessed_before =
curTick() - ret.first->second.lastTimeAccessed;
1330 ret.first->second.totalReuseDistance += accessed_before;
1333 ret.first->second.accessesPerPage++;
1334 ret.first->second.lastTimeAccessed =
curTick();
1337 ret.first->second.localTLBAccesses
1345 std::ostream *page_stat_file =
nullptr;
1356 <<
"page,max_access_distance,mean_access_distance, "
1357 <<
"stddev_distance" << std::endl;
1361 unsigned int sum_avg_reuse_distance_per_page = 0;
1365 sum_avg_reuse_distance_per_page += iter.second.totalReuseDistance /
1366 iter.second.accessesPerPage;
1369 unsigned int tmp = iter.second.localTLBAccesses[0];
1370 unsigned int prev = tmp;
1372 for (
int i = 0;
i < iter.second.localTLBAccesses.size(); ++
i) {
1377 prev = iter.second.localTLBAccesses[
i];
1380 iter.second.localTLBAccesses[
i] -= tmp;
1383 iter.second.sumDistance +=
1384 iter.second.localTLBAccesses[
i];
1387 iter.second.meanDistance =
1388 iter.second.sumDistance / iter.second.accessesPerPage;
1392 unsigned int max_distance = 0;
1393 unsigned int stddev_distance = 0;
1395 for (
int i = 0;
i < iter.second.localTLBAccesses.size(); ++
i) {
1396 unsigned int tmp_access_distance =
1397 iter.second.localTLBAccesses[
i];
1399 if (tmp_access_distance > max_distance) {
1400 max_distance = tmp_access_distance;
1404 tmp_access_distance - iter.second.meanDistance;
1405 stddev_distance += pow(diff, 2);
1410 sqrt(stddev_distance/iter.second.accessesPerPage);
1412 if (page_stat_file) {
1413 *page_stat_file << std::hex << iter.first <<
",";
1414 *page_stat_file << std::dec << max_distance <<
",";
1415 *page_stat_file << std::dec << iter.second.meanDistance
1417 *page_stat_file << std::dec << stddev_distance;
1418 *page_stat_file << std::endl;
1422 iter.second.localTLBAccesses.clear();
1436 : statistics::
Group(parent),
1437 ADD_STAT(localNumTLBAccesses,
"Number of TLB accesses"),
1438 ADD_STAT(localNumTLBHits,
"Number of TLB hits"),
1439 ADD_STAT(localNumTLBMisses,
"Number of TLB misses"),
1440 ADD_STAT(localTLBMissRate,
"TLB miss rate"),
1441 ADD_STAT(globalNumTLBAccesses,
"Number of TLB accesses"),
1442 ADD_STAT(globalNumTLBHits,
"Number of TLB hits"),
1443 ADD_STAT(globalNumTLBMisses,
"Number of TLB misses"),
1444 ADD_STAT(globalTLBMissRate,
"TLB miss rate"),
1445 ADD_STAT(accessCycles,
"Cycles spent accessing this TLB level"),
1446 ADD_STAT(pageTableCycles,
"Cycles spent accessing the page table"),
1447 ADD_STAT(numUniquePages,
"Number of unique pages touched"),
1448 ADD_STAT(localCycles,
"Number of cycles spent in queue for all "
1450 ADD_STAT(localLatency,
"Avg. latency over incoming coalesced reqs"),
1451 ADD_STAT(avgReuseDistance,
"avg. reuse distance over all pages (in "
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e.
virtual std::string name() const
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
std::ostream * stream() const
Get the underlying output stream.
Addr instAddr() const
Returns the memory address of the instruction this PC points to.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
RequestPtr req
A pointer to the original request.
Ports are used to interface objects to each other.
@ STRICT_ORDER
The request is required to be strictly ordered by CPU models and is non-speculative.
@ UNCACHEABLE
The request is to an uncacheable address.
@ READ_MODIFY_WRITE
This request is a read which will be followed by a write.
ThreadContext is the external interface to all thread state for anything outside of the CPU.
virtual RegVal readMiscReg(RegIndex misc_reg)=0
virtual void setMiscReg(RegIndex misc_reg, RegVal val)=0
virtual const PCStateBase & pcState() const =0
virtual RegVal readMiscRegNoEffect(RegIndex misc_reg) const =0
virtual Process * getProcessPtr()=0
virtual ContextID contextId() const =0
virtual void recvReqRetry()
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and it calls issueTLBLookup(). It o...
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
void updateOutcome(tlbOutcome _outcome)
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
const char * description() const
Return a C string describing the event.
virtual void finish(Fault fault, const RequestPtr &req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
void invalidateNonGlobal()
EventFunctionWrapper cleanupEvent
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Fault translateInt(bool read, const RequestPtr &req, ThreadContext *tc)
Fault translate(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
AccessPatternTable TLBFootprint
void demapPage(Addr va, uint64_t asn)
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
TlbEntry * lookup(Addr va, bool update_lru=true)
std::vector< TlbEntry > tlb
std::vector< MemSidePort * > memSidePort
bool accessDistance
Print out accessDistance stats.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::queue< Addr > cleanupQueue
bool FA
true if this is a fully-associative TLB
TlbEntry * insert(Addr vpn, TlbEntry &entry)
gem5::X86ISA::GpuTLB::GpuTLBStats stats
void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
std::vector< EntryList > freeList
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
void setConfigAddress(uint32_t addr)
bool hasMemSidePort
if true, then this is not the last level TLB
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
void updatePageFootprint(Addr virt_page_addr)
bool tlbLookup(const RequestPtr &req, ThreadContext *tc, bool update_stats)
tlbLookup will only perform a TLB lookup, returning true on a TLB hit and false on a TLB miss.
std::vector< CpuSidePort * > cpuSidePort
Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, Mode mode, int &latency)
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, TlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
size_type size() const
Return the number of elements, always 1 for a scalar.
Counter value() const
Return the current value of this stat as its base type.
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr T mbits(T val, unsigned first, unsigned last)
Mask off the given bits in place like bits() but without shifting.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
static RegIndex segSel(int index)
static RegIndex segAttr(int index)
static RegIndex segBase(int index)
static RegIndex segLimit(int index)
constexpr auto AddrSizeFlagMask
const Addr PhysAddrPrefixPciConfig
Bitfield< 14 > expandDown
const Addr IntAddrPrefixIO
constexpr auto AddrSizeFlagShift
constexpr Request::FlagsType SegmentFlagMask
BitfieldType< SegDescriptorLimit > limit
const Addr PhysAddrPrefixIO
constexpr auto CPL0FlagBit
const Addr IntAddrPrefixCPUID
const Addr IntAddrPrefixMSR
const Addr IntAddrPrefixMask
bool msrAddrToIndex(RegIndex &reg_num, Addr addr)
Find and return the misc reg corresponding to an MSR address.
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< FaultBase > Fault
std::shared_ptr< Request > RequestPtr
void cprintf(const char *format, const Args &...args)
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
std::string csprintf(const char *format, const Args &...args)
constexpr decltype(nullptr) NoFault
Declarations of a non-full system Page Table.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
GPU TranslationState: this currently is somewhat of a bastardization of the usage of SenderState,...
std::vector< int > reqCnt
This hash map will use the virtual page address as a key and will keep track of total number of acces...
unsigned int accessesPerPage
unsigned int totalReuseDistance
unsigned int lastTimeAccessed
unsigned int meanDistance
GpuTLBStats(statistics::Group *parent)
statistics::Formula localTLBMissRate
statistics::Scalar globalNumTLBMisses
statistics::Scalar localCycles
statistics::Formula globalTLBMissRate
statistics::Scalar localNumTLBHits
statistics::Scalar numUniquePages
statistics::Scalar avgReuseDistance
statistics::Scalar globalNumTLBHits
statistics::Formula localLatency
statistics::Scalar localNumTLBMisses
statistics::Scalar globalNumTLBAccesses
statistics::Scalar accessCycles
statistics::Scalar localNumTLBAccesses
statistics::Scalar pageTableCycles
const std::string & name()