40#include "debug/GPUPrefetch.hh"
41#include "debug/GPUTLB.hh"
54 gpuDevice(
p.gpu_device), size(
p.size), stats(this),
59 assert(assoc <= size);
61 allocationPolicy =
p.allocationPolicy;
62 hasMemSidePort =
false;
64 tlb.assign(size, VegaTlbEntry());
66 freeList.resize(numSets);
67 entryList.resize(numSets);
70 for (
int way = 0; way < assoc; ++way) {
71 int x =
set * assoc + way;
72 freeList[
set].push_back(&
tlb.at(
x));
77 setMask = numSets - 1;
79 maxCoalescedReqs =
p.maxOutstandingReqs;
83 hitLatency =
p.hitLatency;
84 missLatency1 =
p.missLatency1;
85 missLatency2 =
p.missLatency2;
88 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
89 cpuSidePort.push_back(
new CpuSidePort(
csprintf(
"%s-port%d",
94 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
95 memSidePort.push_back(
new MemSidePort(
csprintf(
"%s-port%d",
100 walker->setTLB(
this);
105 gpuDevice->getVM().registerTLB(
this);
116 if (if_name ==
"cpu_side_ports") {
118 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
122 }
else if (if_name ==
"mem_side_ports") {
124 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
131 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
138 DPRINTF(GPUTLB,
"GPUTLB: Raising page fault.\n");
146 return std::make_shared<PageFault>(
vaddr, code,
true,
mode,
true);
153 return (
vaddr & ~pageMask);
159 VegaTlbEntry *newEntry =
nullptr;
174 DPRINTF(GPUTLB,
"Inserted %#lx -> %#lx of size %#lx into set %d\n",
175 newEntry->vaddr, newEntry->paddr, entry.size(),
set);
180GpuTLB::EntryList::iterator
191 int page_size = (*entry)->size();
193 if ((*entry)->vaddr <=
va && (*entry)->vaddr + page_size >
va) {
194 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x "
195 "with size %#x.\n",
va, (*entry)->vaddr, page_size);
226 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
262 if (req->hasNoAddr()) {
270 VegaTlbEntry *entry =
lookup(alignedVaddr,
true);
330 bool update_stats = !sender_state->
isPrefetch;
332 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
335 int req_cnt = sender_state->
reqCnt.back();
347 auto entry =
tlbLookup(tmp_req, update_stats);
348 if (entry || pkt->
req->hasNoAddr()) {
351 if (pkt->
req->hasNoAddr()) {
353 new VegaTlbEntry(1 , 0, 0, 0, 0);
356 pkt->
req->setSystemReq(
false);
358 VegaTlbEntry *entry =
lookup(virt_page_addr,
false);
362 pkt->
req->setSystemReq(entry->pte.s);
366 new VegaTlbEntry(1 , virt_page_addr, alignedPaddr,
367 entry->logBytes, entry->pte);
391 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
394 panic(
"Virtual Page Address %#x already has a return event\n",
401 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
409 :
Event(CPU_Tick_Pri),
tlb(_tlb), virtPageAddr(_addr),
410 outcome(tlb_outcome), pkt(_pkt)
423 bool badWrite = (!tlb_entry->writable());
429 fatal(
"Page fault on addr %lx PTE=%#lx", pkt->
req->getVaddr(),
430 (uint64_t)tlb_entry->pte);
437 DPRINTF(GPUTLB,
"WalkerResponse for %#lx. Entry: (%#lx, %#lx, %#lx)\n",
438 pkt->
req->getVaddr(), entry.vaddr, entry.paddr, entry.size());
444 Addr paddr = page_addr + (entry.vaddr &
mask(entry.logBytes));
445 pkt->
req->setPaddr(paddr);
446 pkt->
req->setSystemReq(entry.pte.s);
450 sender_state->
tlbEntry =
new VegaTlbEntry(entry);
472 VegaTlbEntry *local_entry, *new_entry;
474 int req_cnt = sender_state->
reqCnt.back();
475 bool update_stats = !sender_state->
isPrefetch;
483 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n",
485 local_entry = safe_cast<VegaTlbEntry *>(sender_state->
tlbEntry);
487 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
495 new_entry = safe_cast<VegaTlbEntry *>(sender_state->
tlbEntry);
497 local_entry = new_entry;
500 assert(new_entry->pte);
501 DPRINTF(GPUTLB,
"allocating entry w/ addr %#lx of size %#lx\n",
502 virt_page_addr, new_entry->size());
504 local_entry =
insert(virt_page_addr, *new_entry);
515 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
516 "while paddr was %#x.\n", local_entry->vaddr,
520 int page_size = local_entry->size();
521 Addr paddr = local_entry->paddr + (
vaddr & (page_size - 1));
522 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
530 pkt->
req->setPaddr(paddr);
532 if (local_entry->uncacheable()) {
541 DPRINTF(GPUTLB,
"Scheduled %#lx for cleanup\n", virt_page_addr);
561 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
569 int req_cnt = tmp_sender_state->
reqCnt.back();
570 bool update_stats = !tmp_sender_state->
isPrefetch;
578 DPRINTF(GPUTLB,
"This is a TLB miss\n");
590 tmp_sender_state->
pasId = 0;
593 DPRINTF(GPUTLB,
"Failed sending translation request to "
594 "lower level TLB for addr %#x\n", virtPageAddr);
598 DPRINTF(GPUTLB,
"Sent translation request to lower level "
599 "TLB for addr %#x\n", virtPageAddr);
603 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for "
604 "addr %#x\n", virtPageAddr);
620 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
635 panic(
"Unexpected TLB outcome %d", outcome);
642 tlb->translationReturn(virtPageAddr, outcome, pkt);
648 return "trigger translationDoneEvent";
676 if (
tlb->outstandingReqs <
tlb->maxCoalescedReqs) {
677 assert(!
tlb->translationReturnEvent.count(virt_page_addr));
678 tlb->issueTLBLookup(pkt);
680 tlb->outstandingReqs++;
683 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
684 tlb->outstandingReqs);
685 tlb->stats.maxDownstreamReached++;
690 if (
tlb->outstandingReqs >
tlb->stats.outstandingReqsMax.value())
691 tlb->stats.outstandingReqsMax =
tlb->outstandingReqs;
713 VegaTlbEntry *local_entry, *new_entry;
716 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr "
719 local_entry = safe_cast<VegaTlbEntry *>(sender_state->
tlbEntry);
721 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr "
729 new_entry = safe_cast<VegaTlbEntry *>(sender_state->
tlbEntry);
731 local_entry = new_entry;
736 DPRINTF(GPUTLB,
"allocating entry w/ addr %#lx\n",
739 local_entry =
insert(virt_page_addr, *new_entry);
745 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
746 "while paddr was %#x.\n", local_entry->vaddr,
763 int page_size = local_entry->size();
764 Addr paddr = local_entry->paddr + (
vaddr & (page_size - 1));
765 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
767 pkt->
req->setPaddr(paddr);
769 if (local_entry->uncacheable())
781 bool update_stats = !sender_state->
isPrefetch;
787 bool success =
tlb->tlbLookup(pkt->
req, update_stats);
793 tlb->stats.globalNumTLBAccesses++;
797 tlb->stats.globalNumTLBHits++;
799 tlb->stats.globalNumTLBMisses++;
804 if (
tlb->hasMemSidePort) {
806 tlb->memSidePort[0]->sendFunctional(pkt);
812 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
816 [[maybe_unused]]
Addr alignedVaddr =
817 tlb->pageAlign(virt_page_addr);
818 assert(alignedVaddr == virt_page_addr);
824 Addr base =
tlb->gpuDevice->getVM().getPageTableBase(1);
825 tlb->walker->setDevRequestor(
tlb->gpuDevice->vramRequestorId());
832 fatal(
"Translation fault in TLB at %d!", __LINE__);
840 Addr alignedPaddr =
tlb->pageAlign(paddr);
841 pkt->
req->setPaddr(paddr);
842 pkt->
req->setSystemReq(pte.s);
850 new VegaTlbEntry(1 , virt_page_addr,
851 alignedPaddr, logBytes, pte);
861 new VegaTlbEntry(1 , virt_page_addr,
862 alignedPaddr, logBytes, pte);
873 VegaTlbEntry *entry =
tlb->lookup(virt_page_addr, update_stats);
877 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
881 sender_state->
tlbEntry =
new VegaTlbEntry(1 , entry->vaddr,
882 entry->paddr, entry->logBytes,
890 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
898 panic(
"recvReqRetry called");
921 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
924 TLBEvent *tlb_event =
tlb->translationReturnEvent[virt_page_addr];
939 panic(
"recvReqRetry called");
949 DPRINTF(GPUTLB,
"Deleting return event for %#lx\n", cleanup_addr);
953 delete old_tlb_event;
969 : statistics::
Group(parent),
970 ADD_STAT(maxDownstreamReached,
"Number of refused translation requests"),
971 ADD_STAT(outstandingReqsMax,
"Maximum count in coalesced request queue"),
972 ADD_STAT(localNumTLBAccesses,
"Number of TLB accesses"),
973 ADD_STAT(localNumTLBHits,
"Number of TLB hits"),
974 ADD_STAT(localNumTLBMisses,
"Number of TLB misses"),
975 ADD_STAT(localTLBMissRate,
"TLB miss rate"),
976 ADD_STAT(globalNumTLBAccesses,
"Number of TLB accesses"),
977 ADD_STAT(globalNumTLBHits,
"Number of TLB hits"),
978 ADD_STAT(globalNumTLBMisses,
"Number of TLB misses"),
979 ADD_STAT(globalTLBMissRate,
"TLB miss rate"),
980 ADD_STAT(accessCycles,
"Cycles spent accessing this TLB level"),
981 ADD_STAT(pageTableCycles,
"Cycles spent accessing the page table"),
982 ADD_STAT(localCycles,
"Number of cycles spent in queue for all "
984 ADD_STAT(localLatency,
"Avg. latency over incoming coalesced reqs")
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
Addr getPageTableBase(uint16_t vmid)
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
Ports are used to interface objects to each other.
@ UNCACHEABLE
The request is to an uncacheable address.
virtual void recvReqRetry()
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and it calls issueTLBLookup() It o...
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
const char * description() const
Return a C string describing the event.
void updateOutcome(tlbOutcome _outcome)
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
VegaTlbEntry * lookup(Addr va, bool update_lru=true)
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
void walkerResponse(VegaTlbEntry &entry, PacketPtr pkt)
std::vector< CpuSidePort * > cpuSidePort
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
VegaTlbEntry * insert(Addr vpn, VegaTlbEntry &entry)
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
void pagingProtectionChecks(PacketPtr pkt, VegaTlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
std::vector< MemSidePort * > memSidePort
std::vector< EntryList > freeList
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Addr pageAlign(Addr vaddr)
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
std::queue< Addr > cleanupQueue
bool FA
true if this is a fully-associative TLB
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
EventFunctionWrapper cleanupEvent
gem5::VegaISA::GpuTLB::VegaTLBStats stats
VegaTlbEntry * tlbLookup(const RequestPtr &req, bool update_stats)
TLB_lookup will only perform a TLB lookup returning the TLB entry on a TLB hit and nullptr on a TLB m...
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
void demapPage(Addr va, uint64_t asn)
GpuTLB(const VegaGPUTLBParams &p)
std::vector< VegaTlbEntry > tlb
Fault createPagefault(Addr vaddr, Mode mode)
bool hasMemSidePort
if true, then this is not the last level TLB
void setDevRequestor(RequestorID mid)
void startTiming(PacketPtr pkt, Addr base, Addr vaddr, BaseMMU::Mode mode)
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< FaultBase > Fault
std::shared_ptr< Request > RequestPtr
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::string csprintf(const char *format, const Args &...args)
constexpr decltype(nullptr) NoFault
GPU TranslationState: this currently is a somewhat bastardization of the usage of SenderState,...
std::vector< int > reqCnt
statistics::Scalar localNumTLBMisses
statistics::Formula localLatency
statistics::Formula localTLBMissRate
statistics::Scalar localCycles
statistics::Scalar globalNumTLBAccesses
statistics::Scalar accessCycles
statistics::Scalar localNumTLBAccesses
VegaTLBStats(statistics::Group *parent)
statistics::Formula globalTLBMissRate
statistics::Scalar globalNumTLBMisses
statistics::Scalar localNumTLBHits
statistics::Scalar pageTableCycles
statistics::Scalar globalNumTLBHits
const std::string & name()