40#include "debug/GPUPrefetch.hh"
41#include "debug/GPUTLB.hh"
54 gpuDevice(
p.gpu_device), size(
p.size), stats(this),
59 assert(assoc <= size);
61 allocationPolicy =
p.allocationPolicy;
62 hasMemSidePort =
false;
64 tlb.assign(size, VegaTlbEntry());
66 freeList.resize(numSets);
67 entryList.resize(numSets);
70 for (
int way = 0; way < assoc; ++way) {
71 int x =
set * assoc + way;
72 freeList[
set].push_back(&
tlb.at(
x));
77 setMask = numSets - 1;
79 maxCoalescedReqs =
p.maxOutstandingReqs;
83 hitLatency =
p.hitLatency;
84 missLatency1 =
p.missLatency1;
85 missLatency2 =
p.missLatency2;
88 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
89 cpuSidePort.push_back(
new CpuSidePort(
csprintf(
"%s-port%d",
94 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
95 memSidePort.push_back(
new MemSidePort(
csprintf(
"%s-port%d",
100 walker->setTLB(
this);
105 gpuDevice->getVM().registerTLB(
this);
116 if (if_name ==
"cpu_side_ports") {
118 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
122 }
else if (if_name ==
"mem_side_ports") {
124 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
131 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
138 DPRINTF(GPUTLB,
"GPUTLB: Raising page fault.\n");
146 return std::make_shared<PageFault>(
vaddr, code,
true,
mode,
true);
153 return (
vaddr & ~pageMask);
159 VegaTlbEntry *newEntry =
nullptr;
174 DPRINTF(GPUTLB,
"Inserted %#lx -> %#lx of size %#lx into set %d\n",
175 newEntry->vaddr, newEntry->paddr, entry.size(),
set);
180GpuTLB::EntryList::iterator
191 int page_size = (*entry)->size();
193 if ((*entry)->vaddr <=
va && (*entry)->vaddr + page_size >
va) {
194 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x "
195 "with size %#x.\n",
va, (*entry)->vaddr, page_size);
226 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
267 VegaTlbEntry *entry =
lookup(alignedVaddr,
true);
327 bool update_stats = !sender_state->
isPrefetch;
329 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
332 int req_cnt = sender_state->
reqCnt.back();
344 auto entry =
tlbLookup(tmp_req, update_stats);
349 VegaTlbEntry *entry =
lookup(virt_page_addr,
false);
353 pkt->
req->setSystemReq(entry->pte.s);
357 new VegaTlbEntry(1 , virt_page_addr, alignedPaddr,
358 entry->logBytes, entry->pte);
381 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
384 panic(
"Virtual Page Address %#x already has a return event\n",
391 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
399 :
Event(CPU_Tick_Pri),
tlb(_tlb), virtPageAddr(_addr),
400 outcome(tlb_outcome), pkt(_pkt)
413 bool badWrite = (!tlb_entry->writable());
419 fatal(
"Page fault on addr %lx PTE=%#lx", pkt->
req->getVaddr(),
420 (uint64_t)tlb_entry->pte);
427 DPRINTF(GPUTLB,
"WalkerResponse for %#lx. Entry: (%#lx, %#lx, %#lx)\n",
428 pkt->
req->getVaddr(), entry.vaddr, entry.paddr, entry.size());
434 Addr paddr = page_addr + (entry.vaddr &
mask(entry.logBytes));
435 pkt->
req->setPaddr(paddr);
436 pkt->
req->setSystemReq(entry.pte.s);
440 sender_state->
tlbEntry =
new VegaTlbEntry(entry);
462 VegaTlbEntry *local_entry, *new_entry;
464 int req_cnt = sender_state->
reqCnt.back();
465 bool update_stats = !sender_state->
isPrefetch;
473 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n",
477 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
487 local_entry = new_entry;
490 assert(new_entry->pte);
491 DPRINTF(GPUTLB,
"allocating entry w/ addr %#lx of size %#lx\n",
492 virt_page_addr, new_entry->size());
494 local_entry =
insert(virt_page_addr, *new_entry);
505 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
506 "while paddr was %#x.\n", local_entry->vaddr,
510 int page_size = local_entry->size();
511 Addr paddr = local_entry->paddr + (
vaddr & (page_size - 1));
512 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
520 pkt->
req->setPaddr(paddr);
522 if (local_entry->uncacheable()) {
531 DPRINTF(GPUTLB,
"Scheduled %#lx for cleanup\n", virt_page_addr);
551 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
559 int req_cnt = tmp_sender_state->
reqCnt.back();
560 bool update_stats = !tmp_sender_state->
isPrefetch;
568 DPRINTF(GPUTLB,
"This is a TLB miss\n");
580 tmp_sender_state->
pasId = 0;
583 DPRINTF(GPUTLB,
"Failed sending translation request to "
584 "lower level TLB for addr %#x\n", virtPageAddr);
588 DPRINTF(GPUTLB,
"Sent translation request to lower level "
589 "TLB for addr %#x\n", virtPageAddr);
593 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for "
594 "addr %#x\n", virtPageAddr);
610 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
625 panic(
"Unexpected TLB outcome %d", outcome);
632 tlb->translationReturn(virtPageAddr, outcome, pkt);
638 return "trigger translationDoneEvent";
666 if (
tlb->outstandingReqs <
tlb->maxCoalescedReqs) {
667 assert(!
tlb->translationReturnEvent.count(virt_page_addr));
668 tlb->issueTLBLookup(pkt);
670 tlb->outstandingReqs++;
673 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
674 tlb->outstandingReqs);
675 tlb->stats.maxDownstreamReached++;
680 if (
tlb->outstandingReqs >
tlb->stats.outstandingReqsMax.value())
681 tlb->stats.outstandingReqsMax =
tlb->outstandingReqs;
703 VegaTlbEntry *local_entry, *new_entry;
706 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr "
711 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr "
721 local_entry = new_entry;
726 DPRINTF(GPUTLB,
"allocating entry w/ addr %#lx\n",
729 local_entry =
insert(virt_page_addr, *new_entry);
735 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
736 "while paddr was %#x.\n", local_entry->vaddr,
753 int page_size = local_entry->size();
754 Addr paddr = local_entry->paddr + (
vaddr & (page_size - 1));
755 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n",
vaddr, paddr);
757 pkt->
req->setPaddr(paddr);
759 if (local_entry->uncacheable())
771 bool update_stats = !sender_state->
isPrefetch;
777 bool success =
tlb->tlbLookup(pkt->
req, update_stats);
783 tlb->stats.globalNumTLBAccesses++;
787 tlb->stats.globalNumTLBHits++;
789 tlb->stats.globalNumTLBMisses++;
794 if (
tlb->hasMemSidePort) {
796 tlb->memSidePort[0]->sendFunctional(pkt);
802 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
806 [[maybe_unused]]
Addr alignedVaddr =
807 tlb->pageAlign(virt_page_addr);
808 assert(alignedVaddr == virt_page_addr);
814 Addr base =
tlb->gpuDevice->getVM().getPageTableBase(1);
815 tlb->walker->setDevRequestor(
tlb->gpuDevice->vramRequestorId());
822 fatal(
"Translation fault in TLB at %d!", __LINE__);
830 Addr alignedPaddr =
tlb->pageAlign(paddr);
831 pkt->
req->setPaddr(paddr);
832 pkt->
req->setSystemReq(pte.s);
840 new VegaTlbEntry(1 , virt_page_addr,
841 alignedPaddr, logBytes, pte);
851 new VegaTlbEntry(1 , virt_page_addr,
852 alignedPaddr, logBytes, pte);
863 VegaTlbEntry *entry =
tlb->lookup(virt_page_addr, update_stats);
867 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
871 sender_state->
tlbEntry =
new VegaTlbEntry(1 , entry->vaddr,
872 entry->paddr, entry->logBytes,
880 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
888 panic(
"recvReqRetry called");
911 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
914 TLBEvent *tlb_event =
tlb->translationReturnEvent[virt_page_addr];
929 panic(
"recvReqRetry called");
939 DPRINTF(GPUTLB,
"Deleting return event for %#lx\n", cleanup_addr);
943 delete old_tlb_event;
959 : statistics::
Group(parent),
960 ADD_STAT(maxDownstreamReached,
"Number of refused translation requests"),
961 ADD_STAT(outstandingReqsMax,
"Maximum count in coalesced request queue"),
962 ADD_STAT(localNumTLBAccesses,
"Number of TLB accesses"),
963 ADD_STAT(localNumTLBHits,
"Number of TLB hits"),
964 ADD_STAT(localNumTLBMisses,
"Number of TLB misses"),
965 ADD_STAT(localTLBMissRate,
"TLB miss rate"),
966 ADD_STAT(globalNumTLBAccesses,
"Number of TLB accesses"),
967 ADD_STAT(globalNumTLBHits,
"Number of TLB hits"),
968 ADD_STAT(globalNumTLBMisses,
"Number of TLB misses"),
969 ADD_STAT(globalTLBMissRate,
"TLB miss rate"),
970 ADD_STAT(accessCycles,
"Cycles spent accessing this TLB level"),
971 ADD_STAT(pageTableCycles,
"Cycles spent accessing the page table"),
972 ADD_STAT(localCycles,
"Number of cycles spent in queue for all "
974 ADD_STAT(localLatency,
"Avg. latency over incoming coalesced reqs")
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
Addr getPageTableBase(uint16_t vmid)
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
Ports are used to interface objects to each other.
@ UNCACHEABLE
The request is to an uncacheable address.
virtual void recvReqRetry()
virtual bool recvTimingReq(PacketPtr pkt)
recvTiming receives a coalesced timing request from a TLBCoalescer and it calls issueTLBLookup(). It o...
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
const char * description() const
Return a C string describing the event.
void updateOutcome(tlbOutcome _outcome)
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
VegaTlbEntry * lookup(Addr va, bool update_lru=true)
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
void walkerResponse(VegaTlbEntry &entry, PacketPtr pkt)
std::vector< CpuSidePort * > cpuSidePort
virtual void serialize(CheckpointOut &cp) const override
Serialize an object.
virtual void unserialize(CheckpointIn &cp) override
Unserialize an object.
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> cycles l...
VegaTlbEntry * insert(Addr vpn, VegaTlbEntry &entry)
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e....
void pagingProtectionChecks(PacketPtr pkt, VegaTlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
std::vector< MemSidePort * > memSidePort
std::vector< EntryList > freeList
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
Addr pageAlign(Addr vaddr)
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns.
std::queue< Addr > cleanupQueue
bool FA
true if this is a fully-associative TLB
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
EventFunctionWrapper cleanupEvent
gem5::VegaISA::GpuTLB::VegaTLBStats stats
VegaTlbEntry * tlbLookup(const RequestPtr &req, bool update_stats)
tlbLookup will only perform a TLB lookup, returning the TLB entry on a TLB hit and nullptr on a TLB m...
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions.
void demapPage(Addr va, uint64_t asn)
GpuTLB(const VegaGPUTLBParams &p)
std::vector< VegaTlbEntry > tlb
Fault createPagefault(Addr vaddr, Mode mode)
bool hasMemSidePort
if true, then this is not the last level TLB
void setDevRequestor(RequestorID mid)
void startTiming(PacketPtr pkt, Addr base, Addr vaddr, BaseMMU::Mode mode)
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< FaultBase > Fault
T safe_cast(U &&ref_or_ptr)
std::shared_ptr< Request > RequestPtr
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::string csprintf(const char *format, const Args &...args)
constexpr decltype(nullptr) NoFault
GPU TranslationState: this is currently somewhat of a bastardization of the usage of SenderState,...
std::vector< int > reqCnt
statistics::Scalar localNumTLBMisses
statistics::Formula localLatency
statistics::Formula localTLBMissRate
statistics::Scalar localCycles
statistics::Scalar globalNumTLBAccesses
statistics::Scalar accessCycles
statistics::Scalar localNumTLBAccesses
VegaTLBStats(statistics::Group *parent)
statistics::Formula globalTLBMissRate
statistics::Scalar globalNumTLBMisses
statistics::Scalar localNumTLBHits
statistics::Scalar pageTableCycles
statistics::Scalar globalNumTLBHits
const std::string & name()