40#include "debug/GPUTLB.hh"
52 "Probe the TLB below",
54 cleanupEvent([
this]{ processCleanupEvent(); },
55 "Cleanup issuedTranslationsTable hashmap",
57 tlb_level(
p.tlb_level),
58 maxDownstream(
p.maxDownstream),
62 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
63 cpuSidePort.push_back(
new CpuSidePort(
csprintf(
"%s-port%d",
name(),
i),
68 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
69 memSidePort.push_back(
new MemSidePort(
csprintf(
"%s-port%d",
name(),
i),
73 default_pgSize =
p.default_pgSize;
74 potentialPagesize.insert(default_pgSize);
80 if (if_name ==
"cpu_side_ports") {
82 panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
86 }
else if (if_name ==
"mem_side_ports") {
88 panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
93 panic(
"VegaTLBCoalescer::getPort: unknown port %s\n", if_name);
125 if (incoming_virt_page_addr != coalesced_virt_page_addr)
134 if (incoming_mode != coalesced_mode)
141 coalesced_state->
reqCnt.back() += incoming_state->
reqCnt.back();
158 VegaISA::VegaTlbEntry tlb_entry =
160 Addr first_entry_vaddr = tlb_entry.vaddr;
161 Addr first_entry_paddr = tlb_entry.paddr;
162 int page_size = tlb_entry.size();
170 virt_page_addr =
roundDown(pkt->
req->getVaddr(), pgsize_seen);
175 DPRINTF(GPUTLB,
"Update phys. addr. for %d \
176 coalesced reqs for page %#x\n",
180 bool uncacheable = tlb_entry.uncacheable();
181 int first_hit_level = sender_state->
hitLevel;
182 bool is_system = pkt->
req->systemReq();
188 Addr local_pkt_vaddr = local_pkt->
req->getVaddr();
192 if (!(first_entry_vaddr <= local_pkt_vaddr &&
193 local_pkt_vaddr < first_entry_vaddr+page_size)) {
204 sender_state->
reqCnt.pop_back();
215 Addr paddr = first_entry_paddr
216 + (local_pkt->
req->getVaddr() & (page_size - 1));
217 local_pkt->
req->setPaddr(paddr);
226 if (sender_state->
tlbEntry == NULL) {
229 new VegaISA::VegaTlbEntry(1 ,
239 sender_state->
hitLevel = first_hit_level;
243 local_pkt->
req->setSystemReq(is_system);
246 sender_state->
ports.pop_back();
275 bool didCoalesce =
false;
277 int coalescedReq_cnt = 0;
282 DPRINTF(GPUTLB,
"Trying to re-issue req at tick: %llu, addr: %#x\n",
296 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
304 DPRINTF(GPUTLB,
"Coalesced re-issued req %i \
305 w/ tick_index %d has %d reqs\n",
317 if (!coalescedReq_cnt || !didCoalesce) {
319 new_array.push_back(pkt);
323 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after "
324 "push re-issued req\n", tick_index,
344 bool didCoalesce =
false;
346 int coalescedReq_cnt = 0;
351 bool update_stats = !sender_state->
isPrefetch;
357 sender_state->
ports.push_back(
this);
367 if (!sender_state->
reqCnt.empty())
368 req_cnt = sender_state->
reqCnt.back();
370 sender_state->
reqCnt.push_back(req_cnt);
374 req_cnt = sender_state->
reqCnt.back();
375 DPRINTF(GPUTLB,
"receiving pkt w/ req_cnt %d\n", req_cnt);
390 if (
coalescer->coalescerFIFO.count(tick_index)) {
391 coalescedReq_cnt =
coalescer->coalescerFIFO[tick_index].size();
396 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
397 first_packet =
coalescer->coalescerFIFO[tick_index][
i].first[0];
400 if (
coalescer->canCoalesce(pkt, first_packet, pg_size)) {
401 coalescer->coalescerFIFO[tick_index][
i].first.push_back(pkt);
403 DPRINTF(GPUTLB,
"Coalesced req %i w/ tick_index %d has %d reqs\n",
405 coalescer->coalescerFIFO[tick_index][
i].first.size());
415 if (!coalescedReq_cnt || !didCoalesce) {
420 new_array.push_back(pkt);
421 coalescer->coalescerFIFO[tick_index].push_back
422 (std::make_pair(new_array,
coalescer->default_pgSize));
424 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after "
425 "push\n", tick_index,
426 coalescer->coalescerFIFO[tick_index].size());
431 if (!
coalescer->probeTLBEvent.scheduled()) {
442 panic(
"recvReqRetry called");
452 bool update_stats = !sender_state->
isPrefetch;
458 int map_count =
coalescer->issuedTranslationsTable.count(virt_page_addr);
461 DPRINTF(GPUTLB,
"Warning! Functional access to addr %#x sees timing "
462 "req. pending\n", virt_page_addr);
465 coalescer->memSidePort[0]->sendFunctional(pkt);
492 "recvTimingReq: clscr = %p, numDownstream = %d, max = %d\n",
503 if (!
coalescer->probeTLBEvent.scheduled())
511 fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
536 DPRINTF(GPUTLB,
"triggered VegaTLBCoalescer %s\n", __func__);
540 DPRINTF(GPUTLB,
"IssueProbeEvent - no downstream slots, bail out\n");
546 int coalescedReq_cnt = iter->second.size();
548 int vector_index = 0;
550 DPRINTF(GPUTLB,
"coalescedReq_cnt is %d for tick_index %d\n",
551 coalescedReq_cnt, iter->first);
553 while (
i < coalescedReq_cnt) {
555 PacketPtr first_packet = iter->second[vector_index].first[0];
567 iter->second[vector_index].second);
571 int pending_reqs = 0;
577 DPRINTF(GPUTLB,
"Cannot issue - There are pending reqs for "
578 "page %#x\n", virt_page_addr);
585 if (!
memSidePort[0]->sendTimingReq(first_packet)) {
587 "Failed to send TLB request for page %#x",
604 bool update_stats = !tmp_sender_state->
isPrefetch;
610 int req_cnt = tmp_sender_state->
reqCnt.back();
613 DPRINTF(GPUTLB,
"%s sending pkt w/ req_cnt %d\n",
618 int pkt_cnt = iter->second[vector_index].first.size();
622 DPRINTF(GPUTLB,
"Successfully sent TLB request for page %#x\n",
627 = iter->second[vector_index].first;
630 iter->second.erase(iter->second.begin() + vector_index);
632 if (iter->second.empty())
633 assert(
i == coalescedReq_cnt );
642 if (iter->second.empty())
658 if (iter->second.empty()) {
674 DPRINTF(GPUTLB,
"Cleanup - Delete coalescer entry with key %#x\n",
685 .name(
name() +
".uncoalesced_accesses")
686 .desc(
"Number of uncoalesced TLB accesses")
690 .name(
name() +
".coalesced_accesses")
691 .desc(
"Number of coalesced TLB accesses")
695 .name(
name() +
".queuing_cycles")
696 .desc(
"Number of cycles spent in queue")
700 .name(
name() +
".local_queuing_cycles")
701 .desc(
"Number of cycles spent in queue for all incoming reqs")
705 .name(
name() +
".local_cycles")
706 .desc(
"Number of cycles spent in queue for all incoming reqs")
710 .name(
name() +
".local_latency")
711 .desc(
"Avg. latency over all incoming pkts")
715 .name(
name() +
".latency")
716 .desc(
"Avg. latency over all incoming pkts")
733 "insertStalledPortIfNotMapped: port %p, mapSz = %d, qsz = %d\n",
742 DPRINTF(GPUTLB,
"mustStallCUPort: downstream = %d, max = %d\n",
746 warn(
"RED ALERT - VegaTLBCoalescer::mustStallCUPort\n");
761 DPRINTF(GPUTLB,
"unstallPorts()\n");
784 DPRINTF(GPUTLB,
"sending retry for port = %p(%s)\n", port, port->name());
789 port->sendRetryReq();
ClockedObject(const ClockedObjectParams &p)
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
virtual std::string name() const
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
Ports are used to interface objects to each other.
@ UNCACHEABLE
The request is to an uncacheable address.
A ResponsePort is a specialization of a port.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
virtual void recvReqRetry()
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
VegaTLBCoalescer * coalescer
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on the peer) and was unsuccessful.
virtual void recvFunctional(PacketPtr pkt)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
VegaTLBCoalescer * coalescer
statistics::Scalar localqueuingCycles
statistics::Scalar coalescedAccesses
void processCleanupEvent()
CoalescingTable issuedTranslationsTable
void updatePhysAddresses(PacketPtr pkt)
statistics::Scalar queuingCycles
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required clean-up.
std::queue< CpuSidePort * > stalledPortsQueue
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2, Addr pagebytes)
std::set< Addr > potentialPagesize
void incrementNumDownstream()
statistics::Scalar uncoalescedAccesses
void reissue_pkt_helper(PacketPtr pkt)
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::vector< CpuSidePort * > cpuSidePort
std::vector< MemSidePort * > memSidePort
EventFunctionWrapper probeTLBEvent
This event issues the TLB probes.
VegaTLBCoalescer(const VegaTLBCoalescerParams &p)
void insertStalledPortIfNotMapped(CpuSidePort *)
CoalescingFIFO coalescerFIFO
std::map< CpuSidePort *, CpuSidePort * > stalledPortsMap
void regStats() override
Callback to set stat parameters.
statistics::Formula localLatency
unsigned int availDownstreamSlots()
std::queue< Addr > cleanupQueue
bool mustStallCUPort(CpuSidePort *)
statistics::Scalar localCycles
unsigned int numDownstream
void processProbeTLBEvent()
statistics::Formula latency
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void regStats()
Callback to set stat parameters.
Copyright (c) 2024 Arm Limited All rights reserved.
T safe_cast(U &&ref_or_ptr)
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
uint64_t Tick
Tick count type.
std::string csprintf(const char *format, const Args &...args)
GPU TranslationState: this currently is a somewhat bastardization of the usage of SenderState,...
std::vector< ResponsePort * > ports
std::vector< int > reqCnt
const std::string & name()