40#include "debug/GPUTLB.hh"
52 "Probe the TLB below",
54 cleanupEvent([
this]{ processCleanupEvent(); },
55 "Cleanup issuedTranslationsTable hashmap",
57 tlb_level(
p.tlb_level),
58 maxDownstream(
p.maxDownstream),
62 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
63 cpuSidePort.push_back(
new CpuSidePort(
csprintf(
"%s-port%d",
name(),
i),
68 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
69 memSidePort.push_back(
new MemSidePort(
csprintf(
"%s-port%d",
name(),
i),
77 if (if_name ==
"cpu_side_ports") {
79 panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
83 }
else if (if_name ==
"mem_side_ports") {
85 panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
90 panic(
"VegaTLBCoalescer::getPort: unknown port %s\n", if_name);
120 if (incoming_virt_page_addr != coalesced_virt_page_addr)
129 if (incoming_mode != coalesced_mode)
136 coalesced_state->
reqCnt.back() += incoming_state->
reqCnt.back();
150 DPRINTF(GPUTLB,
"Update phys. addr. for %d coalesced reqs for page %#x\n",
158 VegaISA::VegaTlbEntry tlb_entry =
160 Addr first_entry_vaddr = tlb_entry.vaddr;
161 Addr first_entry_paddr = tlb_entry.paddr;
162 int page_size = tlb_entry.size();
163 bool uncacheable = tlb_entry.uncacheable();
164 int first_hit_level = sender_state->
hitLevel;
165 bool is_system = pkt->
req->systemReq();
175 sender_state->
reqCnt.pop_back();
186 Addr paddr = first_entry_paddr
187 + (local_pkt->
req->getVaddr() & (page_size - 1));
188 local_pkt->
req->setPaddr(paddr);
197 if (sender_state->
tlbEntry == NULL) {
200 new VegaISA::VegaTlbEntry(1 ,
210 sender_state->
hitLevel = first_hit_level;
214 local_pkt->
req->setSystemReq(is_system);
217 sender_state->
ports.pop_back();
247 bool didCoalesce =
false;
249 int coalescedReq_cnt = 0;
254 bool update_stats = !sender_state->
isPrefetch;
260 sender_state->
ports.push_back(
this);
270 if (!sender_state->
reqCnt.empty())
271 req_cnt = sender_state->
reqCnt.back();
273 sender_state->
reqCnt.push_back(req_cnt);
277 req_cnt = sender_state->
reqCnt.back();
278 DPRINTF(GPUTLB,
"receiving pkt w/ req_cnt %d\n", req_cnt);
293 if (
coalescer->coalescerFIFO.count(tick_index)) {
294 coalescedReq_cnt =
coalescer->coalescerFIFO[tick_index].size();
299 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
300 first_packet =
coalescer->coalescerFIFO[tick_index][
i][0];
302 if (
coalescer->canCoalesce(pkt, first_packet)) {
303 coalescer->coalescerFIFO[tick_index][
i].push_back(pkt);
305 DPRINTF(GPUTLB,
"Coalesced req %i w/ tick_index %d has %d reqs\n",
307 coalescer->coalescerFIFO[tick_index][
i].size());
317 if (!coalescedReq_cnt || !didCoalesce) {
322 new_array.push_back(pkt);
323 coalescer->coalescerFIFO[tick_index].push_back(new_array);
325 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after "
326 "push\n", tick_index,
327 coalescer->coalescerFIFO[tick_index].size());
332 if (!
coalescer->probeTLBEvent.scheduled()) {
343 panic(
"recvReqRetry called");
353 bool update_stats = !sender_state->
isPrefetch;
359 int map_count =
coalescer->issuedTranslationsTable.count(virt_page_addr);
362 DPRINTF(GPUTLB,
"Warning! Functional access to addr %#x sees timing "
363 "req. pending\n", virt_page_addr);
366 coalescer->memSidePort[0]->sendFunctional(pkt);
393 "recvTimingReq: clscr = %p, numDownstream = %d, max = %d\n",
404 if (!
coalescer->probeTLBEvent.scheduled())
412 fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
437 DPRINTF(GPUTLB,
"triggered VegaTLBCoalescer %s\n", __func__);
441 DPRINTF(GPUTLB,
"IssueProbeEvent - no downstream slots, bail out\n");
447 int coalescedReq_cnt = iter->second.size();
449 int vector_index = 0;
451 DPRINTF(GPUTLB,
"coalescedReq_cnt is %d for tick_index %d\n",
452 coalescedReq_cnt, iter->first);
454 while (
i < coalescedReq_cnt) {
456 PacketPtr first_packet = iter->second[vector_index][0];
474 DPRINTF(GPUTLB,
"Cannot issue - There are pending reqs for "
475 "page %#x\n", virt_page_addr);
482 if (!
memSidePort[0]->sendTimingReq(first_packet)) {
484 "Failed to send TLB request for page %#x",
501 bool update_stats = !tmp_sender_state->
isPrefetch;
507 int req_cnt = tmp_sender_state->
reqCnt.back();
510 DPRINTF(GPUTLB,
"%s sending pkt w/ req_cnt %d\n",
515 int pkt_cnt = iter->second[vector_index].size();
519 DPRINTF(GPUTLB,
"Successfully sent TLB request for page %#x\n",
524 = iter->second[vector_index];
527 iter->second.erase(iter->second.begin() + vector_index);
529 if (iter->second.empty())
530 assert(
i == coalescedReq_cnt );
539 if (iter->second.empty())
555 if (iter->second.empty()) {
571 DPRINTF(GPUTLB,
"Cleanup - Delete coalescer entry with key %#x\n",
582 .name(
name() +
".uncoalesced_accesses")
583 .desc(
"Number of uncoalesced TLB accesses")
587 .name(
name() +
".coalesced_accesses")
588 .desc(
"Number of coalesced TLB accesses")
592 .name(
name() +
".queuing_cycles")
593 .desc(
"Number of cycles spent in queue")
597 .name(
name() +
".local_queuing_cycles")
598 .desc(
"Number of cycles spent in queue for all incoming reqs")
602 .name(
name() +
".local_cycles")
603 .desc(
"Number of cycles spent in queue for all incoming reqs")
607 .name(
name() +
".local_latency")
608 .desc(
"Avg. latency over all incoming pkts")
612 .name(
name() +
".latency")
613 .desc(
"Avg. latency over all incoming pkts")
630 "insertStalledPortIfNotMapped: port %p, mapSz = %d, qsz = %d\n",
639 DPRINTF(GPUTLB,
"mustStallCUPort: downstream = %d, max = %d\n",
643 warn(
"RED ALERT - VegaTLBCoalescer::mustStallCUPort\n");
658 DPRINTF(GPUTLB,
"unstallPorts()\n");
681 DPRINTF(GPUTLB,
"sending retry for port = %p(%s)\n", port, port->name());
686 port->sendRetryReq();
ClockedObject(const ClockedObjectParams &p)
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
virtual std::string name() const
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
Ports are used to interface objects to each other.
UNCACHEABLE
The request is to an uncacheable address.
A ResponsePort is a specialization of a port.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
virtual void recvReqRetry()
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
VegaTLBCoalescer * coalescer
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on the peer) and was unsuccessful.
virtual void recvFunctional(PacketPtr pkt)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
VegaTLBCoalescer * coalescer
statistics::Scalar localqueuingCycles
statistics::Scalar coalescedAccesses
void processCleanupEvent()
CoalescingTable issuedTranslationsTable
void updatePhysAddresses(PacketPtr pkt)
statistics::Scalar queuingCycles
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required clean-up.
std::queue< CpuSidePort * > stalledPortsQueue
void incrementNumDownstream()
statistics::Scalar uncoalescedAccesses
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::vector< CpuSidePort * > cpuSidePort
std::vector< MemSidePort * > memSidePort
EventFunctionWrapper probeTLBEvent
This event issues the TLB probes.
VegaTLBCoalescer(const VegaTLBCoalescerParams &p)
void insertStalledPortIfNotMapped(CpuSidePort *)
CoalescingFIFO coalescerFIFO
std::map< CpuSidePort *, CpuSidePort * > stalledPortsMap
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
void regStats() override
Callback to set stat parameters.
statistics::Formula localLatency
unsigned int availDownstreamSlots()
std::queue< Addr > cleanupQueue
bool mustStallCUPort(CpuSidePort *)
statistics::Scalar localCycles
unsigned int numDownstream
void processProbeTLBEvent()
statistics::Formula latency
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void regStats()
Callback to set stat parameters.
Copyright (c) 2024 Arm Limited All rights reserved.
T safe_cast(U &&ref_or_ptr)
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
uint64_t Tick
Tick count type.
std::string csprintf(const char *format, const Args &...args)
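GPU TranslationState: this currently is a somewhat bastardization of the usage of SenderState, whereby the receiver of a packet is not usually supposed to need to look at the contents of the senderState; you're really only supposed to look at what you pushed on, pop it off, and send it back.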
std::vector< ResponsePort * > ports
std::vector< int > reqCnt
const std::string & name()