40#include "debug/GPUTLB.hh"
48 TLBProbesPerCycle(
p.probesPerCycle),
49 coalescingWindow(
p.coalescingWindow),
50 disableCoalescing(
p.disableCoalescing),
52 "Probe the TLB below",
54 cleanupEvent([
this]{ processCleanupEvent(); },
55 "Cleanup issuedTranslationsTable hashmap",
57 tlb_level(
p.tlb_level),
58 maxDownstream(
p.maxDownstream),
62 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
63 cpuSidePort.push_back(
new CpuSidePort(
csprintf(
"%s-port%d",
name(),
i),
68 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
69 memSidePort.push_back(
new MemSidePort(
csprintf(
"%s-port%d",
name(),
i),
77 if (if_name ==
"cpu_side_ports") {
79 panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
83 }
else if (if_name ==
"mem_side_ports") {
85 panic(
"VegaTLBCoalescer::getPort: unknown index %d\n", idx);
90 panic(
"VegaTLBCoalescer::getPort: unknown port %s\n", if_name);
120 if (incoming_virt_page_addr != coalesced_virt_page_addr)
129 if (incoming_mode != coalesced_mode)
136 coalesced_state->
reqCnt.back() += incoming_state->
reqCnt.back();
150 DPRINTF(GPUTLB,
"Update phys. addr. for %d coalesced reqs for page %#x\n",
158 VegaISA::VegaTlbEntry tlb_entry =
160 Addr first_entry_vaddr = tlb_entry.vaddr;
161 Addr first_entry_paddr = tlb_entry.paddr;
162 int page_size = tlb_entry.size();
163 bool uncacheable = tlb_entry.uncacheable();
164 int first_hit_level = sender_state->
hitLevel;
165 bool is_system = pkt->
req->systemReq();
175 sender_state->
reqCnt.pop_back();
186 Addr paddr = first_entry_paddr
187 + (local_pkt->
req->getVaddr() & (page_size - 1));
188 local_pkt->
req->setPaddr(paddr);
197 if (sender_state->
tlbEntry == NULL) {
200 new VegaISA::VegaTlbEntry(1 ,
210 sender_state->
hitLevel = first_hit_level;
214 local_pkt->
req->setSystemReq(is_system);
217 sender_state->
ports.pop_back();
247 bool didCoalesce =
false;
249 int coalescedReq_cnt = 0;
254 bool update_stats = !sender_state->
isPrefetch;
260 sender_state->
ports.push_back(
this);
270 if (!sender_state->
reqCnt.empty())
271 req_cnt = sender_state->
reqCnt.back();
273 sender_state->
reqCnt.push_back(req_cnt);
277 req_cnt = sender_state->
reqCnt.back();
278 DPRINTF(GPUTLB,
"receiving pkt w/ req_cnt %d\n", req_cnt);
299 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
305 DPRINTF(GPUTLB,
"Coalesced req %i w/ tick_index %d has %d reqs\n",
317 if (!coalescedReq_cnt || !didCoalesce) {
322 new_array.push_back(pkt);
325 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after "
326 "push\n", tick_index,
343 panic(
"recvReqRetry called");
353 bool update_stats = !sender_state->
isPrefetch;
356 coalescer->uncoalescedAccesses++;
359 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
362 DPRINTF(GPUTLB,
"Warning! Functional access to addr %#x sees timing "
363 "req. pending\n", virt_page_addr);
366 coalescer->memSidePort[0]->sendFunctional(pkt);
384 coalescer->updatePhysAddresses(pkt);
386 if (coalescer->tlb_level != 1)
390 coalescer->decrementNumDownstream();
393 "recvTimingReq: clscr = %p, numDownstream = %d, max = %d\n",
394 coalescer, coalescer->numDownstream, coalescer->maxDownstream);
396 coalescer->unstallPorts();
404 if (!coalescer->probeTLBEvent.scheduled())
405 coalescer->schedule(coalescer->probeTLBEvent,
406 curTick() + coalescer->clockPeriod());
412 fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
437 DPRINTF(GPUTLB,
"triggered VegaTLBCoalescer %s\n", __func__);
441 DPRINTF(GPUTLB,
"IssueProbeEvent - no downstream slots, bail out\n");
447 int coalescedReq_cnt = iter->second.size();
449 int vector_index = 0;
451 DPRINTF(GPUTLB,
"coalescedReq_cnt is %d for tick_index %d\n",
452 coalescedReq_cnt, iter->first);
454 while (
i < coalescedReq_cnt) {
456 PacketPtr first_packet = iter->second[vector_index][0];
474 DPRINTF(GPUTLB,
"Cannot issue - There are pending reqs for "
475 "page %#x\n", virt_page_addr);
482 if (!
memSidePort[0]->sendTimingReq(first_packet)) {
484 "Failed to send TLB request for page %#x",
501 bool update_stats = !tmp_sender_state->
isPrefetch;
507 int req_cnt = tmp_sender_state->
reqCnt.back();
510 DPRINTF(GPUTLB,
"%s sending pkt w/ req_cnt %d\n",
515 int pkt_cnt = iter->second[vector_index].size();
519 DPRINTF(GPUTLB,
"Successfully sent TLB request for page %#x\n",
524 = iter->second[vector_index];
527 iter->second.erase(iter->second.begin() + vector_index);
529 if (iter->second.empty())
530 assert(
i == coalescedReq_cnt );
539 if (iter->second.empty())
555 if (iter->second.empty()) {
571 DPRINTF(GPUTLB,
"Cleanup - Delete coalescer entry with key %#x\n",
582 .
name(
name() +
".uncoalesced_accesses")
583 .
desc(
"Number of uncoalesced TLB accesses")
587 .
name(
name() +
".coalesced_accesses")
588 .
desc(
"Number of coalesced TLB accesses")
593 .
desc(
"Number of cycles spent in queue")
597 .
name(
name() +
".local_queuing_cycles")
598 .
desc(
"Number of cycles spent in queue for all incoming reqs")
603 .
desc(
"Number of cycles spent in queue for all incoming reqs")
608 .
desc(
"Avg. latency over all incoming pkts")
613 .
desc(
"Avg. latency over all incoming pkts")
630 "insertStalledPortIfNotMapped: port %p, mapSz = %d, qsz = %d\n",
639 DPRINTF(GPUTLB,
"mustStallCUPort: downstream = %d, max = %d\n",
643 warn(
"RED ALERT - VegaTLBCoalescer::mustStallCUPort\n");
658 DPRINTF(GPUTLB,
"unstallPorts()\n");
681 DPRINTF(GPUTLB,
"sending retry for port = %p(%s)\n", port, port->name());
686 port->sendRetryReq();
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to the cycles of the object.
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
@ UNCACHEABLE
The request is to an uncacheable address.
A ResponsePort is a specialization of a port.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
virtual void recvReqRetry()
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
VegaTLBCoalescer * coalescer
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on the peer) and was unsuccessful.
virtual void recvFunctional(PacketPtr pkt)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
statistics::Scalar localqueuingCycles
statistics::Scalar coalescedAccesses
void processCleanupEvent()
CoalescingTable issuedTranslationsTable
void updatePhysAddresses(PacketPtr pkt)
statistics::Scalar queuingCycles
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required c...
std::queue< CpuSidePort * > stalledPortsQueue
void incrementNumDownstream()
statistics::Scalar uncoalescedAccesses
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::vector< CpuSidePort * > cpuSidePort
std::vector< MemSidePort * > memSidePort
EventFunctionWrapper probeTLBEvent
This event issues the TLB probes.
VegaTLBCoalescer(const VegaTLBCoalescerParams &p)
void insertStalledPortIfNotMapped(CpuSidePort *)
CoalescingFIFO coalescerFIFO
std::map< CpuSidePort *, CpuSidePort * > stalledPortsMap
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
void regStats() override
Callback to set stat parameters.
statistics::Formula localLatency
unsigned int availDownstreamSlots()
std::queue< Addr > cleanupQueue
bool mustStallCUPort(CpuSidePort *)
statistics::Scalar localCycles
unsigned int numDownstream
void processProbeTLBEvent()
statistics::Formula latency
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void regStats()
Callback to set stat parameters.
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
T safe_cast(U &&ref_or_ptr)
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
uint64_t Tick
Tick count type.
std::string csprintf(const char *format, const Args &...args)
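GPU TranslationState: this is currently somewhat of a bastardization of the usage of SenderState, whereby the receiver of a packet is not usually supposed to need to look at the contents of the senderState; you are really only supposed to look at what you pushed on, pop it off, and send it back.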
std::vector< ResponsePort * > ports
std::vector< int > reqCnt
const std::string & name()