39#include "debug/GPUTLB.hh"
47 TLBProbesPerCycle(
p.probesPerCycle),
48 coalescingWindow(
p.coalescingWindow),
49 disableCoalescing(
p.disableCoalescing),
51 "Probe the TLB below",
53 cleanupEvent([
this]{ processCleanupEvent(); },
54 "Cleanup issuedTranslationsTable hashmap",
59 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
65 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
74 if (if_name ==
"cpu_side_ports") {
76 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
80 }
else if (if_name ==
"mem_side_ports") {
82 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
87 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
117 if (incoming_virt_page_addr != coalesced_virt_page_addr)
126 if (incoming_mode != coalesced_mode)
133 coalesced_state->
reqCnt.back() += incoming_state->
reqCnt.back();
147 DPRINTF(GPUTLB,
"Update phys. addr. for %d coalesced reqs for page %#x\n",
156 Addr first_entry_vaddr = tlb_entry->
vaddr;
157 Addr first_entry_paddr = tlb_entry->
paddr;
158 int page_size = tlb_entry->
size();
160 int first_hit_level = sender_state->
hitLevel;
165 Addr phys_page_paddr = pkt->
req->getPaddr();
166 phys_page_paddr &= ~(page_size - 1);
177 sender_state->
reqCnt.pop_back();
186 Addr paddr = phys_page_paddr;
187 paddr |= (local_pkt->
req->getVaddr() & (page_size - 1));
188 local_pkt->
req->setPaddr(paddr);
198 first_entry_paddr,
false,
false);
203 sender_state->
hitLevel = first_hit_level;
207 sender_state->
ports.pop_back();
237 bool didCoalesce =
false;
239 int coalescedReq_cnt = 0;
245 sender_state->
ports.push_back(
this);
247 bool update_stats = !sender_state->
isPrefetch;
257 if (!sender_state->
reqCnt.empty())
258 req_cnt = sender_state->
reqCnt.back();
260 sender_state->
reqCnt.push_back(req_cnt);
264 req_cnt = sender_state->
reqCnt.back();
265 DPRINTF(GPUTLB,
"receiving pkt w/ req_cnt %d\n", req_cnt);
289 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
295 DPRINTF(GPUTLB,
"Coalesced req %i w/ tick_index %d has %d reqs\n",
307 if (!coalescedReq_cnt || !didCoalesce) {
312 new_array.push_back(pkt);
315 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after "
316 "push\n", tick_index,
333 panic(
"recvReqRetry called");
343 bool update_stats = !sender_state->
isPrefetch;
346 coalescer->stats.uncoalescedAccesses++;
353 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
356 DPRINTF(GPUTLB,
"Warning! Functional access to addr %#x sees timing "
357 "req. pending\n", virt_page_addr);
360 coalescer->memSidePort[0]->sendFunctional(pkt);
376 coalescer->updatePhysAddresses(pkt);
385 if (!coalescer->probeTLBEvent.scheduled())
386 coalescer->schedule(coalescer->probeTLBEvent,
387 curTick() + coalescer->clockPeriod());
393 fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
414 bool rejected =
false;
420 DPRINTF(GPUTLB,
"triggered TLBCoalescer %s\n", __func__);
424 int coalescedReq_cnt = iter->second.size();
426 int vector_index = 0;
428 DPRINTF(GPUTLB,
"coalescedReq_cnt is %d for tick_index %d\n",
429 coalescedReq_cnt, iter->first);
431 while (
i < coalescedReq_cnt) {
433 PacketPtr first_packet = iter->second[vector_index][0];
444 DPRINTF(GPUTLB,
"Cannot issue - There are pending reqs for "
445 "page %#x\n", virt_page_addr);
454 if (!
memSidePort[0]->sendTimingReq(first_packet)) {
455 DPRINTF(GPUTLB,
"Failed to send TLB request for page %#x\n",
467 bool update_stats = !tmp_sender_state->
isPrefetch;
473 int req_cnt = tmp_sender_state->
reqCnt.back();
476 DPRINTF(GPUTLB,
"%s sending pkt w/ req_cnt %d\n",
481 int pkt_cnt = iter->second[vector_index].size();
485 DPRINTF(GPUTLB,
"Successfully sent TLB request for page %#x\n",
490 = iter->second[vector_index];
493 iter->second.erase(iter->second.begin() + vector_index);
495 if (iter->second.empty())
496 assert(
i == coalescedReq_cnt);
506 if (iter->second.empty()) {
522 DPRINTF(GPUTLB,
"Cleanup - Delete coalescer entry with key %#x\n",
528 : statistics::
Group(parent),
529 ADD_STAT(uncoalescedAccesses,
"Number of uncoalesced TLB accesses"),
530 ADD_STAT(coalescedAccesses,
"Number of coalesced TLB accesses"),
531 ADD_STAT(queuingCycles,
"Number of cycles spent in queue"),
533 "Number of cycles spent in queue for all incoming reqs"),
534 ADD_STAT(localLatency,
"Avg. latency over all incoming pkts")
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void makeTimingResponse()
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
@ UNCACHEABLE
The request is to an uncacheable address.
A ResponsePort is a specialization of a port.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
virtual void recvReqRetry()
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
virtual void recvFunctional(PacketPtr pkt)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
TLBCoalescerParams Params
std::queue< Addr > cleanupQueue
gem5::TLBCoalescer::TLBCoalescerStats stats
CoalescingTable issuedTranslationsTable
std::vector< MemSidePort * > memSidePort
void updatePhysAddresses(PacketPtr pkt)
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required c...
void processCleanupEvent()
EventFunctionWrapper probeTLBEvent
This event issues the TLB probes.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
CoalescingFIFO coalescerFIFO
TLBCoalescer(const Params &p)
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
void processProbeTLBEvent()
std::vector< CpuSidePort * > cpuSidePort
virtual Process * getProcessPtr()=0
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
static const Priority Maximum_Pri
Maximum priority.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
Copyright (c) 2024 - Pranith Kumar. Copyright (c) 2020 Inria. All rights reserved.
T safe_cast(U &&ref_or_ptr)
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::string csprintf(const char *format, const Args &...args)
GPU TranslationState: this is currently somewhat of a bastardization of the usage of SenderState,...
std::vector< ResponsePort * > ports
std::vector< int > reqCnt
statistics::Scalar queuingCycles
statistics::Scalar localqueuingCycles
statistics::Formula localLatency
statistics::Scalar coalescedAccesses
statistics::Scalar uncoalescedAccesses
TLBCoalescerStats(statistics::Group *parent)