39 #include "debug/GPUTLB.hh" 44 clock(p->clk_domain->clockPeriod()),
45 TLBProbesPerCycle(p->probesPerCycle),
46 coalescingWindow(p->coalescingWindow),
47 disableCoalescing(p->disableCoalescing),
49 "Probe the TLB below",
52 "Cleanup issuedTranslationsTable hashmap",
56 for (
size_t i = 0;
i <
p->port_slave_connection_count; ++
i) {
62 for (
size_t i = 0;
i <
p->port_master_connection_count; ++
i) {
71 if (if_name ==
"slave") {
72 if (idx >= static_cast<PortID>(
cpuSidePort.size())) {
73 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
77 }
else if (if_name ==
"master") {
78 if (idx >= static_cast<PortID>(
memSidePort.size())) {
79 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
84 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
100 TheISA::GpuTLB::TranslationState *incoming_state =
103 TheISA::GpuTLB::TranslationState *coalesced_state =
114 if (incoming_virt_page_addr != coalesced_virt_page_addr)
123 if (incoming_mode != coalesced_mode)
129 if (!incoming_state->prefetch)
130 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
144 DPRINTF(GPUTLB,
"Update phys. addr. for %d coalesced reqs for page %#x\n",
147 TheISA::GpuTLB::TranslationState *sender_state =
150 TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
152 Addr first_entry_vaddr = tlb_entry->vaddr;
153 Addr first_entry_paddr = tlb_entry->paddr;
154 int page_size = tlb_entry->size();
155 bool uncacheable = tlb_entry->uncacheable;
156 int first_hit_level = sender_state->hitLevel;
161 Addr phys_page_paddr = pkt->
req->getPaddr();
162 phys_page_paddr &= ~(page_size - 1);
166 TheISA::GpuTLB::TranslationState *sender_state =
167 safe_cast<TheISA::GpuTLB::TranslationState*>(
172 if (!sender_state->prefetch)
173 sender_state->reqCnt.pop_back();
182 Addr paddr = phys_page_paddr;
183 paddr |= (local_pkt->
req->getVaddr() & (page_size - 1));
184 local_pkt->
req->setPaddr(paddr);
191 auto p = sender_state->tc->getProcessPtr();
192 sender_state->tlbEntry =
193 new TheISA::TlbEntry(
p->pid(), first_entry_vaddr,
194 first_entry_paddr,
false,
false);
199 sender_state->hitLevel = first_hit_level;
202 SlavePort *return_port = sender_state->ports.back();
203 sender_state->ports.pop_back();
233 bool didCoalesce =
false;
235 int coalescedReq_cnt = 0;
237 TheISA::GpuTLB::TranslationState *sender_state =
241 sender_state->ports.push_back(
this);
243 bool update_stats = !sender_state->prefetch;
253 if (!sender_state->reqCnt.empty())
254 req_cnt = sender_state->reqCnt.back();
256 sender_state->reqCnt.push_back(req_cnt);
259 coalescer->uncoalescedAccesses++;
260 req_cnt = sender_state->reqCnt.back();
261 DPRINTF(GPUTLB,
"receiving pkt w/ req_cnt %d\n", req_cnt);
262 coalescer->queuingCycles -= (
curTick() * req_cnt);
263 coalescer->localqueuingCycles -=
curTick();
271 if (!sender_state->issueTime)
272 sender_state->issueTime =
curTick();
277 int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
279 if (coalescer->coalescerFIFO.count(tick_index)) {
280 coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
285 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
286 first_packet = coalescer->coalescerFIFO[tick_index][
i][0];
288 if (coalescer->canCoalesce(pkt, first_packet)) {
289 coalescer->coalescerFIFO[tick_index][
i].push_back(pkt);
291 DPRINTF(GPUTLB,
"Coalesced req %i w/ tick_index %d has %d reqs\n",
293 coalescer->coalescerFIFO[tick_index][
i].size());
303 if (!coalescedReq_cnt || !didCoalesce) {
305 coalescer->coalescedAccesses++;
308 new_array.push_back(pkt);
309 coalescer->coalescerFIFO[tick_index].push_back(new_array);
311 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after " 312 "push\n", tick_index,
313 coalescer->coalescerFIFO[tick_index].size());
318 if (!coalescer->probeTLBEvent.scheduled()) {
319 coalescer->schedule(coalescer->probeTLBEvent,
320 curTick() + coalescer->ticks(1));
329 panic(
"recvReqRetry called");
336 TheISA::GpuTLB::TranslationState *sender_state =
339 bool update_stats = !sender_state->prefetch;
342 coalescer->uncoalescedAccesses++;
349 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
352 DPRINTF(GPUTLB,
"Warning! Functional access to addr %#x sees timing " 353 "req. pending\n", virt_page_addr);
356 coalescer->memSidePort[0]->sendFunctional(pkt);
372 coalescer->updatePhysAddresses(pkt);
381 if (!coalescer->probeTLBEvent.scheduled())
382 coalescer->schedule(coalescer->probeTLBEvent,
383 curTick() + coalescer->ticks(1));
389 fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
410 bool rejected =
false;
416 DPRINTF(GPUTLB,
"triggered TLBCoalescer %s\n", __func__);
420 int coalescedReq_cnt = iter->second.size();
422 int vector_index = 0;
424 DPRINTF(GPUTLB,
"coalescedReq_cnt is %d for tick_index %d\n",
425 coalescedReq_cnt, iter->first);
427 while (i < coalescedReq_cnt) {
429 PacketPtr first_packet = iter->second[vector_index][0];
440 DPRINTF(GPUTLB,
"Cannot issue - There are pending reqs for " 441 "page %#x\n", virt_page_addr);
450 if (!
memSidePort[0]->sendTimingReq(first_packet)) {
451 DPRINTF(GPUTLB,
"Failed to send TLB request for page %#x",
459 TheISA::GpuTLB::TranslationState *tmp_sender_state =
460 safe_cast<TheISA::GpuTLB::TranslationState*>
463 bool update_stats = !tmp_sender_state->prefetch;
469 int req_cnt = tmp_sender_state->reqCnt.back();
472 DPRINTF(GPUTLB,
"%s sending pkt w/ req_cnt %d\n",
477 int pkt_cnt = iter->second[vector_index].size();
481 DPRINTF(GPUTLB,
"Successfully sent TLB request for page %#x",
486 = iter->second[vector_index];
489 iter->second.erase(iter->second.begin() + vector_index);
491 if (iter->second.empty())
492 assert(i == coalescedReq_cnt);
502 if (iter->second.empty()) {
518 DPRINTF(GPUTLB,
"Cleanup - Delete coalescer entry with key %#x\n",
529 .
name(
name() +
".uncoalesced_accesses")
530 .
desc(
"Number of uncoalesced TLB accesses")
534 .
name(
name() +
".coalesced_accesses")
535 .
desc(
"Number of coalesced TLB accesses")
540 .
desc(
"Number of cycles spent in queue")
544 .
name(
name() +
".local_queuing_cycles")
545 .
desc(
"Number of cycles spent in queue for all incoming reqs")
550 .
desc(
"Avg. latency over all incoming pkts")
558 TLBCoalescerParams::create()
#define panic(...)
This implements a cprintf based panic() function.
static const Priority Maximum_Pri
Maximum priority.
Ports are used to interface objects to each other.
TLBCoalescer(const Params *p)
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void recvFunctional(PacketPtr pkt)
CoalescingFIFO coalescerFIFO
void updatePhysAddresses(PacketPtr pkt)
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on the peer) and was unsuccessful.
void makeTimingResponse()
The TLBCoalescer is a ClockedObject sitting on the front side (CPUSide) of each TLB.
CoalescingTable issuedTranslationsTable
A SlavePort is a specialisation of a port.
The request is to an uncacheable address.
Stats::Scalar localqueuingCycles
RequestPtr req
A pointer to the original request.
std::queue< Addr > cleanupQueue
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the master port by calling its corresponding receive function.
void processCleanupEvent()
Tick curTick()
The current simulated tick.
std::string csprintf(const char *format, const Args &...args)
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to the cycles of the object.
TLBCoalescerParams Params
Stats::Formula localLatency
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
void schedule(Event &event, Tick when)
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Stats::Scalar coalescedAccesses
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required clean-up.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
std::vector< CpuSidePort * > cpuSidePort
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
bool scheduled() const
Determine if the current event is scheduled.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
virtual const std::string name() const
std::vector< MemSidePort * > memSidePort
SenderState * senderState
This packet's sender state.
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
void regStats() override
Callback to set stat parameters.
Stats::Scalar uncoalescedAccesses
virtual void recvReqRetry()
Stats::Scalar queuingCycles
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
virtual void regStats()
Callback to set stat parameters.
void processProbeTLBEvent()
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.