41 #include "debug/GPUTLB.hh" 46 clock(p->clk_domain->clockPeriod()),
47 TLBProbesPerCycle(p->probesPerCycle),
48 coalescingWindow(p->coalescingWindow),
49 disableCoalescing(p->disableCoalescing),
51 "Probe the TLB below",
54 "Cleanup issuedTranslationsTable hashmap",
58 for (
size_t i = 0;
i <
p->port_slave_connection_count; ++
i) {
64 for (
size_t i = 0;
i <
p->port_master_connection_count; ++
i) {
73 if (if_name ==
"slave") {
74 if (idx >= static_cast<PortID>(
cpuSidePort.size())) {
75 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
79 }
else if (if_name ==
"master") {
80 if (idx >= static_cast<PortID>(
memSidePort.size())) {
81 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
86 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
102 TheISA::GpuTLB::TranslationState *incoming_state =
105 TheISA::GpuTLB::TranslationState *coalesced_state =
116 if (incoming_virt_page_addr != coalesced_virt_page_addr)
125 if (incoming_mode != coalesced_mode)
131 if (!incoming_state->prefetch)
132 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
146 DPRINTF(GPUTLB,
"Update phys. addr. for %d coalesced reqs for page %#x\n",
149 TheISA::GpuTLB::TranslationState *sender_state =
152 TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
154 Addr first_entry_vaddr = tlb_entry->vaddr;
155 Addr first_entry_paddr = tlb_entry->paddr;
156 int page_size = tlb_entry->size();
157 bool uncacheable = tlb_entry->uncacheable;
158 int first_hit_level = sender_state->hitLevel;
163 Addr phys_page_paddr = pkt->
req->getPaddr();
164 phys_page_paddr &= ~(page_size - 1);
168 TheISA::GpuTLB::TranslationState *sender_state =
169 safe_cast<TheISA::GpuTLB::TranslationState*>(
174 if (!sender_state->prefetch)
175 sender_state->reqCnt.pop_back();
184 Addr paddr = phys_page_paddr;
185 paddr |= (local_pkt->
req->getVaddr() & (page_size - 1));
186 local_pkt->
req->setPaddr(paddr);
193 auto p = sender_state->tc->getProcessPtr();
194 sender_state->tlbEntry =
195 new TheISA::TlbEntry(
p->pid(), first_entry_vaddr,
196 first_entry_paddr,
false,
false);
201 sender_state->hitLevel = first_hit_level;
204 SlavePort *return_port = sender_state->ports.back();
205 sender_state->ports.pop_back();
235 bool didCoalesce =
false;
237 int coalescedReq_cnt = 0;
239 TheISA::GpuTLB::TranslationState *sender_state =
243 sender_state->ports.push_back(
this);
245 bool update_stats = !sender_state->prefetch;
255 if (!sender_state->reqCnt.empty())
256 req_cnt = sender_state->reqCnt.back();
258 sender_state->reqCnt.push_back(req_cnt);
261 coalescer->uncoalescedAccesses++;
262 req_cnt = sender_state->reqCnt.back();
263 DPRINTF(GPUTLB,
"receiving pkt w/ req_cnt %d\n", req_cnt);
264 coalescer->queuingCycles -= (
curTick() * req_cnt);
265 coalescer->localqueuingCycles -=
curTick();
273 if (!sender_state->issueTime)
274 sender_state->issueTime =
curTick();
279 int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
281 if (coalescer->coalescerFIFO.count(tick_index)) {
282 coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
287 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
288 first_packet = coalescer->coalescerFIFO[tick_index][
i][0];
290 if (coalescer->canCoalesce(pkt, first_packet)) {
291 coalescer->coalescerFIFO[tick_index][
i].push_back(pkt);
293 DPRINTF(GPUTLB,
"Coalesced req %i w/ tick_index %d has %d reqs\n",
295 coalescer->coalescerFIFO[tick_index][
i].size());
305 if (!coalescedReq_cnt || !didCoalesce) {
307 coalescer->coalescedAccesses++;
310 new_array.push_back(pkt);
311 coalescer->coalescerFIFO[tick_index].push_back(new_array);
313 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after " 314 "push\n", tick_index,
315 coalescer->coalescerFIFO[tick_index].size());
320 if (!coalescer->probeTLBEvent.scheduled()) {
321 coalescer->schedule(coalescer->probeTLBEvent,
322 curTick() + coalescer->ticks(1));
331 panic(
"recvReqRetry called");
338 TheISA::GpuTLB::TranslationState *sender_state =
341 bool update_stats = !sender_state->prefetch;
344 coalescer->uncoalescedAccesses++;
351 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
354 DPRINTF(GPUTLB,
"Warning! Functional access to addr %#x sees timing " 355 "req. pending\n", virt_page_addr);
358 coalescer->memSidePort[0]->sendFunctional(pkt);
374 coalescer->updatePhysAddresses(pkt);
383 if (!coalescer->probeTLBEvent.scheduled())
384 coalescer->schedule(coalescer->probeTLBEvent,
385 curTick() + coalescer->ticks(1));
391 fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
412 bool rejected =
false;
418 DPRINTF(GPUTLB,
"triggered TLBCoalescer %s\n", __func__);
422 int coalescedReq_cnt = iter->second.size();
424 int vector_index = 0;
426 DPRINTF(GPUTLB,
"coalescedReq_cnt is %d for tick_index %d\n",
427 coalescedReq_cnt, iter->first);
429 while (i < coalescedReq_cnt) {
431 PacketPtr first_packet = iter->second[vector_index][0];
442 DPRINTF(GPUTLB,
"Cannot issue - There are pending reqs for " 443 "page %#x\n", virt_page_addr);
452 if (!
memSidePort[0]->sendTimingReq(first_packet)) {
453 DPRINTF(GPUTLB,
"Failed to send TLB request for page %#x",
461 TheISA::GpuTLB::TranslationState *tmp_sender_state =
462 safe_cast<TheISA::GpuTLB::TranslationState*>
465 bool update_stats = !tmp_sender_state->prefetch;
471 int req_cnt = tmp_sender_state->reqCnt.back();
474 DPRINTF(GPUTLB,
"%s sending pkt w/ req_cnt %d\n",
479 int pkt_cnt = iter->second[vector_index].size();
483 DPRINTF(GPUTLB,
"Successfully sent TLB request for page %#x",
488 = iter->second[vector_index];
491 iter->second.erase(iter->second.begin() + vector_index);
493 if (iter->second.empty())
494 assert(i == coalescedReq_cnt);
504 if (iter->second.empty()) {
520 DPRINTF(GPUTLB,
"Cleanup - Delete coalescer entry with key %#x\n",
531 .
name(
name() +
".uncoalesced_accesses")
532 .
desc(
"Number of uncoalesced TLB accesses")
536 .
name(
name() +
".coalesced_accesses")
537 .
desc(
"Number of coalesced TLB accesses")
542 .
desc(
"Number of cycles spent in queue")
546 .
name(
name() +
".local_queuing_cycles")
547 .
desc(
"Number of cycles spent in queue for all incoming reqs")
552 .
desc(
"Avg. latency over all incoming pkts")
560 TLBCoalescerParams::create()
#define panic(...)
This implements a cprintf based panic() function.
The request is to an uncacheable address.
Ports are used to interface objects to each other.
TLBCoalescer(const Params *p)
#define fatal(...)
This implements a cprintf based fatal() function.
virtual void recvFunctional(PacketPtr pkt)
CoalescingFIFO coalescerFIFO
void updatePhysAddresses(PacketPtr pkt)
virtual void recvReqRetry()
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on the peer) and was unsuccessful.
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
void makeTimingResponse()
The TLBCoalescer is a ClockedObject sitting on the front side (CPUSide) of each TLB.
CoalescingTable issuedTranslationsTable
virtual void regStats()
Callback to set stat parameters.
A SlavePort is a specialisation of a port.
Stats::Scalar localqueuingCycles
RequestPtr req
A pointer to the original request.
std::queue< Addr > cleanupQueue
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the master port by calling its corresponding receive function...
void processCleanupEvent()
Tick curTick()
The current simulated tick.
std::string csprintf(const char *format, const Args &...args)
bool scheduled() const
Determine if the current event is scheduled.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to the cycles of the object.
TLBCoalescerParams Params
Stats::Formula localLatency
T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Stats::Scalar coalescedAccesses
EventFunctionWrapper cleanupEvent
The cleanupEvent is scheduled after a TLBEvent triggers in order to free memory and do the required clean-up.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
virtual const std::string name() const
std::vector< CpuSidePort * > cpuSidePort
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g. the L1 and L2 cache).
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the peer.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
std::vector< MemSidePort * > memSidePort
SenderState * senderState
This packet's sender state.
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
void regStats() override
Callback to set stat parameters.
Stats::Scalar uncoalescedAccesses
void schedule(Event &event, Tick when)
virtual void recvReqRetry()
Stats::Scalar queuingCycles
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the peer.
void processProbeTLBEvent()
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
static const Priority Maximum_Pri
Maximum priority.
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.