40 #include "debug/GPUTLB.hh"
45 TLBProbesPerCycle(
p.probesPerCycle),
46 coalescingWindow(
p.coalescingWindow),
47 disableCoalescing(
p.disableCoalescing),
49 "Probe the TLB below",
51 cleanupEvent([
this]{ processCleanupEvent(); },
52 "Cleanup issuedTranslationsTable hashmap",
57 for (
size_t i = 0;
i <
p.port_cpu_side_ports_connection_count; ++
i) {
63 for (
size_t i = 0;
i <
p.port_mem_side_ports_connection_count; ++
i) {
72 if (if_name ==
"cpu_side_ports") {
74 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
78 }
else if (if_name ==
"mem_side_ports") {
80 panic(
"TLBCoalescer::getPort: unknown index %d\n", idx);
85 panic(
"TLBCoalescer::getPort: unknown port %s\n", if_name);
101 TheISA::GpuTLB::TranslationState *incoming_state =
102 safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->
senderState);
104 TheISA::GpuTLB::TranslationState *coalesced_state =
105 safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->
senderState);
115 if (incoming_virt_page_addr != coalesced_virt_page_addr)
124 if (incoming_mode != coalesced_mode)
130 if (!incoming_state->prefetch)
131 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
145 DPRINTF(GPUTLB,
"Update phys. addr. for %d coalesced reqs for page %#x\n",
148 TheISA::GpuTLB::TranslationState *sender_state =
149 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->
senderState);
151 TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
153 Addr first_entry_vaddr = tlb_entry->vaddr;
154 Addr first_entry_paddr = tlb_entry->paddr;
155 int page_size = tlb_entry->size();
156 bool uncacheable = tlb_entry->uncacheable;
157 int first_hit_level = sender_state->hitLevel;
162 Addr phys_page_paddr = pkt->
req->getPaddr();
163 phys_page_paddr &= ~(page_size - 1);
167 TheISA::GpuTLB::TranslationState *sender_state =
168 safe_cast<TheISA::GpuTLB::TranslationState*>(
173 if (!sender_state->prefetch)
174 sender_state->reqCnt.pop_back();
183 Addr paddr = phys_page_paddr;
184 paddr |= (local_pkt->
req->getVaddr() & (page_size - 1));
185 local_pkt->
req->setPaddr(paddr);
192 auto p = sender_state->tc->getProcessPtr();
193 sender_state->tlbEntry =
194 new TheISA::TlbEntry(
p->pid(), first_entry_vaddr,
195 first_entry_paddr,
false,
false);
200 sender_state->hitLevel = first_hit_level;
204 sender_state->ports.pop_back();
234 bool didCoalesce =
false;
236 int coalescedReq_cnt = 0;
238 TheISA::GpuTLB::TranslationState *sender_state =
239 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->
senderState);
242 sender_state->ports.push_back(
this);
244 bool update_stats = !sender_state->prefetch;
254 if (!sender_state->reqCnt.empty())
255 req_cnt = sender_state->reqCnt.back();
257 sender_state->reqCnt.push_back(req_cnt);
261 req_cnt = sender_state->reqCnt.back();
262 DPRINTF(GPUTLB,
"receiving pkt w/ req_cnt %d\n", req_cnt);
272 if (!sender_state->issueTime)
273 sender_state->issueTime =
curTick();
286 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
292 DPRINTF(GPUTLB,
"Coalesced req %i w/ tick_index %d has %d reqs\n",
304 if (!coalescedReq_cnt || !didCoalesce) {
309 new_array.push_back(pkt);
312 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after "
313 "push\n", tick_index,
330 panic(
"recvReqRetry called");
337 TheISA::GpuTLB::TranslationState *sender_state =
338 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->
senderState);
340 bool update_stats = !sender_state->prefetch;
343 coalescer->stats.uncoalescedAccesses++;
350 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
353 DPRINTF(GPUTLB,
"Warning! Functional access to addr %#x sees timing "
354 "req. pending\n", virt_page_addr);
357 coalescer->memSidePort[0]->sendFunctional(pkt);
373 coalescer->updatePhysAddresses(pkt);
382 if (!coalescer->probeTLBEvent.scheduled())
383 coalescer->schedule(coalescer->probeTLBEvent,
384 curTick() + coalescer->clockPeriod());
390 fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
411 bool rejected =
false;
417 DPRINTF(GPUTLB,
"triggered TLBCoalescer %s\n", __func__);
421 int coalescedReq_cnt = iter->second.size();
423 int vector_index = 0;
425 DPRINTF(GPUTLB,
"coalescedReq_cnt is %d for tick_index %d\n",
426 coalescedReq_cnt, iter->first);
428 while (
i < coalescedReq_cnt) {
430 PacketPtr first_packet = iter->second[vector_index][0];
441 DPRINTF(GPUTLB,
"Cannot issue - There are pending reqs for "
442 "page %#x\n", virt_page_addr);
451 if (!
memSidePort[0]->sendTimingReq(first_packet)) {
452 DPRINTF(GPUTLB,
"Failed to send TLB request for page %#x\n",
460 TheISA::GpuTLB::TranslationState *tmp_sender_state =
461 safe_cast<TheISA::GpuTLB::TranslationState*>
464 bool update_stats = !tmp_sender_state->prefetch;
470 int req_cnt = tmp_sender_state->reqCnt.back();
473 DPRINTF(GPUTLB,
"%s sending pkt w/ req_cnt %d\n",
478 int pkt_cnt = iter->second[vector_index].size();
482 DPRINTF(GPUTLB,
"Successfully sent TLB request for page %#x",
487 = iter->second[vector_index];
490 iter->second.erase(iter->second.begin() + vector_index);
492 if (iter->second.empty())
493 assert(
i == coalescedReq_cnt);
503 if (iter->second.empty()) {
519 DPRINTF(GPUTLB,
"Cleanup - Delete coalescer entry with key %#x\n",
526 ADD_STAT(uncoalescedAccesses,
"Number of uncoalesced TLB accesses"),
527 ADD_STAT(coalescedAccesses,
"Number of coalesced TLB accesses"),
528 ADD_STAT(queuingCycles,
"Number of cycles spent in queue"),
530 "Number of cycles spent in queue for all incoming reqs"),
531 ADD_STAT(localLatency,
"Avg. latency over all incoming pkts")