tlb_coalescer.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "arch/amdgpu/vega/tlb_coalescer.hh"

#include <cstring>

#include "arch/amdgpu/vega/page_size.hh"
#include "arch/amdgpu/vega/pagetable.hh"
#include "arch/generic/mmu.hh"
#include "base/logging.hh"
#include "debug/GPUTLB.hh"
#include "sim/process.hh"

namespace gem5
{

VegaTLBCoalescer::VegaTLBCoalescer(const VegaTLBCoalescerParams &p)
    : ClockedObject(p),
      TLBProbesPerCycle(p.probesPerCycle),
      coalescingWindow(p.coalescingWindow),
      disableCoalescing(p.disableCoalescing),
      probeTLBEvent([this]{ processProbeTLBEvent(); },
                    "Probe the TLB below",
                    false, Event::CPU_Tick_Pri),
      cleanupEvent([this]{ processCleanupEvent(); },
                   "Cleanup issuedTranslationsTable hashmap",
                   false, Event::Maximum_Pri),
      tlb_level(p.tlb_level),
      maxDownstream(p.maxDownstream),
      numDownstream(0)
{
    // create the response ports based on the number of connected ports
    for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
                                              this, i));
    }

    // create the request ports based on the number of connected ports
    for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
                                              this, i));
    }

    default_pgSize = p.default_pgSize;
    potentialPagesize.insert(default_pgSize);
}

Port &
VegaTLBCoalescer::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "cpu_side_ports") {
        if (idx >= static_cast<PortID>(cpuSidePort.size())) {
            panic("VegaTLBCoalescer::getPort: unknown index %d\n", idx);
        }

        return *cpuSidePort[idx];
    } else if (if_name == "mem_side_ports") {
        if (idx >= static_cast<PortID>(memSidePort.size())) {
            panic("VegaTLBCoalescer::getPort: unknown index %d\n", idx);
        }

        return *memSidePort[idx];
    } else {
        panic("VegaTLBCoalescer::getPort: unknown port %s\n", if_name);
    }
}

/*
 * This method returns true if the <incoming_pkt>
 * can be coalesced with <coalesced_pkt> and false otherwise.
 * A fixed set of rules is checked; the rules can
 * potentially be adjusted based on the TLB level.
 */
bool
VegaTLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt,
                              Addr pagebytes = VegaISA::PageBytes)
{
    if (disableCoalescing)
        return false;

    GpuTranslationState *incoming_state =
        safe_cast<GpuTranslationState*>(incoming_pkt->senderState);

    GpuTranslationState *coalesced_state =
        safe_cast<GpuTranslationState*>(coalesced_pkt->senderState);

    // Rule 1: Coalesce requests only if they
    // fall within the same virtual page
    Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
                                             pagebytes);

    Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
                                              pagebytes);

    if (incoming_virt_page_addr != coalesced_virt_page_addr)
        return false;

    // Rule 2: Coalesce requests only if they
    // share a TLB Mode, i.e., they are both read
    // or both write requests.
    BaseMMU::Mode incoming_mode = incoming_state->tlbMode;
    BaseMMU::Mode coalesced_mode = coalesced_state->tlbMode;

    if (incoming_mode != coalesced_mode)
        return false;

    // When we can coalesce a packet, update the reqCnt,
    // i.e., the number of packets represented by
    // this coalesced packet.
    if (!incoming_state->isPrefetch)
        coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();

    return true;
}
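
/*
 * A minimal sketch of rule 1 above, assuming 4 KiB pages; the addresses
 * are hypothetical. Two requests coalesce only when roundDown() maps
 * their virtual addresses to the same page frame:
 *
 *     Addr pagebytes = 0x1000;    // 4 KiB
 *     Addr va1 = 0x7fff0010;      // -> page 0x7fff0000
 *     Addr va2 = 0x7fff0ff8;      // -> page 0x7fff0000, may coalesce
 *     Addr va3 = 0x7fff1000;      // -> page 0x7fff1000, never coalesces
 *     assert(roundDown(va1, pagebytes) == roundDown(va2, pagebytes));
 *     assert(roundDown(va1, pagebytes) != roundDown(va3, pagebytes));
 */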

/*
 * We need to update the physical addresses of all the translation requests
 * that were coalesced into the one that just returned.
 */
void
VegaTLBCoalescer::updatePhysAddresses(PacketPtr pkt)
{
    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    // Make a copy. This gets deleted after the first is sent back on the port
    assert(sender_state->tlbEntry);
    VegaISA::VegaTlbEntry tlb_entry =
        *safe_cast<VegaISA::VegaTlbEntry *>(sender_state->tlbEntry);
    Addr first_entry_vaddr = tlb_entry.vaddr;
    Addr first_entry_paddr = tlb_entry.paddr;
    int page_size = tlb_entry.size();

    potentialPagesize.insert(page_size);

    Addr virt_page_addr;

    // Find the coalesced translation request.
    for (auto pgsize_seen : potentialPagesize) {
        virt_page_addr = roundDown(pkt->req->getVaddr(), pgsize_seen);
        if (issuedTranslationsTable.count(virt_page_addr) != 0)
            break;
    }

    DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
            issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);

    bool uncacheable = tlb_entry.uncacheable();
    int first_hit_level = sender_state->hitLevel;
    bool is_system = pkt->req->systemReq();

    for (int i = 0;
         i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
        PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];

        Addr local_pkt_vaddr = local_pkt->req->getVaddr();

        // Check if the pending req's vaddr falls within the returned page;
        // if not, reissue the pending req as a 4k page.
        if (!(first_entry_vaddr <= local_pkt_vaddr &&
              local_pkt_vaddr < first_entry_vaddr + page_size)) {
            reissue_pkt_helper(local_pkt);
            continue;
        }

        GpuTranslationState *sender_state =
            safe_cast<GpuTranslationState*>(local_pkt->senderState);

        // We are sending the packet back, so pop the reqCnt associated
        // with this level in the TLB hierarchy.
        if (!sender_state->isPrefetch) {
            sender_state->reqCnt.pop_back();
            localCycles += curCycle();
        }

        /*
         * Only the first packet from this coalesced request has been
         * translated. Grab the translated phys. page addr and update the
         * physical addresses of the remaining packets with the appropriate
         * page offsets.
         */
        if (i) {
            Addr paddr = first_entry_paddr
                       + (local_pkt->req->getVaddr() & (page_size - 1));
            local_pkt->req->setPaddr(paddr);

            if (uncacheable)
                local_pkt->req->setFlags(Request::UNCACHEABLE);

            // Update senderState->tlbEntry so we can insert
            // the correct TlbEntry in the TLBs above.

            //auto p = sender_state->tc->getProcessPtr();
            if (sender_state->tlbEntry == NULL) {
                // not set by the lower (L2) coalescer
                sender_state->tlbEntry =
                    new VegaISA::VegaTlbEntry(1 /* VMID TODO */,
                                              first_entry_vaddr,
                                              first_entry_paddr,
                                              tlb_entry.logBytes,
                                              tlb_entry.pte);
            }

            // Update the hitLevel for all uncoalesced reqs
            // so that each packet knows where it hit
            // (used for statistics in the CUs).
            sender_state->hitLevel = first_hit_level;
        }

        // Copy PTE system bit information to coalesced requests
        local_pkt->req->setSystemReq(is_system);

        ResponsePort *return_port = sender_state->ports.back();
        sender_state->ports.pop_back();

        // Translation is done - convert to a response pkt if necessary
        // and send the translation back.
        if (local_pkt->isRequest()) {
            local_pkt->makeTimingResponse();
        }

        return_port->sendTimingResp(local_pkt);
    }

    // Schedule cleanup for the end of this cycle.
    // This is a maximum priority event and must be on
    // the same cycle as the GPUTLB cleanup event to prevent
    // race conditions with an IssueProbeEvent caused by
    // MemSidePort::recvReqRetry.
    cleanupQueue.push(virt_page_addr);

    if (!cleanupEvent.scheduled())
        schedule(cleanupEvent, curTick());
}
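
/*
 * A minimal sketch of the offset arithmetic above, assuming a power-of-two
 * page size; the values are hypothetical. The low bits of the virtual
 * address are the page offset, which carries over unchanged onto the
 * translated physical page:
 *
 *     int page_size = 0x1000;                // 4 KiB
 *     Addr first_entry_paddr = 0x20000000;   // translated page frame
 *     Addr vaddr = 0x7fff0a38;               // a coalesced req's vaddr
 *     Addr paddr = first_entry_paddr + (vaddr & (page_size - 1));
 *     // paddr == 0x20000a38
 */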

// Re-coalesce the packet to 4k pages.
void
VegaTLBCoalescer::reissue_pkt_helper(PacketPtr pkt)
{
    // first packet of a coalesced request
    PacketPtr first_packet = nullptr;
    // true if we are able to do coalescing
    bool didCoalesce = false;
    // number of coalesced reqs for a given window
    int coalescedReq_cnt = 0;

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    DPRINTF(GPUTLB, "Trying to re-issue req at tick: %llu, addr: %#x\n",
            sender_state->issueTime, pkt->req->getVaddr());

    // The tick index is used as a key to the coalescerFIFO hashmap.
    // It is shared by all candidates that fall within the
    // given coalescingWindow.
    Tick tick_index = sender_state->issueTime / coalescingWindow;

    if (coalescerFIFO.count(tick_index)) {
        coalescedReq_cnt = coalescerFIFO[tick_index].size();
    }

    // See if we can coalesce the incoming pkt with another
    // coalesced request with the same tick_index.
    for (int i = 0; i < coalescedReq_cnt; ++i) {
        first_packet = coalescerFIFO[tick_index][i].first[0];
        if (coalescerFIFO[tick_index][i].second != VegaISA::PageBytes)
            continue;

        if (canCoalesce(pkt, first_packet, VegaISA::PageBytes)) {
            coalescerFIFO[tick_index][i].first.push_back(pkt);

            DPRINTF(GPUTLB, "Coalesced re-issued req %i w/ tick_index %d "
                    "has %d reqs\n",
                    i, tick_index,
                    coalescerFIFO[tick_index][i].first.size());

            didCoalesce = true;
            break;
        }
    }

    // If this is the first request for this tick_index,
    // or we did not manage to coalesce, update stats
    // and make the necessary allocations.
    if (!coalescedReq_cnt || !didCoalesce) {
        std::vector<PacketPtr> new_array;
        new_array.push_back(pkt);
        coalescerFIFO[tick_index].push_back(
            std::make_pair(new_array, VegaISA::PageBytes));

        DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
                "push re-issued req\n", tick_index,
                coalescerFIFO[tick_index].size());
    }

    // Schedule probeTLBEvent next cycle to send the
    // coalesced requests to the TLB.
    if (!probeTLBEvent.scheduled()) {
        schedule(probeTLBEvent,
                 curTick() + clockPeriod());
    }
}

// Receive translation requests, create a coalesced request,
// and send them to the TLB (TLBProbesPerCycle).
bool
VegaTLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
    // first packet of a coalesced request
    PacketPtr first_packet = nullptr;
    // true if we are able to do coalescing
    bool didCoalesce = false;
    // number of coalesced reqs for a given window
    int coalescedReq_cnt = 0;

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->isPrefetch;

    if (coalescer->tlb_level == 1 && coalescer->mustStallCUPort(this))
        return false;

    // Push back the port to remember the path back.
    sender_state->ports.push_back(this);

    if (update_stats) {
        // If reqCnt is empty, then this packet does not represent
        // multiple uncoalesced reqs (pkts) but just a single pkt.
        // If it does, though, then the reqCnt for each level in the
        // hierarchy accumulates the total number of reqs this packet
        // represents.
        int req_cnt = 1;

        if (!sender_state->reqCnt.empty())
            req_cnt = sender_state->reqCnt.back();

        sender_state->reqCnt.push_back(req_cnt);

        // update statistics
        coalescer->uncoalescedAccesses++;
        req_cnt = sender_state->reqCnt.back();
        DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
        coalescer->queuingCycles -= (coalescer->curCycle() * req_cnt);
        coalescer->localqueuingCycles -= coalescer->curCycle();
        coalescer->localCycles -= coalescer->curCycle();
    }

    // Coalesce based on the time the packet arrives at the coalescer (here).
    if (!sender_state->issueTime)
        sender_state->issueTime = curTick();

    // The tick index is used as a key to the coalescerFIFO hashmap.
    // It is shared by all candidates that fall within the
    // given coalescingWindow.
    Tick tick_index = sender_state->issueTime / coalescer->coalescingWindow;

    if (coalescer->coalescerFIFO.count(tick_index)) {
        coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
    }

    // See if we can coalesce the incoming pkt with another
    // coalesced request with the same tick_index.
    for (int i = 0; i < coalescedReq_cnt; ++i) {
        first_packet = coalescer->coalescerFIFO[tick_index][i].first[0];
        Addr pg_size = coalescer->coalescerFIFO[tick_index][i].second;

        if (coalescer->canCoalesce(pkt, first_packet, pg_size)) {
            coalescer->coalescerFIFO[tick_index][i].first.push_back(pkt);

            DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
                    i, tick_index,
                    coalescer->coalescerFIFO[tick_index][i].first.size());

            didCoalesce = true;
            break;
        }
    }

    // If this is the first request for this tick_index,
    // or we did not manage to coalesce, update stats
    // and make the necessary allocations.
    if (!coalescedReq_cnt || !didCoalesce) {
        if (update_stats)
            coalescer->coalescedAccesses++;

        std::vector<PacketPtr> new_array;
        new_array.push_back(pkt);
        coalescer->coalescerFIFO[tick_index].push_back(
            std::make_pair(new_array, coalescer->default_pgSize));

        DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
                "push\n", tick_index,
                coalescer->coalescerFIFO[tick_index].size());
    }

    // Schedule probeTLBEvent next cycle to send the
    // coalesced requests to the TLB.
    if (!coalescer->probeTLBEvent.scheduled()) {
        coalescer->schedule(coalescer->probeTLBEvent,
                            curTick() + coalescer->clockPeriod());
    }

    return true;
}
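
/*
 * A minimal sketch of the coalescing-window bucketing above; the values
 * are hypothetical. Requests whose issue times fall within the same
 * window of coalescingWindow ticks share a tick_index and are therefore
 * candidates for coalescing with each other:
 *
 *     Tick coalescingWindow = 1000;
 *     Tick t1 = 4200, t2 = 4999, t3 = 5001;
 *     // t1 / coalescingWindow == 4 and t2 / coalescingWindow == 4:
 *     //     same bucket, may coalesce
 *     // t3 / coalescingWindow == 5:
 *     //     different bucket, never coalesces with t1 or t2
 */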

void
VegaTLBCoalescer::CpuSidePort::recvReqRetry()
{
    panic("recvReqRetry called");
}

void
VegaTLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
{
    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->isPrefetch;

    if (update_stats)
        coalescer->uncoalescedAccesses++;

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(), VegaISA::PageBytes);
    int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);

    if (map_count) {
        DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
                "req. pending\n", virt_page_addr);
    }

    coalescer->memSidePort[0]->sendFunctional(pkt);
}

AddrRangeList
VegaTLBCoalescer::CpuSidePort::getAddrRanges() const
{
    // currently not checked by the requestor
    AddrRangeList ranges;

    return ranges;
}

/*
 * A translation completed and returned.
 */
bool
VegaTLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
{
    coalescer->updatePhysAddresses(pkt);

    if (coalescer->tlb_level != 1)
        return true;

    coalescer->decrementNumDownstream();

    DPRINTF(GPUTLB,
            "recvTimingResp: clscr = %p, numDownstream = %d, max = %d\n",
            coalescer, coalescer->numDownstream, coalescer->maxDownstream);

    coalescer->unstallPorts();
    return true;
}

void
VegaTLBCoalescer::MemSidePort::recvReqRetry()
{
    // We've received a retry. Schedule a probeTLBEvent.
    if (!coalescer->probeTLBEvent.scheduled())
        coalescer->schedule(coalescer->probeTLBEvent,
                            curTick() + coalescer->clockPeriod());
}

void
VegaTLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
{
    fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
}

/*
 * Here we scan the coalescer FIFO and issue the max
 * number of permitted probes to the TLB below. We
 * permit bypassing of coalesced requests for the same
 * tick_index.
 *
 * We do not access the next tick_index unless we've
 * drained the previous one. The coalesced requests
 * that are successfully sent are moved to the
 * issuedTranslationsTable (the table which keeps
 * track of the outstanding reqs).
 */
void
VegaTLBCoalescer::processProbeTLBEvent()
{
    // number of TLB probes sent so far
    int sent_probes = 0;

    // A probe is not issued either when the recvTiming of the TLB below
    // returns false or when there is another outstanding request for the
    // same virt. page.

    DPRINTF(GPUTLB, "triggered VegaTLBCoalescer %s\n", __func__);

    if ((tlb_level == 1)
        && (availDownstreamSlots() == 0)) {
        DPRINTF(GPUTLB, "IssueProbeEvent - no downstream slots, bail out\n");
        return;
    }

    for (auto iter = coalescerFIFO.begin();
         iter != coalescerFIFO.end();) {
        int coalescedReq_cnt = iter->second.size();
        int i = 0;
        int vector_index = 0;

        DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
                coalescedReq_cnt, iter->first);

        while (i < coalescedReq_cnt) {
            ++i;
            PacketPtr first_packet = iter->second[vector_index].first[0];
            // The request to the coalescer is organized as follows:
            // the coalescerFIFO is a map indexed by coalescingWindow
            // cycle. Only requests that fall within the same
            // coalescingWindow are considered for coalescing. Each entry
            // of the coalescerFIFO is a vector of vectors. There is one
            // entry for each distinct virtual page number, and it contains
            // a vector of all requests that are coalesced for the same
            // virtual page address.

            // Compute the virtual page address for this request using the
            // assumed page size, stored in pair.second of the coalesced req.
            Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
                                            iter->second[vector_index].second);

            // Is there another outstanding request for the same page addr?
            // Consider all possible page sizes.
            int pending_reqs = 0;
            for (auto i_pgsize : potentialPagesize)
                pending_reqs += issuedTranslationsTable.count
                    (roundDown(first_packet->req->getVaddr(), i_pgsize));

            if (pending_reqs) {
                DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
                        "page %#x\n", virt_page_addr);

                ++vector_index;
                continue;
            }

            // send the coalesced request for virt_page_addr
            if (!memSidePort[0]->sendTimingReq(first_packet)) {
                DPRINTF(GPUTLB,
                        "Failed to send TLB request for page %#x\n",
                        virt_page_addr);

                // No need for a retries queue since we are already
                // buffering the coalesced request in coalescerFIFO.
                // Arka:: No point trying to send other requests to the
                // TLB at this point since it is busy. Retries will be
                // called later by the TLB below.
                return;
            } else {

                if (tlb_level == 1)
                    incrementNumDownstream();

                GpuTranslationState *tmp_sender_state =
                    safe_cast<GpuTranslationState*>(first_packet->senderState);

                bool update_stats = !tmp_sender_state->isPrefetch;

                if (update_stats) {
                    // req_cnt is the total number of packets represented
                    // by the one we just sent, counting all the way from
                    // the top of the TLB hierarchy (i.e., from the CU).
                    int req_cnt = tmp_sender_state->reqCnt.back();
                    queuingCycles += (curCycle() * req_cnt);

                    DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
                            name(), req_cnt);

                    // pkt_cnt is the number of packets we coalesced into
                    // the one we just sent, but only at this coalescer
                    // level.
                    int pkt_cnt = iter->second[vector_index].first.size();
                    localqueuingCycles += (curCycle() * pkt_cnt);
                }

                DPRINTF(GPUTLB, "Successfully sent TLB request for page "
                        "%#x\n", virt_page_addr);

                // copy coalescedReq to issuedTranslationsTable
                issuedTranslationsTable[virt_page_addr]
                    = iter->second[vector_index].first;

                // erase the entry of this coalesced req
                iter->second.erase(iter->second.begin() + vector_index);

                if (iter->second.empty())
                    assert(i == coalescedReq_cnt);

                sent_probes++;

                if (sent_probes == TLBProbesPerCycle ||
                    ((tlb_level == 1) && (!availDownstreamSlots()))) {
                    // Before returning, make sure that empty vectors are
                    // taken out. Not a big issue though, since a later
                    // invocation will take them out anyway.
                    if (iter->second.empty())
                        coalescerFIFO.erase(iter);

                    // Schedule probeTLBEvent next cycle to send the
                    // remaining coalesced requests to the TLB.
                    if (!probeTLBEvent.scheduled()) {
                        schedule(probeTLBEvent,
                                 curTick() + clockPeriod());
                    }
                    return;
                }
            }
        }

        // If there are no more coalesced reqs for this tick_index,
        // erase the hash_map entry with the first iterator.
        if (iter->second.empty()) {
            coalescerFIFO.erase(iter++);
        } else {
            ++iter;
        }
    }
}
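
/*
 * A sketch of the shape of the two bookkeeping structures walked above,
 * inferred from the accesses in this file (the actual typedefs,
 * CoalescingFIFO and CoalescingTable, live in tlb_coalescer.hh):
 *
 *     // tick_index -> coalesced reqs in that window; each coalesced req
 *     // pairs the packets for one virtual page with the page size that
 *     // was assumed when they were coalesced
 *     std::map<Tick, std::vector<std::pair<std::vector<PacketPtr>, Addr>>>
 *         coalescerFIFO;
 *
 *     // virtual page address -> packets of the coalesced req currently
 *     // outstanding in the TLB below
 *     std::unordered_map<Addr, std::vector<PacketPtr>>
 *         issuedTranslationsTable;
 */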

void
VegaTLBCoalescer::processCleanupEvent()
{
    while (!cleanupQueue.empty()) {
        Addr cleanup_addr = cleanupQueue.front();
        cleanupQueue.pop();
        issuedTranslationsTable.erase(cleanup_addr);

        DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
                cleanup_addr);
    }
}

void
VegaTLBCoalescer::regStats()
{
    ClockedObject::regStats();

    uncoalescedAccesses
        .name(name() + ".uncoalesced_accesses")
        .desc("Number of uncoalesced TLB accesses")
        ;

    coalescedAccesses
        .name(name() + ".coalesced_accesses")
        .desc("Number of coalesced TLB accesses")
        ;

    queuingCycles
        .name(name() + ".queuing_cycles")
        .desc("Number of cycles spent in queue")
        ;

    localqueuingCycles
        .name(name() + ".local_queuing_cycles")
        .desc("Number of cycles spent in queue for all incoming reqs")
        ;

    localCycles
        .name(name() + ".local_cycles")
        .desc("Number of cycles spent in queue for all incoming reqs")
        ;

    localLatency
        .name(name() + ".local_latency")
        .desc("Avg. latency over all incoming pkts")
        ;

    latency
        .name(name() + ".latency")
        .desc("Avg. latency over all incoming pkts")
        ;

    localLatency = localqueuingCycles / uncoalescedAccesses;
    latency = localCycles / uncoalescedAccesses;
}
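
/*
 * A note on the +/- accounting that feeds these stats: a request's
 * queueing time is accumulated in two halves, so no per-packet timestamp
 * is needed. A sketch with hypothetical cycle numbers:
 *
 *     // at arrival (recvTimingReq), curCycle() == 100:
 *     //     localqueuingCycles -= 100;
 *     // at dispatch (processProbeTLBEvent), curCycle() == 130:
 *     //     localqueuingCycles += 130;
 *     // net contribution: 30 cycles spent queued in this coalescer
 */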

void
VegaTLBCoalescer::insertStalledPortIfNotMapped(CpuSidePort *port)
{
    assert(tlb_level == 1);
    if (stalledPortsMap.count(port) != 0)
        return; // we already know this port is stalled

    stalledPortsMap[port] = port;
    stalledPortsQueue.push(port);
    DPRINTF(GPUTLB,
            "insertStalledPortIfNotMapped: port %p, mapSz = %d, qsz = %d\n",
            port, stalledPortsMap.size(), stalledPortsQueue.size());
}

bool
VegaTLBCoalescer::mustStallCUPort(CpuSidePort *port)
{
    assert(tlb_level == 1);

    DPRINTF(GPUTLB, "mustStallCUPort: downstream = %d, max = %d\n",
            numDownstream, maxDownstream);

    if (availDownstreamSlots() == 0) {
        warn("RED ALERT - VegaTLBCoalescer::mustStallCUPort\n");
        insertStalledPortIfNotMapped(port);
        return true;
    }
    else
        return false;
}

void
VegaTLBCoalescer::unstallPorts()
{
    assert(tlb_level == 1);
    if (!stalledPorts() || availDownstreamSlots() == 0)
        return;

    DPRINTF(GPUTLB, "unstallPorts()\n");
    /*
     * This check is needed because we can be called from recvTimingResp()
     * or, synchronously due to having called sendRetry, from
     * recvTimingReq().
     */
    if (availDownstreamSlots() == 0) // can happen if retry sent 1 downstream
        return;
    /*
     * Consider this scenario:
     * 1) max downstream is reached
     * 2) port1 tries to send a req, can't => stalledPortsQueue = [port1]
     * 3) port2 tries to send a req, can't => stalledPortsQueue = [port1,
     *    port2]
     * 4) a request completes; we remove port1 from both data structures
     *    and call sendRetry => stalledPortsQueue = [port2]
     * 5) port1 sends one req downstream and a second is rejected
     *    => stalledPortsQueue = [port2, port1]
     *
     * So we round-robin, and each stalled port can send 1 req on retry.
     */
    assert(availDownstreamSlots() == 1);
    auto port = stalledPortsQueue.front();
    DPRINTF(GPUTLB, "sending retry for port = %p(%s)\n", port, port->name());
    stalledPortsQueue.pop();
    auto iter = stalledPortsMap.find(port);
    assert(iter != stalledPortsMap.end());
    stalledPortsMap.erase(iter);
    port->sendRetryReq(); // cu will synchronously call recvTimingReq
}
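
/*
 * A minimal sketch of the invariant kept by the two structures above:
 * stalledPortsQueue orders the retries (round-robin), while
 * stalledPortsMap only guards against inserting the same port twice.
 * Hypothetical usage:
 *
 *     insertStalledPortIfNotMapped(port1);   // queue = [port1]
 *     insertStalledPortIfNotMapped(port1);   // no-op, already mapped
 *     insertStalledPortIfNotMapped(port2);   // queue = [port1, port2]
 *     unstallPorts();                        // retries port1 first
 */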

} // namespace gem5