tlb.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "arch/amdgpu/vega/tlb.hh"

#include <cmath>
#include <cstring>

#include "arch/amdgpu/common/gpu_translation_state.hh"
#include "arch/amdgpu/vega/faults.hh"
#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "dev/amdgpu/amdgpu_device.hh"

namespace gem5
{
namespace VegaISA
{

// We impose no limit on the number of translations we send downstream;
// we rely on the limit enforced by the coalescer above us.
GpuTLB::GpuTLB(const VegaGPUTLBParams &p)
    : ClockedObject(p), walker(p.walker),
      gpuDevice(p.gpu_device), size(p.size), stats(this),
      cleanupEvent([this]{ cleanup(); }, name(), false,
                   Event::Maximum_Pri)
{
    assoc = p.assoc;
    assert(assoc <= size);
    numSets = size / assoc;
    allocationPolicy = p.allocationPolicy;
    hasMemSidePort = false;

    tlb.assign(size, VegaTlbEntry());

    freeList.resize(numSets);
    entryList.resize(numSets);

    for (int set = 0; set < numSets; ++set) {
        for (int way = 0; way < assoc; ++way) {
            int x = set * assoc + way;
            freeList[set].push_back(&tlb.at(x));
        }
    }

    FA = (size == assoc);
    setMask = numSets - 1;
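
    // Illustrative geometry (values assumed, not from the config): with
    // size = 64 entries and assoc = 8, numSets = 8 and setMask = 0x7, and
    // entry (set, way) lives at tlb[set * assoc + way]. In the
    // fully-associative case (size == assoc) there is a single set, so
    // setMask = 0.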

    maxCoalescedReqs = p.maxOutstandingReqs;

    outstandingReqs = 0;
    hitLatency = p.hitLatency;
    missLatency1 = p.missLatency1;
    missLatency2 = p.missLatency2;

    // create the response ports based on the number of connected ports
    for (size_t i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }

    // create the requestor ports based on the number of connected ports
    for (size_t i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }

    // assuming one walker per TLB, set our walker's TLB to this TLB.
    walker->setTLB(this);

    // gpuDevice should be non-null in full system only and is set by GpuTLB
    // params from the config file.
    if (gpuDevice) {
        gpuDevice->getVM().registerTLB(this);
    }
}

GpuTLB::~GpuTLB()
{
}

Port &
GpuTLB::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "cpu_side_ports") {
        if (idx >= static_cast<PortID>(cpuSidePort.size())) {
            panic("GpuTLB::getPort: unknown index %d\n", idx);
        }

        return *cpuSidePort[idx];
    } else if (if_name == "mem_side_ports") {
        if (idx >= static_cast<PortID>(memSidePort.size())) {
            panic("GpuTLB::getPort: unknown index %d\n", idx);
        }

        hasMemSidePort = true;

        return *memSidePort[idx];
    } else {
        panic("GpuTLB::getPort: unknown port %s\n", if_name);
    }
}

Fault
GpuTLB::createPagefault(Addr vaddr, Mode mode)
{
    DPRINTF(GPUTLB, "GPUTLB: Raising page fault.\n");
    ExceptionCode code;
    if (mode == BaseMMU::Read)
        code = ExceptionCode::LOAD_PAGE;
    else if (mode == BaseMMU::Write)
        code = ExceptionCode::STORE_PAGE;
    else
        code = ExceptionCode::INST_PAGE;
    return std::make_shared<PageFault>(vaddr, code, true, mode, true);
}

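// Example (assumed 4 KiB pages): with VegaISA::PageShift = 12,
// mask(12) = 0xfff, so pageAlign(0x12345678) returns 0x12345000.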
Addr
GpuTLB::pageAlign(Addr vaddr)
{
    Addr pageMask = mask(VegaISA::PageShift);
    return (vaddr & ~pageMask);
}

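// Worked example (assumed values): with setMask = 0x7 and a 4 KiB page
// (page_shift = 12), va = 0x7ffff000 maps to set
// (0x7ffff000 >> 12) & 0x7 = 7.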
int
GpuTLB::getSet(Addr va, unsigned int page_shift)
{
    return (va >> page_shift) & setMask;
}

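// Replacement sketch: a set's freeList supplies unused ways first; once
// the set is full, the least-recently-used entry (the back of
// entryList[set]) is overwritten and the new entry becomes the MRU at
// the front of the list.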
VegaTlbEntry*
GpuTLB::insert(Addr vpn, VegaTlbEntry &entry)
{
    VegaTlbEntry *newEntry = nullptr;

    int set = getSet(entry.vaddr, entry.logBytes);

    if (!freeList[set].empty()) {
        newEntry = freeList[set].front();
        freeList[set].pop_front();
    } else {
        newEntry = entryList[set].back();
        entryList[set].pop_back();
    }

    *newEntry = entry;
    entryList[set].push_front(newEntry);

    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
            newEntry->vaddr, newEntry->paddr, entry.size(), set);

    return newEntry;
}

GpuTLB::EntryList::iterator
GpuTLB::lookupIt(Addr va, unsigned int ps, bool update_lru)
{
    int set = getSet(va, ps);

    if (FA) {
        assert(!set);
    }

    auto entry = entryList[set].begin();
    for (; entry != entryList[set].end(); ++entry) {
        int page_size = (*entry)->size();

        if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va &&
            ps == (*entry)->logBytes) {
            DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                    "with size %#x.\n", va, (*entry)->vaddr, page_size);

            if (update_lru) {
                entryList[set].push_front(*entry);
                entryList[set].erase(entry);
                entry = entryList[set].begin();
            }

            break;
        }
    }

    return entry;
}

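// lookup() probes every page size the TLB supports: logPageShiftList
// holds one shift per supported size (e.g., 12 for 4 KiB pages; the
// exact values come from tlb.hh), and the first matching set wins.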
VegaTlbEntry*
GpuTLB::lookup(Addr va, bool update_lru)
{
    for (auto ps : logPageShiftList) {
        int set = getSet(va, ps);

        auto entry = lookupIt(va, ps, update_lru);

        if (entry == entryList[set].end())
            continue;
        else
            return *entry;
    }
    return nullptr;
}

void
GpuTLB::invalidateAll()
{
    DPRINTF(GPUTLB, "Invalidating all entries.\n");

    for (int i = 0; i < numSets; ++i) {
        while (!entryList[i].empty()) {
            VegaTlbEntry *entry = entryList[i].front();
            entryList[i].pop_front();
            freeList[i].push_back(entry);
        }
    }
}

void
GpuTLB::demapPage(Addr va, uint64_t asn)
{
    DPRINTF(GPUTLB, "Demapping vaddr %#x.\n", va);
    for (auto ps : logPageShiftList) {
        int set = getSet(va, ps);
        auto entry = lookupIt(va, ps, false);

        if (entry != entryList[set].end()) {
            freeList[set].push_back(*entry);
            entryList[set].erase(entry);
        }
    }
}

/**
 * TLB_lookup will only perform a TLB lookup returning the TLB entry
 * on a TLB hit and nullptr on a TLB miss.
 */
VegaTlbEntry *
GpuTLB::tlbLookup(const RequestPtr &req, bool update_stats)
{
    if (req->hasNoAddr()) {
        return nullptr;
    }
    Addr vaddr = req->getVaddr();
    Addr alignedVaddr = pageAlign(vaddr);
    DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);

    // update LRU stack on a hit
    VegaTlbEntry *entry = lookup(alignedVaddr, true);

    if (!update_stats) {
        // functional tlb access for memory initialization
        // i.e., memory seeding or instr. seeding -> don't update
        // TLB and stats
        return entry;
    }

    stats.localNumTLBAccesses++;

    if (!entry) {
        stats.localNumTLBMisses++;
    } else {
        stats.localNumTLBHits++;
    }

    return entry;
}

Walker*
GpuTLB::getWalker()
{
    return walker;
}

void
GpuTLB::serialize(CheckpointOut &cp) const
{
}

void
GpuTLB::unserialize(CheckpointIn &cp)
{
}

/**
 * Do the TLB lookup for this coalesced request and schedule another
 * event <TLB access latency> cycles later.
 */
void
GpuTLB::issueTLBLookup(PacketPtr pkt)
{
    assert(pkt);
    assert(pkt->senderState);

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->isPrefetch;

    DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
            virt_page_addr);

    int req_cnt = sender_state->reqCnt.back();

    if (update_stats) {
        stats.accessCycles -= (curCycle() * req_cnt);
        stats.localCycles -= curCycle();
        stats.globalNumTLBAccesses += req_cnt;
    }

    tlbOutcome lookup_outcome = TLB_MISS;
    const RequestPtr &tmp_req = pkt->req;

    // Access the TLB and figure out if it's a hit or a miss.
    auto entry = tlbLookup(tmp_req, update_stats);
    if (entry || pkt->req->hasNoAddr()) {
        // Put the entry in SenderState
        lookup_outcome = TLB_HIT;
        if (pkt->req->hasNoAddr()) {
            sender_state->tlbEntry =
                new VegaTlbEntry(1 /* VMID */, 0, 0, 0, 0);
            // set false because we shouldn't go to
            // host memory for a memtime request
            pkt->req->setSystemReq(false);
        } else {
            VegaTlbEntry *entry = lookup(virt_page_addr, false);
            assert(entry);

            // Set if this is a system request
            pkt->req->setSystemReq(entry->pte.s);

            sender_state->tlbEntry =
                new VegaTlbEntry(*entry);
        }

        if (update_stats) {
            // the reqCnt has an entry per level, so its size tells us
            // which level we are in
            sender_state->hitLevel = sender_state->reqCnt.size();
            stats.globalNumTLBHits += req_cnt;
        }
    } else {
        if (update_stats)
            stats.globalNumTLBMisses += req_cnt;
    }

    /*
     * We now know the TLB lookup outcome (if it's a hit or a miss), as
     * well as the TLB access latency.
     *
     * We create and schedule a new TLBEvent which will help us take the
     * appropriate actions (e.g., update TLB on a hit, send request to
     * lower level TLB on a miss, or start a page walk if this was the
     * last-level TLB).
     */
    TLBEvent *tlb_event =
        new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

    if (translationReturnEvent.count(virt_page_addr)) {
        panic("Virtual Page Address %#x already has a return event\n",
              virt_page_addr);
    }

    translationReturnEvent[virt_page_addr] = tlb_event;
    assert(tlb_event);

    DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
            curTick() + cyclesToTicks(Cycles(hitLatency)));

    schedule(tlb_event, curTick() + cyclesToTicks(Cycles(hitLatency)));
}

GpuTLB::TLBEvent::TLBEvent(GpuTLB *_tlb, Addr _addr,
                           tlbOutcome tlb_outcome, PacketPtr _pkt)
    : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
      outcome(tlb_outcome), pkt(_pkt)
{
}

/**
 * Do Paging protection checks.
 */
void
GpuTLB::pagingProtectionChecks(PacketPtr pkt, VegaTlbEntry * tlb_entry,
                               Mode mode)
{
    // Do paging protection checks.
    bool badWrite = (!tlb_entry->writable());

    if (mode == BaseMMU::Write && badWrite) {
        // The page must have been present to get into the TLB in
        // the first place. We'll assume the reserved bits are
        // fine even though we're not checking them.
        fatal("Page fault on addr %lx PTE=%#lx", pkt->req->getVaddr(),
              (uint64_t)tlb_entry->pte);
    }
}

void
GpuTLB::walkerResponse(VegaTlbEntry& entry, PacketPtr pkt)
{
    DPRINTF(GPUTLB, "WalkerResponse for %#lx. Entry: (%#lx, %#lx, %#lx)\n",
            pkt->req->getVaddr(), entry.vaddr, entry.paddr, entry.size());

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    Addr page_addr = entry.pte.ppn << VegaISA::PageShift;
    Addr paddr = page_addr + (entry.vaddr & mask(entry.logBytes));
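    // The two lines above splice the physical frame and the in-page
    // offset. Assumed example: for a 2 MiB page (logBytes = 21),
    // mask(21) = 0x1fffff keeps the 2 MiB offset, and ppn = 0xabcde with
    // PageShift = 12 gives page_addr = 0xabcde000.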
    pkt->req->setPaddr(paddr);
    pkt->req->setSystemReq(entry.pte.s);

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);
    sender_state->tlbEntry = new VegaTlbEntry(entry);

    handleTranslationReturn(virt_page_addr, TLB_MISS, pkt);
}

/**
 * handleTranslationReturn is called on a TLB hit, when a TLB miss
 * returns, or when a page fault returns.
 */
void
GpuTLB::handleTranslationReturn(Addr virt_page_addr,
                                tlbOutcome tlb_outcome, PacketPtr pkt)
{
    assert(pkt);
    Addr vaddr = pkt->req->getVaddr();

    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    Mode mode = sender_state->tlbMode;

    VegaTlbEntry *local_entry, *new_entry;

    int req_cnt = sender_state->reqCnt.back();
    bool update_stats = !sender_state->isPrefetch;

    if (update_stats) {
        stats.accessCycles += (req_cnt * curCycle());
        stats.localCycles += curCycle();
    }

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
                vaddr);
        local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
    } else {
        DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                vaddr);

        new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
        assert(new_entry);
        local_entry = new_entry;

        if (allocationPolicy) {
            assert(new_entry->pte);
            DPRINTF(GPUTLB, "allocating entry w/ addr %#lx of size %#lx\n",
                    virt_page_addr, new_entry->size());

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    pagingProtectionChecks(pkt, local_entry, mode);
    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
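    // Example with assumed values: for page_size = 0x200000 (2 MiB) and
    // vaddr = 0x40123456, the in-page offset is vaddr & 0x1fffff =
    // 0x123456; an entry with paddr = 0xa0000000 then yields
    // paddr = 0xa0123456.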
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    // Since this packet will be sent through the cpu side port, it must be
    // converted to a response pkt if it is not one already
    if (pkt->isRequest()) {
        pkt->makeTimingResponse();
    }

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable()) {
        pkt->req->setFlags(Request::UNCACHEABLE);
    }

    // send packet back to coalescer
    cpuSidePort[0]->sendTimingResp(pkt);
    // schedule cleanup event
    cleanupQueue.push(virt_page_addr);

    DPRINTF(GPUTLB, "Scheduled %#lx for cleanup\n", virt_page_addr);

    // Schedule this only once per cycle: multiple translations may return
    // in the same cycle. This is a maximum-priority event and must be on
    // the same cycle as the cleanup event in TLBCoalescer to avoid a race
    // with IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry.
    if (!cleanupEvent.scheduled())
        schedule(cleanupEvent, curTick() + clockPeriod());
}

/**
 * A TLBEvent is scheduled after the TLB lookup and helps us take the
 * appropriate actions (e.g., update TLB on a hit, send request to lower
 * level TLB on a miss, or start a page walk if this was the last-level
 * TLB).
 */
void
GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                          PacketPtr pkt)
{
    DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

    assert(translationReturnEvent[virtPageAddr]);
    assert(pkt);

    GpuTranslationState *tmp_sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    int req_cnt = tmp_sender_state->reqCnt.back();
    bool update_stats = !tmp_sender_state->isPrefetch;

    if (outcome == TLB_HIT) {
        handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

    } else if (outcome == TLB_MISS) {

        DPRINTF(GPUTLB, "This is a TLB miss\n");
        if (hasMemSidePort) {
            // the one cycle added here represents the delay from when we
            // get the reply back until we propagate it to the coalescer
            // above.

            tmp_sender_state->deviceId = 1;
            tmp_sender_state->pasId = 0;

            if (!memSidePort[0]->sendTimingReq(pkt)) {
                DPRINTF(GPUTLB, "Failed sending translation request to "
                        "lower level TLB for addr %#x\n", virtPageAddr);

                memSidePort[0]->retries.push_back(pkt);
            } else {
                DPRINTF(GPUTLB, "Sent translation request to lower level "
                        "TLB for addr %#x\n", virtPageAddr);
            }
        } else {
            // this is the last-level TLB. Start a page walk
            DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                    "addr %#x\n", virtPageAddr);

            if (update_stats)
                stats.pageTableCycles -= (req_cnt * curCycle());

            TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
            assert(tlb_event);
            tlb_event->updateOutcome(PAGE_WALK);
            schedule(tlb_event,
                     curTick() + cyclesToTicks(Cycles(missLatency2)));
        }
    } else if (outcome == PAGE_WALK) {
        if (update_stats)
            stats.pageTableCycles += (req_cnt * curCycle());

        // Need to access the page table and update the TLB
        DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                virtPageAddr);

        Addr base = gpuDevice->getVM().getPageTableBase(1);
        Addr vaddr = pkt->req->getVaddr();
        walker->setDevRequestor(gpuDevice->vramRequestorId());

        // Do page table walk
        walker->startTiming(pkt, base, vaddr, BaseMMU::Mode::Read);
    } else if (outcome == MISS_RETURN) {
        handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
    } else {
        panic("Unexpected TLB outcome %d", outcome);
    }
}

void
GpuTLB::TLBEvent::process()
{
    tlb->translationReturn(virtPageAddr, outcome, pkt);
}

const char*
GpuTLB::TLBEvent::description() const
{
    return "trigger translationDoneEvent";
}

void
GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
{
    outcome = _outcome;
}

Addr
GpuTLB::TLBEvent::getTLBEventVaddr()
{
    return virtPageAddr;
}

/**
 * recvTimingReq receives a coalesced timing request from a TLBCoalescer
 * and calls issueTLBLookup(). It only rejects the packet if we have
 * exceeded the max outstanding number of requests for the TLB.
 */
bool
GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
    bool ret = false;
    [[maybe_unused]] Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                                     VegaISA::PageBytes);

    if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
        assert(!tlb->translationReturnEvent.count(virt_page_addr));
        tlb->issueTLBLookup(pkt);
        // update number of outstanding translation requests
        tlb->outstandingReqs++;
        ret = true;
    } else {
        DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                tlb->outstandingReqs);
        tlb->stats.maxDownstreamReached++;
        ret = false;
    }

    if (tlb->outstandingReqs > tlb->stats.outstandingReqsMax.value())
        tlb->stats.outstandingReqsMax = tlb->outstandingReqs;

    return ret;
}

/**
 * handleFuncTranslationReturn is called on a TLB hit, when a TLB miss
 * returns, or when a page fault returns.
 */
void
GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
{
    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    Mode mode = sender_state->tlbMode;
    Addr vaddr = pkt->req->getVaddr();

    VegaTlbEntry *local_entry, *new_entry;

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                "%#x\n", vaddr);

        local_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
    } else {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                "%#x\n", vaddr);

        new_entry = safe_cast<VegaTlbEntry *>(sender_state->tlbEntry);
        assert(new_entry);
        local_entry = new_entry;

        if (allocationPolicy) {
            Addr virt_page_addr = roundDown(vaddr, VegaISA::PageBytes);

            DPRINTF(GPUTLB, "allocating entry w/ addr %#lx\n",
                    virt_page_addr);

            local_entry = insert(virt_page_addr, *new_entry);
        }

        assert(local_entry);
    }

    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    /**
     * Do paging checks if it's a normal functional access. If it's for a
     * prefetch, then sometimes you can try to prefetch something that
     * won't pass protection. We don't actually want to fault because
     * there is no demand access to deem this a violation. Just put it in
     * the TLB and it will fault if indeed a future demand access touches
     * it in violation.
     */
    if (!sender_state->isPrefetch && sender_state->tlbEntry)
        pagingProtectionChecks(pkt, local_entry, mode);

    int page_size = local_entry->size();
    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable())
        pkt->req->setFlags(Request::UNCACHEABLE);
}

// This is used for atomic translations. Need to
// make it all happen during the same cycle.
void
GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
{
    GpuTranslationState *sender_state =
        safe_cast<GpuTranslationState*>(pkt->senderState);

    bool update_stats = !sender_state->isPrefetch;

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    // do the TLB lookup; stats are only updated for non-prefetch requests
    bool success = tlb->tlbLookup(pkt->req, update_stats);
    tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

    // functional mode means no coalescing
    // global metrics are the same as the local metrics
    if (update_stats) {
        tlb->stats.globalNumTLBAccesses++;

        if (success) {
            sender_state->hitLevel = sender_state->reqCnt.size();
            tlb->stats.globalNumTLBHits++;
        } else {
            tlb->stats.globalNumTLBMisses++;
        }
    }

    if (!success) {
        if (tlb->hasMemSidePort) {
            // there is a TLB below -> propagate down the TLB hierarchy
            tlb->memSidePort[0]->sendFunctional(pkt);
            // If no valid translation from a prefetch, then just return
            if (sender_state->isPrefetch && !pkt->req->hasPaddr())
                return;
        } else {
            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virt_page_addr);

            Addr vaddr = pkt->req->getVaddr();
            [[maybe_unused]] Addr alignedVaddr =
                tlb->pageAlign(virt_page_addr);
            assert(alignedVaddr == virt_page_addr);

            unsigned logBytes;
            PageTableEntry pte;

            // Initialize walker state for VMID
            Addr base = tlb->gpuDevice->getVM().getPageTableBase(1);
            tlb->walker->setDevRequestor(tlb->gpuDevice->vramRequestorId());

            // Do page table walk
            Fault fault = tlb->walker->startFunctional(base, vaddr, pte,
                                                       logBytes,
                                                       BaseMMU::Mode::Read);
            if (fault != NoFault) {
                fatal("Translation fault in TLB at %d!", __LINE__);
            }

            // PPN is already shifted by fragment so we only shift by native
            // page size. Fragment is still used via logBytes to select lower
            // bits from vaddr.
            Addr page_addr = pte.ppn << PageShift;
            Addr paddr = page_addr + (vaddr & mask(logBytes));
            Addr alignedPaddr = tlb->pageAlign(paddr);
            pkt->req->setPaddr(paddr);
            pkt->req->setSystemReq(pte.s);

            if (!sender_state->isPrefetch) {
                assert(paddr);

                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);

                sender_state->tlbEntry =
                    new VegaTlbEntry(1 /* VMID */, vaddr & (~mask(logBytes)),
                                     alignedPaddr, logBytes, pte);
            } else {
                // If this was a prefetch, then do the normal thing if it
                // was a successful translation. Otherwise, send an empty
                // TLB entry back so that it can be figured out as empty
                // and handled accordingly.
                if (paddr) {
                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", vaddr, paddr);

                    sender_state->tlbEntry =
                        new VegaTlbEntry(1 /* VMID */,
                                         vaddr & (~mask(logBytes)),
                                         alignedPaddr, logBytes, pte);
                } else {
                    DPRINTF(GPUPrefetch, "Prefetch failed %#x\n", vaddr);

                    sender_state->tlbEntry = nullptr;

                    return;
                }
            }
        }
    } else {
        VegaTlbEntry *entry = tlb->lookup(virt_page_addr, update_stats);
        assert(entry);

        if (sender_state->isPrefetch) {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    entry->vaddr);
        }

        sender_state->tlbEntry = new VegaTlbEntry(*entry);
    }

    // This is the function that would populate pkt->req with the paddr of
    // the translation. But if no translation happens (i.e., a prefetch
    // fails), then the early returns in the above code will keep this
    // function from executing.
    tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
}

void
GpuTLB::CpuSidePort::recvReqRetry()
{
    // The CpuSidePort never sends anything but replies. No retries
    // expected.
    panic("recvReqRetry called");
}

AddrRangeList
GpuTLB::CpuSidePort::getAddrRanges() const
{
    // currently not checked by the requestor
    AddrRangeList ranges;

    return ranges;
}

/**
 * MemSidePort receives the packet back.
 * We need to call the handleTranslationReturn
 * and propagate up the hierarchy.
 */
bool
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
{
    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    VegaISA::PageBytes);

    DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
            virt_page_addr);

    TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
    assert(tlb_event);
    assert(virt_page_addr == tlb_event->getTLBEventVaddr());

    tlb_event->updateOutcome(MISS_RETURN);
    tlb->schedule(tlb_event, curTick() + tlb->clockPeriod());

    return true;
}

void
GpuTLB::MemSidePort::recvReqRetry()
{
    // No retries should reach the TLB. The retries
    // should only reach the TLBCoalescer.
    panic("recvReqRetry called");
}

void
GpuTLB::cleanup()
{
    while (!cleanupQueue.empty()) {
        Addr cleanup_addr = cleanupQueue.front();
        cleanupQueue.pop();

        DPRINTF(GPUTLB, "Deleting return event for %#lx\n", cleanup_addr);

        // delete TLBEvent
        TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
        delete old_tlb_event;
        translationReturnEvent.erase(cleanup_addr);

        // update number of outstanding requests
        outstandingReqs--;
    }

    // Retry any requests the coalescer above may have pending now that
    // slots have been freed.
    for (int i = 0; i < cpuSidePort.size(); ++i) {
        cpuSidePort[i]->sendRetryReq();
    }
}

GpuTLB::VegaTLBStats::VegaTLBStats(statistics::Group *parent)
    : statistics::Group(parent),
      ADD_STAT(maxDownstreamReached, "Number of refused translation requests"),
      ADD_STAT(outstandingReqsMax, "Maximum count in coalesced request queue"),
      ADD_STAT(localNumTLBAccesses, "Number of TLB accesses"),
      ADD_STAT(localNumTLBHits, "Number of TLB hits"),
      ADD_STAT(localNumTLBMisses, "Number of TLB misses"),
      ADD_STAT(localTLBMissRate, "TLB miss rate"),
      ADD_STAT(globalNumTLBAccesses, "Number of TLB accesses"),
      ADD_STAT(globalNumTLBHits, "Number of TLB hits"),
      ADD_STAT(globalNumTLBMisses, "Number of TLB misses"),
      ADD_STAT(globalTLBMissRate, "TLB miss rate"),
      ADD_STAT(accessCycles, "Cycles spent accessing this TLB level"),
      ADD_STAT(pageTableCycles, "Cycles spent accessing the page table"),
      ADD_STAT(localCycles, "Number of cycles spent in queue for all "
               "incoming reqs"),
      ADD_STAT(localLatency, "Avg. latency over incoming coalesced reqs")
{
    localLatency = localCycles / localNumTLBAccesses;
    localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

    globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
}

} // namespace VegaISA
} // namespace gem5