gem5 v24.0.0.0
Loading...
Searching...
No Matches
fetch.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2010-2014 ARM Limited
3 * Copyright (c) 2012-2013 AMD
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/fetch.hh"
43
44#include <algorithm>
45#include <cstring>
46#include <list>
47#include <map>
48#include <queue>
49
50#include "arch/generic/tlb.hh"
51#include "base/random.hh"
52#include "base/types.hh"
53#include "cpu/base.hh"
54#include "cpu/exetrace.hh"
56#include "cpu/o3/cpu.hh"
57#include "cpu/o3/dyn_inst.hh"
58#include "cpu/o3/limits.hh"
59#include "debug/Activity.hh"
60#include "debug/Drain.hh"
61#include "debug/Fetch.hh"
62#include "debug/O3CPU.hh"
63#include "debug/O3PipeView.hh"
64#include "mem/packet.hh"
65#include "params/BaseO3CPU.hh"
66#include "sim/byteswap.hh"
67#include "sim/core.hh"
68#include "sim/eventq.hh"
69#include "sim/full_system.hh"
70#include "sim/system.hh"
71
72namespace gem5
73{
74
75namespace o3
76{
77
// Fetch::IcachePort constructor: names the request port "<cpu>.icache_port"
// and records the owning fetch stage.
// NOTE(review): the constructor signature line (source line 78) is missing
// from this extracted view -- confirm against the full fetch.cc.
79 RequestPort(_cpu->name() + ".icache_port"), fetch(_fetch)
 80{}
81
82
// Fetch stage constructor: stores the CPU pointer and SMT fetch policy,
// validates configured widths against compile-time limits, and initializes
// per-thread fetch state (PC, decoder, fetch buffer, stall flags).
// NOTE(review): several initializer-list entries and the guard conditions
// preceding the first/third/fourth fatal() calls are missing from this
// extracted view (source lines 87-90, 92, 94, 96-98, 104, 112, 115, 145) --
// confirm against the full fetch.cc.
83Fetch::Fetch(CPU *_cpu, const BaseO3CPUParams &params)
 84 : fetchPolicy(params.smtFetchPolicy),
 85 cpu(_cpu),
 86 branchPred(nullptr),
 91 fetchWidth(params.fetchWidth),
 93 retryPkt(NULL),
 95 cacheBlkSize(cpu->cacheLineSize()),
 99 numThreads(params.numThreads),
 100 numFetchingThreads(params.smtNumFetchingThreads),
 101 icachePort(this, _cpu),
 102 finishTranslationEvent(this), fetchStats(_cpu, this)
 103{
 // Sanity-check the configuration against compiled limits; fatal() aborts
 // the simulation with the given message.
 105 fatal("numThreads (%d) is larger than compiled limit (%d),\n"
 106 "\tincrease MaxThreads in src/cpu/o3/limits.hh\n",
 107 numThreads, static_cast<int>(MaxThreads));
 108 if (fetchWidth > MaxWidth)
 109 fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
 110 "\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
 111 fetchWidth, static_cast<int>(MaxWidth));
 113 fatal("fetch buffer size (%u bytes) is greater than the cache "
 114 "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
 116 fatal("cache block (%u bytes) is not a multiple of the "
 117 "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
 118
 // Reset fetch state for every possible hardware thread slot, not just
 // the configured ones.
 119 for (int i = 0; i < MaxThreads; i++) {
 120 fetchStatus[i] = Idle;
 121 decoder[i] = nullptr;
 122 pc[i].reset(params.isa[0]->newPCState());
 123 fetchOffset[i] = 0;
 124 macroop[i] = nullptr;
 125 delayedCommit[i] = false;
 126 memReq[i] = nullptr;
 127 stalls[i] = {false, false};
 128 fetchBuffer[i] = NULL;
 129 fetchBufferPC[i] = 0;
 130 fetchBufferValid[i] = false;
 131 lastIcacheStall[i] = 0;
 132 issuePipelinedIfetch[i] = false;
 133 }
 134
 135 branchPred = params.branchPred;
 136
 // For each active thread, hook up its decoder and allocate the buffer
 // that receives icache data.
 137 for (ThreadID tid = 0; tid < numThreads; tid++) {
 138 decoder[tid] = params.decoder[tid];
 139 // Create space to buffer the cache line data,
 140 // which may not hold the entire cache line.
 141 fetchBuffer[tid] = new uint8_t[fetchBufferSize];
 142 }
 143
 144 // Get the size of an instruction.
 146}
147
148std::string Fetch::name() const { return cpu->name() + ".fetch"; }
149
// NOTE(review): this function's name and entire body (source lines 151-157,
// presumably the probe-point registration) are missing from this extracted
// view -- confirm against the full fetch.cc.
150void
 158
// Fetch::FetchStatGroup constructor: registers the fetch stage's statistics
// under the "fetch" group and defines the idleRate formula.
// NOTE(review): the constructor signature (source line 159), most of the
// .prereq()/.init()/.flags() statements (lines 199-229, 233-235), and the
// statistic the trailing .init(...) applies to are missing from this
// extracted view -- confirm against the full fetch.cc.
160 : statistics::Group(cpu, "fetch"),
 161 ADD_STAT(predictedBranches, statistics::units::Count::get(),
 162 "Number of branches that fetch has predicted taken"),
 163 ADD_STAT(cycles, statistics::units::Cycle::get(),
 164 "Number of cycles fetch has run and was not squashing or "
 165 "blocked"),
 166 ADD_STAT(squashCycles, statistics::units::Cycle::get(),
 167 "Number of cycles fetch has spent squashing"),
 168 ADD_STAT(tlbCycles, statistics::units::Cycle::get(),
 169 "Number of cycles fetch has spent waiting for tlb"),
 170 ADD_STAT(idleCycles, statistics::units::Cycle::get(),
 171 "Number of cycles fetch was idle"),
 172 ADD_STAT(blockedCycles, statistics::units::Cycle::get(),
 173 "Number of cycles fetch has spent blocked"),
 174 ADD_STAT(miscStallCycles, statistics::units::Cycle::get(),
 175 "Number of cycles fetch has spent waiting on interrupts, or bad "
 176 "addresses, or out of MSHRs"),
 177 ADD_STAT(pendingDrainCycles, statistics::units::Cycle::get(),
 178 "Number of cycles fetch has spent waiting on pipes to drain"),
 179 ADD_STAT(noActiveThreadStallCycles, statistics::units::Cycle::get(),
 180 "Number of stall cycles due to no active thread to fetch from"),
 181 ADD_STAT(pendingTrapStallCycles, statistics::units::Cycle::get(),
 182 "Number of stall cycles due to pending traps"),
 183 ADD_STAT(pendingQuiesceStallCycles, statistics::units::Cycle::get(),
 184 "Number of stall cycles due to pending quiesce instructions"),
 185 ADD_STAT(icacheWaitRetryStallCycles, statistics::units::Cycle::get(),
 186 "Number of stall cycles due to full MSHR"),
 187 ADD_STAT(cacheLines, statistics::units::Count::get(),
 188 "Number of cache lines fetched"),
 189 ADD_STAT(icacheSquashes, statistics::units::Count::get(),
 190 "Number of outstanding Icache misses that were squashed"),
 191 ADD_STAT(tlbSquashes, statistics::units::Count::get(),
 192 "Number of outstanding ITLB misses that were squashed"),
 193 ADD_STAT(nisnDist, statistics::units::Count::get(),
 194 "Number of instructions fetched each cycle (Total)"),
 195 ADD_STAT(idleRate, statistics::units::Ratio::get(),
 196 "Ratio of cycles fetch was idle",
 197 idleCycles / cpu->baseStats.numCycles)
 198{
 201 cycles
 202 .prereq(cycles);
 230 .init(/* base value */ 0,
 231 /* last value */ fetch->fetchWidth,
 232 /* bucket size */ 1)
 236}
// Stores the main backwards time buffer and (in the missing lines) creates
// the wires used to read decode/rename/IEW/commit information.
// NOTE(review): the signature (source line 238) and the getWire() calls
// (lines 243-246) are missing from this extracted view.
237void
 239{
 240 timeBuffer = time_buffer;
 241
 242 // Create wires to get information from proper places in time buffer.
 247}
248
// NOTE(review): function name and body (source lines 250-253, presumably
// storing the active-threads list pointer) are missing from this extracted
// view -- confirm against the full fetch.cc.
249void
 254
// Stores the fetch-to-decode time buffer and grabs the write wire for the
// current cycle (delay 0).
// NOTE(review): the signature (source line 256) is missing from this
// extracted view.
255void
 257{
 258 // Create wire to write information to proper place in fetch time buf.
 259 toDecode = ftb_ptr->getWire(0);
 260}
261
// Initializes the stage at simulation start: resets all per-thread state
// and (in the missing line 270) switches the stage to active.
// NOTE(review): the signature (source line 263) and the activation call
// (line 270) are missing from this extracted view.
262void
 264{
 265 assert(priorityList.empty());
 266 resetStage();
 267
 268 // Fetch needs to start fetching instructions at the very beginning,
 269 // so it must start up in active state.
 271}
272
// Resets a single thread's fetch state (PC from the CPU's architectural
// state, decoder offsets, stall flags, fetch buffer, fetch queue).
// NOTE(review): the signature (source line 274, taking ThreadID tid) is
// missing from this extracted view.
273void
 275{
 276 fetchStatus[tid] = Running;
 // Re-seed the fetch PC from the thread's committed architectural PC.
 277 set(pc[tid], cpu->pcState(tid));
 278 fetchOffset[tid] = 0;
 279 macroop[tid] = NULL;
 280 delayedCommit[tid] = false;
 281 memReq[tid] = NULL;
 282 stalls[tid].decode = false;
 283 stalls[tid].drain = false;
 284 fetchBufferPC[tid] = 0;
 285 fetchBufferValid[tid] = false;
 286 fetchQueue[tid].clear();
 287
 288 // TODO not sure what to do with priorityList for now
 289 // priorityList.push_back(tid);
 290}
291
// Resets the whole stage: clears global fetch state and re-initializes
// every thread's fetch state, rebuilding the thread priority list.
// NOTE(review): the signature (source line 293) and the final status
// assignment (line 323) are missing from this extracted view.
292void
 294{
 295 numInst = 0;
 296 interruptPending = false;
 297 cacheBlocked = false;
 298
 299 priorityList.clear();
 300
 301 // Setup PC and nextPC with initial state.
 302 for (ThreadID tid = 0; tid < numThreads; ++tid) {
 303 fetchStatus[tid] = Running;
 304 set(pc[tid], cpu->pcState(tid));
 305 fetchOffset[tid] = 0;
 306 macroop[tid] = NULL;
 307
 308 delayedCommit[tid] = false;
 309 memReq[tid] = NULL;
 310
 311 stalls[tid].decode = false;
 312 stalls[tid].drain = false;
 313
 314 fetchBufferPC[tid] = 0;
 315 fetchBufferValid[tid] = false;
 316
 317 fetchQueue[tid].clear();
 318
 319 priorityList.push_back(tid);
 320 }
 321
 322 wroteToTimeBuffer = false;
 324}
325
// Handles an icache response: copies the returned data into the thread's
// fetch buffer, wakes the CPU, and updates the thread's fetch status.
// NOTE(review): the signature (source line 327, taking PacketPtr pkt), the
// stat bump on the squashed path (line 338), and the non-stalled status
// assignment (line 359, presumably IcacheAccessComplete) are missing from
// this extracted view.
326void
 328{
 329 ThreadID tid = cpu->contextToThread(pkt->req->contextId());
 330
 331 DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid);
 332 assert(!cpu->switchedOut());
 333
 334 // Only change the status if it's still waiting on the icache access
 335 // to return.
 336 if (fetchStatus[tid] != IcacheWaitResponse ||
 337 pkt->req != memReq[tid]) {
 // Response belongs to a squashed/stale request; drop it.
 339 delete pkt;
 340 return;
 341 }
 342
 343 memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
 344 fetchBufferValid[tid] = true;
 345
 346 // Wake up the CPU (if it went to sleep and was waiting on
 347 // this completion event).
 348 cpu->wakeCPU();
 349
 350 DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n",
 351 tid);
 352
 354
 355 // Only switch to IcacheAccessComplete if we're not stalled as well.
 356 if (checkStall(tid)) {
 357 fetchStatus[tid] = Blocked;
 358 } else {
 360 }
 361
 362 pkt->req->setAccessLatency();
 363 cpu->ppInstAccessComplete->notify(pkt);
 364 // Reset the mem req to NULL.
 365 delete pkt;
 366 memReq[tid] = NULL;
 367}
368
// Clears all per-thread decode/drain stall flags when the CPU resumes
// from a drain.
// NOTE(review): the signature (source line 370) is missing from this
// extracted view.
369void
 371{
 372 for (ThreadID i = 0; i < numThreads; ++i) {
 373 stalls[i].decode = false;
 374 stalls[i].drain = false;
 375 }
 376}
377
// Asserts that the stage is in a fully drained state: no outstanding
// packets or memory requests, no pending interrupts, every thread idle
// or drain-stalled.
// NOTE(review): the signature (source line 379) and trailing line 392 are
// missing from this extracted view.
378void
 380{
 381 assert(isDrained());
 382 assert(retryPkt == NULL);
 383 assert(retryTid == InvalidThreadID);
 384 assert(!cacheBlocked);
 385 assert(!interruptPending);
 386
 387 for (ThreadID i = 0; i < numThreads; ++i) {
 388 assert(!memReq[i]);
 389 assert(fetchStatus[i] == Idle || stalls[i].drain);
 390 }
 391
 393}
394
// Returns whether the fetch stage has drained: all fetch queues empty and
// every thread idle or drain-blocked.
// NOTE(review): the signature (source line 396) and the final return
// statement (line 422, presumably checking that finishTranslationEvent is
// not scheduled) are missing from this extracted view.
395bool
 397{
 398 /* Make sure that threads are either idle or that the commit stage
 399 * has signaled that draining has completed by setting the drain
 400 * stall flag. This effectively forces the pipeline to be disabled
 401 * until the whole system is drained (simulation may continue to
 402 * drain other components).
 403 */
 404 for (ThreadID i = 0; i < numThreads; ++i) {
 405 // Verify fetch queues are drained
 406 if (!fetchQueue[i].empty())
 407 return false;
 408
 409 // Return false if not idle or drain stalled
 410 if (fetchStatus[i] != Idle) {
 411 if (fetchStatus[i] == Blocked && stalls[i].drain)
 412 continue;
 413 else
 414 return false;
 415 }
 416 }
 417
 418 /* The pipeline might start up again in the middle of the drain
 419 * cycle if the finish translation event is scheduled, so make
 420 * sure that's not the case.
 421 */
 423}
424
// Re-initializes the stage when this CPU takes over execution from
// another CPU model; requires a connected instruction port.
// NOTE(review): the signature (source line 426) is missing from this
// extracted view.
425void
 427{
 428 assert(cpu->getInstPort().isConnected());
 429 resetStage();
 430
 431}
432
// Marks a thread as drain-stalled; only legal while the CPU is draining.
// NOTE(review): the signature (source line 434, taking ThreadID tid) is
// missing from this extracted view.
433void
 435{
 436 assert(cpu->isDraining());
 437 assert(!stalls[tid].drain);
 438 DPRINTF(Drain, "%i: Thread drained.\n", tid);
 439 stalls[tid].drain = true;
 440}
441
// Wakes thread 0 from a quiesce by setting it back to Running.
// NOTE(review): the signature (source line 443) is missing from this
// extracted view.
442void
 444{
 445 DPRINTF(Fetch, "Waking up from quiesce\n");
 446 // Hopefully this is safe
 447 // @todo: Allow other threads to wake from quiesce.
 448 fetchStatus[0] = Running;
 449}
450
// Transitions the stage to Active if it was Inactive.
// NOTE(review): the signature (source line 452) and the CPU notification
// (line 457, presumably cpu->activateStage(...)) are missing from this
// extracted view.
451void
 453{
 454 if (_status == Inactive) {
 455 DPRINTF(Activity, "Activating stage.\n");
 456
 458
 459 _status = Active;
 460 }
 461}
462
// Transitions the stage to Inactive if it was Active.
// NOTE(review): the signature (source line 464), the CPU notification
// (line 469), and the status assignment (line 471) are missing from this
// extracted view.
463void
 465{
 466 if (_status == Active) {
 467 DPRINTF(Activity, "Deactivating stage.\n");
 468
 470
 472 }
 473}
474
// Removes a thread from the fetch priority list when it is deactivated.
// NOTE(review): the signature (source line 476, taking ThreadID tid) is
// missing from this extracted view.
475void
 477{
 478 // Update priority list
 479 auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
 480 if (thread_it != priorityList.end()) {
 481 priorityList.erase(thread_it);
 482 }
 483}
484
// Runs the branch predictor for one instruction and updates next_pc with
// the predicted target; returns whether the branch was predicted taken.
// Non-control instructions simply advance the PC and return false.
// NOTE(review): the signature (source line 486, taking the DynInstPtr and
// a PCStateBase &next_pc) and the predicted-branch stat bump (line 523)
// are missing from this extracted view.
485bool
 487{
 488 // Do branch prediction check here.
 489 // A bit of a misnomer...next_PC is actually the current PC until
 490 // this function updates it.
 491 bool predict_taken;
 492
 493 if (!inst->isControl()) {
 // Not a branch: sequential advance, predicted not taken.
 494 inst->staticInst->advancePC(next_pc);
 495 inst->setPredTarg(next_pc);
 496 inst->setPredTaken(false);
 497 return false;
 498 }
 499
 500 ThreadID tid = inst->threadNumber;
 501 predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
 502 next_pc, tid);
 503
 504 if (predict_taken) {
 505 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
 506 "predicted to be taken to %s\n",
 507 tid, inst->seqNum, inst->pcState().instAddr(), next_pc);
 508 } else {
 509 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
 510 "predicted to be not taken\n",
 511 tid, inst->seqNum, inst->pcState().instAddr());
 512 }
 513
 514 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
 515 "predicted to go to %s\n",
 516 tid, inst->seqNum, inst->pcState().instAddr(), next_pc);
 517 inst->setPredTarg(next_pc);
 518 inst->setPredTaken(predict_taken);
 519
 520 cpu->fetchStats[tid]->numBranches++;
 521
 522 if (predict_taken) {
 524 }
 525
 526 return predict_taken;
 527}
528
// Starts a timing fetch of the fetch-buffer-aligned block containing
// vaddr: builds the Request, records it in memReq[tid], sets the thread
// to ItlbWait, and kicks off timing translation. Returns false if fetch
// must hold off (cache blocked or interrupt pending).
// NOTE(review): the signature (source line 530) and the Request flags /
// fetch-PC argument line (line 563) are missing from this extracted view.
529bool
 531{
 532 Fault fault = NoFault;
 533
 534 assert(!cpu->switchedOut());
 535
 536 // @todo: not sure if these should block translation.
 537 //AlphaDep
 538 if (cacheBlocked) {
 539 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
 540 tid);
 541 return false;
 542 } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
 543 // Hold off fetch from getting new instructions when:
 544 // Cache is blocked, or
 545 // while an interrupt is pending and we're not in PAL mode, or
 546 // fetch is switched out.
 547 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
 548 tid);
 549 return false;
 550 }
 551
 552 // Align the fetch address to the start of a fetch buffer segment.
 553 Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
 554
 555 DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
 556 tid, fetchBufferBlockPC, vaddr);
 557
 558 // Setup the memReq to do a read of the first instruction's address.
 559 // Set the appropriate read size and flags as well.
 560 // Build request here.
 561 RequestPtr mem_req = std::make_shared<Request>(
 562 fetchBufferBlockPC, fetchBufferSize,
 564 cpu->thread[tid]->contextId());
 565
 566 mem_req->taskId(cpu->taskId());
 567
 568 memReq[tid] = mem_req;
 569
 570 // Initiate translation of the icache block
 571 fetchStatus[tid] = ItlbWait;
 // FetchTranslation deletes itself after finishTranslation is called.
 572 FetchTranslation *trans = new FetchTranslation(this);
 573 cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(),
 574 trans, BaseMMU::Execute);
 575 return true;
 576}
577
// Completion callback for the timing ITLB translation started in
// fetchCacheLine(). On success, builds and sends the icache read packet
// (handling the out-of-MSHRs retry case); on a fault, injects a nop
// carrying the fault so commit can handle it.
// NOTE(review): several lines are missing from this extracted view
// (source lines 593, 619, 627, 636, 645-648, 652, 674, 676, 682),
// including the IcacheWaitResponse status assignment and the
// finishTranslationEvent rescheduling on the full-queue path -- confirm
// against the full fetch.cc.
578void
579Fetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req)
 580{
 581 ThreadID tid = cpu->contextToThread(mem_req->contextId());
 582 Addr fetchBufferBlockPC = mem_req->getVaddr();
 583
 584 assert(!cpu->switchedOut());
 585
 586 // Wake up CPU if it was idle
 587 cpu->wakeCPU();
 588
 // Ignore stale completions for requests that were squashed meanwhile.
 589 if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
 590 mem_req->getVaddr() != memReq[tid]->getVaddr()) {
 591 DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
 592 tid);
 594 return;
 595 }
 596
 597
 598 // If translation was successful, attempt to read the icache block.
 599 if (fault == NoFault) {
 600 // Check that we're not going off into random memory
 601 // If we have, just wait around for commit to squash something and put
 602 // us on the right track
 603 if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
 604 warn("Address %#x is outside of physical memory, stopping fetch\n",
 605 mem_req->getPaddr());
 606 fetchStatus[tid] = NoGoodAddr;
 607 memReq[tid] = NULL;
 608 return;
 609 }
 610
 611 // Build packet here.
 612 PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
 613 data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
 614
 615 fetchBufferPC[tid] = fetchBufferBlockPC;
 616 fetchBufferValid[tid] = false;
 617 DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
 618
 620
 621 // Access the cache.
 622 if (!icachePort.sendTimingReq(data_pkt)) {
 // Cache could not accept the request; stash it for retry.
 623 assert(retryPkt == NULL);
 624 assert(retryTid == InvalidThreadID);
 625 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
 626
 628 retryPkt = data_pkt;
 629 retryTid = tid;
 630 cacheBlocked = true;
 631 } else {
 632 DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid);
 633 DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache "
 634 "response.\n", tid);
 635 lastIcacheStall[tid] = curTick();
 637 // Notify Fetch Request probe when a packet containing a fetch
 638 // request is successfully sent
 639 ppFetchRequestSent->notify(mem_req);
 640 }
 641 } else {
 642 // Don't send an instruction to decode if we can't handle it.
 643 if (!(numInst < fetchWidth) ||
 644 !(fetchQueue[tid].size() < fetchQueueSize)) {
 // No room this cycle: retry the fault delivery next cycle.
 649 cpu->clockEdge(Cycles(1)));
 650 return;
 651 }
 653 "[tid:%i] Got back req with addr %#x but expected %#x\n",
 654 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
 655 // Translation faulted, icache request won't be sent.
 656 memReq[tid] = NULL;
 657
 658 // Send the fault to commit. This thread will not do anything
 659 // until commit handles the fault. The only other way it can
 660 // wake up is if a squash comes along and changes the PC.
 661 const PCStateBase &fetch_pc = *pc[tid];
 662
 663 DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
 664 // We will use a nop in order to carry the fault.
 665 DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
 666 fetch_pc, fetch_pc, false);
 667 instruction->setNotAnInst();
 668
 669 instruction->setPredTarg(fetch_pc);
 670 instruction->fault = fault;
 671 wroteToTimeBuffer = true;
 672
 673 DPRINTF(Activity, "Activity this cycle.\n");
 675
 677
 678 DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid);
 679 DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n",
 680 tid, fault->name(), *pc[tid]);
 681 }
 683}
684
// Core squash logic for one thread: resets the PC and decoder state,
// cancels outstanding icache/ITLB requests and retry packets, empties the
// fetch queue, and puts the thread into Squashing.
// NOTE(review): source lines 719 (presumably retryTid = InvalidThreadID)
// and 734 (presumably a squash stat bump) are missing from this extracted
// view.
685void
686Fetch::doSquash(const PCStateBase &new_pc, const DynInstPtr squashInst,
 687 ThreadID tid)
 688{
 689 DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
 690 tid, new_pc);
 691
 692 set(pc[tid], new_pc);
 693 fetchOffset[tid] = 0;
 // If squashing to the middle of a macroop, keep it so micro-op fetch
 // can resume; otherwise clear it.
 694 if (squashInst && squashInst->pcState().instAddr() == new_pc.instAddr() &&
 695 !squashInst->isLastMicroop())
 696 macroop[tid] = squashInst->macroop;
 697 else
 698 macroop[tid] = NULL;
 699 decoder[tid]->reset();
 700
 701 // Clear the icache miss if it's outstanding.
 702 if (fetchStatus[tid] == IcacheWaitResponse) {
 703 DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n",
 704 tid);
 705 memReq[tid] = NULL;
 706 } else if (fetchStatus[tid] == ItlbWait) {
 707 DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n",
 708 tid);
 709 memReq[tid] = NULL;
 710 }
 711
 712 // Get rid of the retrying packet if it was from this thread.
 713 if (retryTid == tid) {
 714 assert(cacheBlocked);
 715 if (retryPkt) {
 716 delete retryPkt;
 717 }
 718 retryPkt = NULL;
 720 }
 721
 722 fetchStatus[tid] = Squashing;
 723
 724 // Empty fetch queue
 725 fetchQueue[tid].clear();
 726
 727 // microops are being squashed, it is not known whether the
 728 // youngest non-squashed microop was marked delayed commit
 729 // or not. Setting the flag to true ensures that the
 730 // interrupts are not handled when they cannot be, though
 731 // some opportunities to handle interrupts may be missed.
 732 delayedCommit[tid] = true;
 733
 735}
736
737void
738Fetch::squashFromDecode(const PCStateBase &new_pc, const DynInstPtr squashInst,
739 const InstSeqNum seq_num, ThreadID tid)
740{
741 DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
742
743 doSquash(new_pc, squashInst, tid);
744
745 // Tell the CPU to remove any instructions that are in flight between
746 // fetch and decode.
747 cpu->removeInstsUntil(seq_num, tid);
748}
749
// Returns true if the given thread is currently drain-stalled.
// NOTE(review): the signature (source line 751, taking ThreadID tid) is
// missing from this extracted view.
750bool
 752{
 753 bool ret_val = false;
 754
 755 if (stalls[tid].drain) {
 756 assert(cpu->isDraining());
 757 DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid);
 758 ret_val = true;
 759 }
 760
 761 return ret_val;
 762}
763
// Computes the overall stage status: Active if any active thread is
// Running, Squashing, or has a completed icache access; otherwise
// Inactive. Notifies the CPU on transitions.
// NOTE(review): the return type/signature (source lines 764-765), the
// active-thread iterator setup (lines 768-769), part of the status
// condition (line 776), and the cpu->activateStage/deactivateStage calls
// (lines 786, 797) are missing from this extracted view.
766{
 767 //Check Running
 770
 771 while (threads != end) {
 772 ThreadID tid = *threads++;
 773
 774 if (fetchStatus[tid] == Running ||
 775 fetchStatus[tid] == Squashing ||
 777
 778 if (_status == Inactive) {
 779 DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid);
 780
 781 if (fetchStatus[tid] == IcacheAccessComplete) {
 782 DPRINTF(Activity, "[tid:%i] Activating fetch due to cache"
 783 "completion\n",tid);
 784 }
 785
 787 }
 788
 789 return Active;
 790 }
 791 }
 792
 793 // Stage is switching from active to inactive, notify CPU of it.
 794 if (_status == Active) {
 795 DPRINTF(Activity, "Deactivating stage.\n");
 796
 798 }
 799
 800 return Inactive;
 801}
802
// Squash triggered by the commit stage (e.g. branch mispredict or trap
// resolved at commit): resets fetch state via doSquash().
// NOTE(review): the CPU call removing instructions not in the ROB
// (source line 812) is missing from this extracted view.
803void
804Fetch::squash(const PCStateBase &new_pc, const InstSeqNum seq_num,
 805 DynInstPtr squashInst, ThreadID tid)
 806{
 807 DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
 808
 809 doSquash(new_pc, squashInst, tid);
 810
 811 // Tell the CPU to remove any instructions that are not in the ROB.
 813}
814
// Main per-cycle entry point for the fetch stage: updates thread statuses,
// samples interrupt signals from commit, fetches for the active threads,
// issues pipelined icache requests, and moves up to decodeWidth
// instructions from the per-thread fetch queues to decode, starting from
// a randomly chosen active thread for SMT fairness.
// NOTE(review): the signature (source line 816), the thread-iterator setup
// (lines 818-819), the fetching-threads loop header (line 849), the
// nisnDist sample (line 856), the updateFetchStatus call (line 860), the
// pipelinedIcacheAccesses call (line 866), and the activity notification
// (line 910) are missing from this extracted view.
815void
 817{
 820 bool status_change = false;
 821
 822 wroteToTimeBuffer = false;
 823
 824 for (ThreadID i = 0; i < numThreads; ++i) {
 825 issuePipelinedIfetch[i] = false;
 826 }
 827
 828 while (threads != end) {
 829 ThreadID tid = *threads++;
 830
 831 // Check the signals for each thread to determine the proper status
 832 // for each thread.
 833 bool updated_status = checkSignalsAndUpdate(tid);
 834 status_change = status_change || updated_status;
 835 }
 836
 837 DPRINTF(Fetch, "Running stage.\n");
 838
 // In full-system mode, sample interrupt state broadcast by commit.
 839 if (FullSystem) {
 840 if (fromCommit->commitInfo[0].interruptPending) {
 841 interruptPending = true;
 842 }
 843
 844 if (fromCommit->commitInfo[0].clearInterrupt) {
 845 interruptPending = false;
 846 }
 847 }
 848
 850 threadFetched++) {
 851 // Fetch each of the actively fetching threads.
 852 fetch(status_change);
 853 }
 854
 855 // Record number of instructions fetched this cycle for distribution.
 857
 858 if (status_change) {
 859 // Change the fetch stage status if there was a status change.
 861 }
 862
 863 // Issue the next I-cache request if possible.
 864 for (ThreadID i = 0; i < numThreads; ++i) {
 865 if (issuePipelinedIfetch[i]) {
 867 }
 868 }
 869
 870 // Send instructions enqueued into the fetch queue to decode.
 871 // Limit rate by fetchWidth. Stall if decode is stalled.
 872 unsigned insts_to_decode = 0;
 873 unsigned available_insts = 0;
 874
 875 for (auto tid : *activeThreads) {
 876 if (!stalls[tid].decode) {
 877 available_insts += fetchQueue[tid].size();
 878 }
 879 }
 880
 881 // Pick a random thread to start trying to grab instructions from
 882 auto tid_itr = activeThreads->begin();
 883 std::advance(tid_itr,
 884 random_mt.random<uint8_t>(0, activeThreads->size() - 1));
 885
 886 while (available_insts != 0 && insts_to_decode < decodeWidth) {
 887 ThreadID tid = *tid_itr;
 888 if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
 889 const auto& inst = fetchQueue[tid].front();
 890 toDecode->insts[toDecode->size++] = inst;
 891 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode "
 892 "from fetch queue. Fetch queue size: %i.\n",
 893 tid, inst->seqNum, fetchQueue[tid].size());
 894
 895 wroteToTimeBuffer = true;
 896 fetchQueue[tid].pop_front();
 897 insts_to_decode++;
 898 available_insts--;
 899 }
 900
 901 tid_itr++;
 902 // Wrap around if at end of active threads list
 903 if (tid_itr == activeThreads->end())
 904 tid_itr = activeThreads->begin();
 905 }
 906
 907 // If there was activity this cycle, inform the CPU of it.
 908 if (wroteToTimeBuffer) {
 909 DPRINTF(Activity, "Activity this cycle.\n");
 911 }
 912
 913 // Reset the number of the instruction we've fetched.
 914 numInst = 0;
 915}
916
// Processes the backwards signals from decode and commit for one thread:
// applies decode block/unblock, handles squashes from commit and decode
// (updating the branch predictor accordingly), and resolves the thread's
// next fetch status. Returns true if the status changed.
// NOTE(review): the signature (source line 918, taking ThreadID tid) and
// two of the status exclusions in the blocked check (lines 991-992) are
// missing from this extracted view.
917bool
 919{
 920 // Update the per thread stall statuses.
 921 if (fromDecode->decodeBlock[tid]) {
 922 stalls[tid].decode = true;
 923 }
 924
 925 if (fromDecode->decodeUnblock[tid]) {
 926 assert(stalls[tid].decode);
 927 assert(!fromDecode->decodeBlock[tid]);
 928 stalls[tid].decode = false;
 929 }
 930
 931 // Check squash signals from commit.
 932 if (fromCommit->commitInfo[tid].squash) {
 933
 934 DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
 935 "from commit.\n",tid);
 936 // In any case, squash.
 937 squash(*fromCommit->commitInfo[tid].pc,
 938 fromCommit->commitInfo[tid].doneSeqNum,
 939 fromCommit->commitInfo[tid].squashInst, tid);
 940
 941 // If it was a branch mispredict on a control instruction, update the
 942 // branch predictor with that instruction, otherwise just kill the
 943 // invalid state we generated in after sequence number
 944 if (fromCommit->commitInfo[tid].mispredictInst &&
 945 fromCommit->commitInfo[tid].mispredictInst->isControl()) {
 946 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
 947 *fromCommit->commitInfo[tid].pc,
 948 fromCommit->commitInfo[tid].branchTaken, tid);
 949 } else {
 950 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
 951 tid);
 952 }
 953
 954 return true;
 955 } else if (fromCommit->commitInfo[tid].doneSeqNum) {
 956 // Update the branch predictor if it wasn't a squashed instruction
 957 // that was broadcasted.
 958 branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
 959 }
 960
 961 // Check squash signals from decode.
 962 if (fromDecode->decodeInfo[tid].squash) {
 963 DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
 964 "from decode.\n",tid);
 965
 966 // Update the branch predictor.
 967 if (fromDecode->decodeInfo[tid].branchMispredict) {
 968 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
 969 *fromDecode->decodeInfo[tid].nextPC,
 970 fromDecode->decodeInfo[tid].branchTaken, tid);
 971 } else {
 972 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
 973 tid);
 974 }
 975
 976 if (fetchStatus[tid] != Squashing) {
 977
 978 DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
 979 *fromDecode->decodeInfo[tid].nextPC);
 980 // Squash unless we're already squashing
 981 squashFromDecode(*fromDecode->decodeInfo[tid].nextPC,
 982 fromDecode->decodeInfo[tid].squashInst,
 983 fromDecode->decodeInfo[tid].doneSeqNum,
 984 tid);
 985
 986 return true;
 987 }
 988 }
 989
 // Stall-driven block, unless the thread is mid-translation or quiescing.
 990 if (checkStall(tid) &&
 993 fetchStatus[tid] != ItlbWait &&
 994 fetchStatus[tid] != QuiescePending) {
 995 DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid);
 996
 997 fetchStatus[tid] = Blocked;
 998
 999 return true;
 1000 }
 1001
 1002 if (fetchStatus[tid] == Blocked ||
 1003 fetchStatus[tid] == Squashing) {
 1004 // Switch status to running if fetch isn't being told to block or
 1005 // squash this cycle.
 1006 DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n",
 1007 tid);
 1008
 1009 fetchStatus[tid] = Running;
 1010
 1011 return true;
 1012 }
 1013
 1014 // If we've reached this point, we have not gotten any signals that
 1015 // cause fetch to change its status. Fetch remains the same as before.
 1016 return false;
 1017}
1018
// Creates a DynInst for a fetched static instruction, registers it with
// the CPU's instruction list, appends it to the thread's fetch queue, and
// tracks whether an interrupt may be taken at this boundary.
// NOTE(review): the return type and first signature line (source lines
// 1019-1020, DynInstPtr Fetch::buildInst(ThreadID tid, StaticInstPtr
// staticInst, ...)) and the sequence-number acquisition (line 1025) are
// missing from this extracted view.
 1021 StaticInstPtr curMacroop, const PCStateBase &this_pc,
 1022 const PCStateBase &next_pc, bool trace)
 1023{
 1024 // Get a sequence number.
 1026
 1027 DynInst::Arrays arrays;
 1028 arrays.numSrcs = staticInst->numSrcRegs();
 1029 arrays.numDests = staticInst->numDestRegs();
 1030
 1031 // Create a new DynInst from the instruction fetched.
 1032 DynInstPtr instruction = new (arrays) DynInst(
 1033 arrays, staticInst, curMacroop, this_pc, next_pc, seq, cpu);
 1034 instruction->setTid(tid);
 1035
 1036 instruction->setThreadState(cpu->thread[tid]);
 1037
 1038 DPRINTF(Fetch, "[tid:%i] Instruction PC %s created [sn:%lli].\n",
 1039 tid, this_pc, seq);
 1040
 1041 DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid,
 1042 instruction->staticInst->disassemble(this_pc.instAddr()));
 1043
 1044#if TRACING_ON
 1045 if (trace) {
 1046 instruction->traceData =
 1048 instruction->staticInst, this_pc, curMacroop);
 1049 }
 1050#else
 1051 instruction->traceData = NULL;
 1052#endif
 1053
 1054 // Add instruction to the CPU's list of instructions.
 1055 instruction->setInstListIt(cpu->addInst(instruction));
 1056
 1057 // Write the instruction to the first slot in the queue
 1058 // that heads to decode.
 1059 assert(numInst < fetchWidth);
 1060 fetchQueue[tid].push_back(instruction);
 1061 assert(fetchQueue[tid].size() <= fetchQueueSize);
 1062 DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n",
 1063 tid, fetchQueue[tid].size(), fetchQueueSize);
 1064 //toDecode->insts[toDecode->size++] = instruction;
 1065
 1066 // Keep track of if we can take an interrupt at this boundary
 1067 delayedCommit[tid] = instruction->isDelayedCommit();
 1068
 1069 return instruction;
 1070}
1071
1072void
1073Fetch::fetch(bool &status_change)
1074{
1076 // Start actual fetch
1079
1080 assert(!cpu->switchedOut());
1081
1082 if (tid == InvalidThreadID) {
1083 // Breaks looping condition in tick()
1085
1086 if (numThreads == 1) { // @todo Per-thread stats
1087 profileStall(0);
1088 }
1089
1090 return;
1091 }
1092
1093 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1094
1095 // The current PC.
1096 PCStateBase &this_pc = *pc[tid];
1097
1098 Addr pcOffset = fetchOffset[tid];
1099 Addr fetchAddr = (this_pc.instAddr() + pcOffset) & decoder[tid]->pcMask();
1100
1101 bool inRom = isRomMicroPC(this_pc.microPC());
1102
1103 // If returning from the delay of a cache miss, then update the status
1104 // to running, otherwise do the cache access. Possibly move this up
1105 // to tick() function.
1106 if (fetchStatus[tid] == IcacheAccessComplete) {
1107 DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid);
1108
1109 fetchStatus[tid] = Running;
1110 status_change = true;
1111 } else if (fetchStatus[tid] == Running) {
1112 // Align the fetch PC so its at the start of a fetch buffer segment.
1113 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1114
1115 // If buffer is no longer valid or fetchAddr has moved to point
1116 // to the next cache block, AND we have no remaining ucode
1117 // from a macro-op, then start fetch from icache.
1118 if (!(fetchBufferValid[tid] &&
1119 fetchBufferBlockPC == fetchBufferPC[tid]) && !inRom &&
1120 !macroop[tid]) {
1121 DPRINTF(Fetch, "[tid:%i] Attempting to translate and read "
1122 "instruction, starting at PC %s.\n", tid, this_pc);
1123
1124 fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
1125
1126 if (fetchStatus[tid] == IcacheWaitResponse) {
1127 cpu->fetchStats[tid]->icacheStallCycles++;
1128 }
1129 else if (fetchStatus[tid] == ItlbWait)
1131 else
1133 return;
1134 } else if (checkInterrupt(this_pc.instAddr()) &&
1135 !delayedCommit[tid]) {
1136 // Stall CPU if an interrupt is posted and we're not issuing
1137 // an delayed commit micro-op currently (delayed commit
1138 // instructions are not interruptable by interrupts, only faults)
1140 DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid);
1141 return;
1142 }
1143 } else {
1144 if (fetchStatus[tid] == Idle) {
1146 DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid);
1147 }
1148
1149 // Status is Idle, so fetch should do nothing.
1150 return;
1151 }
1152
1154
1155 std::unique_ptr<PCStateBase> next_pc(this_pc.clone());
1156
1157 StaticInstPtr staticInst = NULL;
1158 StaticInstPtr curMacroop = macroop[tid];
1159
1160 // If the read of the first instruction was successful, then grab the
1161 // instructions from the rest of the cache line and put them into the
1162 // queue heading to decode.
1163
1164 DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to "
1165 "decode.\n", tid);
1166
1167 // Need to keep track of whether or not a predicted branch
1168 // ended this fetch block.
1169 bool predictedBranch = false;
1170
1171 // Need to halt fetch if quiesce instruction detected
1172 bool quiesce = false;
1173
1174 const unsigned numInsts = fetchBufferSize / instSize;
1175 unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1176
1177 auto *dec_ptr = decoder[tid];
1178 const Addr pc_mask = dec_ptr->pcMask();
1179
1180 // Loop through instruction memory from the cache.
1181 // Keep issuing while fetchWidth is available and branch is not
1182 // predicted taken
1183 while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
1184 && !predictedBranch && !quiesce) {
1185 // We need to process more memory if we aren't going to get a
1186 // StaticInst from the rom, the current macroop, or what's already
1187 // in the decoder.
1188 bool needMem = !inRom && !curMacroop && !dec_ptr->instReady();
1189 fetchAddr = (this_pc.instAddr() + pcOffset) & pc_mask;
1190 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1191
1192 if (needMem) {
1193 // If buffer is no longer valid or fetchAddr has moved to point
1194 // to the next cache block then start fetch from icache.
1195 if (!fetchBufferValid[tid] ||
1196 fetchBufferBlockPC != fetchBufferPC[tid])
1197 break;
1198
1199 if (blkOffset >= numInsts) {
1200 // We need to process more memory, but we've run out of the
1201 // current block.
1202 break;
1203 }
1204
1205 memcpy(dec_ptr->moreBytesPtr(),
1206 fetchBuffer[tid] + blkOffset * instSize, instSize);
1207 decoder[tid]->moreBytes(this_pc, fetchAddr);
1208
1209 if (dec_ptr->needMoreBytes()) {
1210 blkOffset++;
1211 fetchAddr += instSize;
1212 pcOffset += instSize;
1213 }
1214 }
1215
1216 // Extract as many instructions and/or microops as we can from
1217 // the memory we've processed so far.
1218 do {
1219 if (!(curMacroop || inRom)) {
1220 if (dec_ptr->instReady()) {
1221 staticInst = dec_ptr->decode(this_pc);
1222
1223 // Increment stat of fetched instructions.
1224 cpu->fetchStats[tid]->numInsts++;
1225
1226 if (staticInst->isMacroop()) {
1227 curMacroop = staticInst;
1228 } else {
1229 pcOffset = 0;
1230 }
1231 } else {
1232 // We need more bytes for this instruction so blkOffset and
1233 // pcOffset will be updated
1234 break;
1235 }
1236 }
1237 // Whether we're moving to a new macroop because we're at the
1238 // end of the current one, or the branch predictor incorrectly
1239 // thinks we are...
1240 bool newMacro = false;
1241 if (curMacroop || inRom) {
1242 if (inRom) {
1243 staticInst = dec_ptr->fetchRomMicroop(
1244 this_pc.microPC(), curMacroop);
1245 } else {
1246 staticInst = curMacroop->fetchMicroop(this_pc.microPC());
1247 }
1248 newMacro |= staticInst->isLastMicroop();
1249 }
1250
1251 DynInstPtr instruction = buildInst(
1252 tid, staticInst, curMacroop, this_pc, *next_pc, true);
1253
1254 ppFetch->notify(instruction);
1255 numInst++;
1256
1257#if TRACING_ON
1258 if (debug::O3PipeView) {
1259 instruction->fetchTick = curTick();
1260 }
1261#endif
1262
1263 set(next_pc, this_pc);
1264
1265 // If we're branching after this instruction, quit fetching
1266 // from the same block.
1267 predictedBranch |= this_pc.branching();
1268 predictedBranch |= lookupAndUpdateNextPC(instruction, *next_pc);
1269 if (predictedBranch) {
1270 DPRINTF(Fetch, "Branch detected with PC = %s\n", this_pc);
1271 }
1272
1273 newMacro |= this_pc.instAddr() != next_pc->instAddr();
1274
1275 // Move to the next instruction, unless we have a branch.
1276 set(this_pc, *next_pc);
1277 inRom = isRomMicroPC(this_pc.microPC());
1278
1279 if (newMacro) {
1280 fetchAddr = this_pc.instAddr() & pc_mask;
1281 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1282 pcOffset = 0;
1283 curMacroop = NULL;
1284 }
1285
1286 if (instruction->isQuiesce()) {
1287 DPRINTF(Fetch,
1288 "Quiesce instruction encountered, halting fetch!\n");
1290 status_change = true;
1291 quiesce = true;
1292 break;
1293 }
1294 } while ((curMacroop || dec_ptr->instReady()) &&
1295 numInst < fetchWidth &&
1296 fetchQueue[tid].size() < fetchQueueSize);
1297
1298 // Re-evaluate whether the next instruction to fetch is in micro-op ROM
1299 // or not.
1300 inRom = isRomMicroPC(this_pc.microPC());
1301 }
1302
1303 if (predictedBranch) {
1304 DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch "
1305 "instruction encountered.\n", tid);
1306 } else if (numInst >= fetchWidth) {
1307 DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth "
1308 "for this cycle.\n", tid);
1309 } else if (blkOffset >= fetchBufferSize) {
1310 DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the"
1311 "fetch buffer.\n", tid);
1312 }
1313
1314 macroop[tid] = curMacroop;
1315 fetchOffset[tid] = pcOffset;
1316
1317 if (numInst > 0) {
1318 wroteToTimeBuffer = true;
1319 }
1320
1321 // pipeline a fetch if we're crossing a fetch buffer boundary and not in
1322 // a state that would preclude fetching
1323 fetchAddr = (this_pc.instAddr() + pcOffset) & pc_mask;
1324 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1325 issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
1327 fetchStatus[tid] != ItlbWait &&
1328 fetchStatus[tid] != IcacheWaitRetry &&
1329 fetchStatus[tid] != QuiescePending &&
1330 !curMacroop;
1331}
1332
// Fetch::recvReqRetry() — handle the I-cache port's retry callback after a
// previously rejected fetch request.
// NOTE(review): this listing is a Doxygen rendering with hyperlinked lines
// elided (e.g. the icachePort resend call and the probe notify around lines
// 1339-1347); consult the original fetch.cc before editing.
1333void
1335{
// A pending retryPkt means a prior fetch request was refused while the
// cache was blocked and must be resent now.
1336 if (retryPkt != NULL) {
1337 assert(cacheBlocked);
1338 assert(retryTid != InvalidThreadID);
1340
1343 // Notify Fetch Request probe when a retryPkt is successfully sent.
1344 // Note that notify must be called before retryPkt is set to NULL.
// On the (elided) successful-send path: drop ownership of the packet and
// unblock the cache so new fetch accesses may issue.
1346 retryPkt = NULL;
1348 cacheBlocked = false;
1349 }
1350 } else {
// No packet pending: the access was squashed after it was sent out, so
// there is nothing to resend — just clear the blocked flag.
1351 assert(retryTid == InvalidThreadID);
1352 // Access has been squashed since it was sent out. Just clear
1353 // the cache being blocked.
1354 cacheBlocked = false;
1355 }
1356}
1357
1359// //
1360// SMT FETCH POLICY MAINTAINED HERE //
1361// //
// Fetch::getFetchingThread() — choose which thread fetches this cycle.
// With more than one hardware thread, dispatch to the configured SMT fetch
// policy; single-threaded mode just checks whether the lone active thread
// is in a fetchable state. Returns InvalidThreadID when nothing can fetch.
// NOTE(review): the signature line and some interior lines are elided in
// this Doxygen rendering.
1365{
1366 if (numThreads > 1) {
1367 switch (fetchPolicy) {
1368 case SMTFetchPolicy::RoundRobin:
1369 return roundRobin();
1370 case SMTFetchPolicy::IQCount:
1371 return iqCount();
1372 case SMTFetchPolicy::LSQCount:
1373 return lsqCount();
1374 case SMTFetchPolicy::Branch:
1375 return branchCount();
// Unknown policy value: conservatively fetch from no thread.
1376 default:
1377 return InvalidThreadID;
1378 }
1379 } else {
// Single-threaded path. (Elided line 1380 presumably initializes
// 'thread' from activeThreads->begin() — confirm in fetch.cc.)
1381 if (thread == activeThreads->end()) {
1382 return InvalidThreadID;
1383 }
1384
1385 ThreadID tid = *thread;
1386
// Only fetch if the thread can make progress. (NOTE(review): a middle
// condition on elided line 1388 — presumably IcacheAccessComplete,
// matching the SMT policies below — is missing from this rendering.)
1387 if (fetchStatus[tid] == Running ||
1389 fetchStatus[tid] == Idle) {
1390 return tid;
1391 } else {
1392 return InvalidThreadID;
1393 }
1394 }
1395}
1396
1397
// Fetch::roundRobin() — scan priorityList front-to-back and return the
// first thread that is able to fetch, rotating it to the back of the list
// so the other threads get priority on later cycles.
// NOTE(review): the signature and the 'pri_iter'/'end' iterator
// initializations (lines 1398-1402) are elided in this rendering.
1400{
1403
1404 ThreadID high_pri;
1405
1406 while (pri_iter != end) {
1407 high_pri = *pri_iter;
1408
1409 assert(high_pri <= numThreads);
1410
// A thread is fetchable when it is running, idle, or its I-cache
// access has completed.
1411 if (fetchStatus[high_pri] == Running ||
1412 fetchStatus[high_pri] == IcacheAccessComplete ||
1413 fetchStatus[high_pri] == Idle) {
1414
// Rotate: the chosen thread moves to the tail of the priority list.
1415 priorityList.erase(pri_iter);
1416 priorityList.push_back(high_pri);
1417
1418 return high_pri;
1419 }
1420
1421 pri_iter++;
1422 }
1423
// No thread in the list can currently fetch.
1424 return InvalidThreadID;
1425}
1426
// Fetch::iqCount() — SMT fetch policy that favors the thread with the
// fewest instructions in its issue queue (least backend pressure), as
// reported by the IEW stage through the backwards time buffer.
// NOTE(review): the signature and the 'threads'/'end' iterator
// declarations (lines 1435-1436) are elided in this rendering.
1429{
1430 //sorted from lowest->highest
1431 std::priority_queue<unsigned, std::vector<unsigned>,
1432 std::greater<unsigned> > PQ;
1433 std::map<unsigned, ThreadID> threadMap;
1434
1437
// Gather each active thread's IQ occupancy into a min-heap keyed by
// count, remembering which thread produced each count.
1438 while (threads != end) {
1439 ThreadID tid = *threads++;
1440 unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1441
1442 //we can potentially get tid collisions if two threads
1443 //have the same iqCount, but this should be rare.
1444 PQ.push(iqCount);
1445 threadMap[iqCount] = tid;
1446 }
1447
// Pop counts lowest-first; return the first mapped thread that is in a
// fetchable state.
1448 while (!PQ.empty()) {
1449 ThreadID high_pri = threadMap[PQ.top()];
1450
1451 if (fetchStatus[high_pri] == Running ||
1452 fetchStatus[high_pri] == IcacheAccessComplete ||
1453 fetchStatus[high_pri] == Idle)
1454 return high_pri;
1455 else
1456 PQ.pop();
1457
1458 }
1459
1460 return InvalidThreadID;
1461}
1462
// Fetch::lsqCount() — SMT fetch policy that favors the thread with the
// fewest entries in its load/store queue, mirroring iqCount() but keyed on
// the IEW stage's reported ldstqCount.
// NOTE(review): the signature and the 'threads'/'end' iterator
// declarations (lines 1471-1472) are elided in this rendering.
1465{
1466 //sorted from lowest->highest
1467 std::priority_queue<unsigned, std::vector<unsigned>,
1468 std::greater<unsigned> > PQ;
1469 std::map<unsigned, ThreadID> threadMap;
1470
1473
// Gather each active thread's LSQ occupancy into a min-heap keyed by
// count, remembering which thread produced each count.
1474 while (threads != end) {
1475 ThreadID tid = *threads++;
1476 unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1477
1478 //we can potentially get tid collisions if two threads
1479 //have the same iqCount, but this should be rare.
1480 PQ.push(ldstqCount);
1481 threadMap[ldstqCount] = tid;
1482 }
1483
// Pop counts lowest-first; return the first mapped thread that is in a
// fetchable state.
1484 while (!PQ.empty()) {
1485 ThreadID high_pri = threadMap[PQ.top()];
1486
1487 if (fetchStatus[high_pri] == Running ||
1488 fetchStatus[high_pri] == IcacheAccessComplete ||
1489 fetchStatus[high_pri] == Idle)
1490 return high_pri;
1491 else
1492 PQ.pop();
1493 }
1494
1495 return InvalidThreadID;
1496}
1497
// Fetch::branchCount() — the SMT "Branch" fetch policy is unimplemented;
// selecting it aborts simulation via panic().
// NOTE(review): the signature line is elided in this rendering.
1500{
1501 panic("Branch Count Fetch policy unimplemented\n");
// panic() aborts, so this return is normally unreachable; it is kept to
// satisfy the ThreadID return type.
1502 return InvalidThreadID;
1503}
1504
// Fetch::pipelineIcacheAccesses(tid) — issue the next I-cache access in
// parallel with the current fetch, when a pipelined fetch was flagged for
// this thread (issuePipelinedIfetch) and the next PC is not being supplied
// by the microcode ROM.
// NOTE(review): the signature line is elided in this rendering.
1505void
1507{
// Nothing was flagged for pipelining this cycle.
1508 if (!issuePipelinedIfetch[tid]) {
1509 return;
1510 }
1511
1512 // The next PC to access.
1513 const PCStateBase &this_pc = *pc[tid];
1514
// Micro-op ROM fetches do not come from the I-cache; nothing to pipeline.
1515 if (isRomMicroPC(this_pc.microPC())) {
1516 return;
1517 }
1518
1519 Addr pcOffset = fetchOffset[tid];
1520 Addr fetchAddr = (this_pc.instAddr() + pcOffset) & decoder[tid]->pcMask();
1521
1522 // Align the fetch PC so its at the start of a fetch buffer segment.
1523 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1524
1525 // Unless buffer already got the block, fetch it from icache.
1526 if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1527 DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, "
1528 "starting at PC %s.\n", tid, this_pc);
1529
1530 fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
1531 }
1532}
1533
// Fetch::profileStall(tid) — called when no instructions could be fetched
// this cycle; logs the reason fetch is stalled. In the full source each
// branch also bumps a per-reason stat counter — most of those increments
// (e.g. lines 1542, 1545, 1548, ...) are elided in this Doxygen rendering,
// only the IcacheWaitResponse one remains visible.
// NOTE(review): the signature line is elided as well.
1534void
1536{
1537 DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1538
1539 // @todo Per-thread stats
1540
// Checks are ordered: drain request first, then no-active-thread, then
// the specific per-thread fetchStatus states.
1541 if (stalls[tid].drain) {
1543 DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1544 } else if (activeThreads->empty()) {
1546 DPRINTF(Fetch, "Fetch has no active thread!\n");
1547 } else if (fetchStatus[tid] == Blocked) {
1549 DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid);
1550 } else if (fetchStatus[tid] == Squashing) {
1552 DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
1553 } else if (fetchStatus[tid] == IcacheWaitResponse) {
1554 cpu->fetchStats[tid]->icacheStallCycles++;
1555 DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
1556 tid);
1557 } else if (fetchStatus[tid] == ItlbWait) {
1559 DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to "
1560 "finish!\n", tid);
1561 } else if (fetchStatus[tid] == TrapPending) {
1563 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n",
1564 tid);
1565 } else if (fetchStatus[tid] == QuiescePending) {
1567 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce "
1568 "instruction!\n", tid);
1569 } else if (fetchStatus[tid] == IcacheWaitRetry) {
1571 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n",
1572 tid);
1573 } else if (fetchStatus[tid] == NoGoodAddr) {
1574 DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n",
1575 tid);
1576 } else {
// Fallback: the status value did not match any known stall reason.
1577 DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason "
1578 "(Status: %i)\n",
1579 tid, fetchStatus[tid]);
1580 }
1581}
1582
// IcachePort::recvTimingResp(pkt) — entry point for timing-mode I-cache
// responses. Sanity-checks the coherence state of the packet, then hands
// it to the fetch stage via processCacheCompletion(). Always returns true:
// the response is unconditionally accepted.
// NOTE(review): the signature line is elided in this rendering.
1583bool
1585{
1586 DPRINTF(O3CPU, "Fetch unit received timing\n");
1587 // We shouldn't ever get a cacheable block in Modified state
1588 assert(pkt->req->isUncacheable() ||
1589 !(pkt->cacheResponding() && !pkt->hasSharers()));
1590 fetch->processCacheCompletion(pkt);
1591
1592 return true;
1593}
1594
// IcachePort::recvReqRetry() — thin adapter that forwards the port-level
// retry callback to the owning fetch stage's recvReqRetry().
// NOTE(review): the signature line is elided in this rendering.
1595void
1597{
1598 fetch->recvReqRetry();
1599}
1600
1601} // namespace o3
1602} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
RequestorID instRequestorId() const
Reads this CPU's unique instruction requestor ID.
Definition base.hh:195
uint32_t taskId() const
Get cpu task id.
Definition base.hh:211
trace::InstTracer * getTracer()
Provide access to the tracer pointer.
Definition base.hh:272
std::vector< std::unique_ptr< FetchCPUStats > > fetchStats
Definition base.hh:819
ThreadID contextToThread(ContextID cid)
Convert ContextID to threadID.
Definition base.hh:299
bool switchedOut() const
Determine if the CPU is switched out.
Definition base.hh:373
virtual void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode)
Definition mmu.cc:111
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
size_t moreBytesSize() const
Definition decoder.hh:96
virtual void reset()
Definition decoder.hh:63
virtual void moreBytes(const PCStateBase &pc, Addr fetchPC)=0
Feed data to the decoder.
Addr pcMask() const
Definition decoder.hh:97
virtual std::string name() const
Definition named.hh:47
virtual bool branching() const =0
MicroPC microPC() const
Returns the current micropc.
Definition pcstate.hh:119
Addr instAddr() const
Returns the memory address of the instruction this PC points to.
Definition pcstate.hh:108
virtual PCStateBase * clone() const =0
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
const T * getConstPtr() const
Definition packet.hh:1234
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
bool cacheResponding() const
Definition packet.hh:659
bool hasSharers() const
Definition packet.hh:686
bool isConnected() const
Is this port currently connected to a peer?
Definition port.hh:133
ProbePointArg generates a point for the class of Arg.
Definition probe.hh:264
void notify(const Arg &arg)
called at the ProbePoint call site, passes arg to each listener.
Definition probe.hh:313
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition port.hh:136
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition port.hh:603
@ INST_FETCH
The request was an instruction fetch.
Definition request.hh:115
uint8_t numSrcRegs() const
Number of source registers.
virtual StaticInstPtr fetchMicroop(MicroPC upc) const
Return the microop that goes with a particular micropc.
uint8_t numDestRegs() const
Number of destination registers.
bool isMacroop() const
bool isLastMicroop() const
bool isMemAddr(Addr addr) const
Check if a physical address is within a range of a memory that is part of the global address map.
Definition system.cc:288
wire getWire(int idx)
Definition timebuf.hh:232
void update(const InstSeqNum &done_sn, ThreadID tid)
Tells the branch predictor to commit any updates until the given sequence number.
bool predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, PCStateBase &pc, ThreadID tid)
Predicts whether or not the instruction is a taken branch, and the target of the branch if it is take...
Definition bpred_unit.cc:99
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition bpred_unit.cc:89
void squash(const InstSeqNum &squashed_sn, ThreadID tid)
Squashes all outstanding updates until a given sequence number.
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
ListIt addInst(const DynInstPtr &inst)
Function to add instruction onto the head of the list of the instructions.
Definition cpu.cc:1133
ProbePointArg< PacketPtr > * ppInstAccessComplete
Definition cpu.hh:176
std::vector< ThreadState * > thread
Pointers to all of the threads in the CPU.
Definition cpu.hh:533
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition cpu.hh:488
Port & getInstPort() override
Used by the fetch unit to get a hold of the instruction port.
Definition cpu.hh:564
void removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
Remove all instructions younger than the given sequence number.
Definition cpu.cc:1217
bool isDraining() const
Is the CPU draining?
Definition cpu.hh:238
void removeInstsNotInROB(ThreadID tid)
Remove all instructions that are not currently in the ROB.
Definition cpu.cc:1173
void deactivateStage(const StageIdx idx)
Changes a stage's status to inactive within the activity recorder.
Definition cpu.hh:499
BaseMMU * mmu
Definition cpu.hh:110
System * system
Pointer to the system.
Definition cpu.hh:530
InstSeqNum getAndIncrementInstSeq()
Get the current instruction sequence number, and increment it.
Definition cpu.hh:284
void pcState(const PCStateBase &new_pc_state, ThreadID tid)
Sets the commit PC state of a specific thread.
Definition cpu.cc:1120
gem5::ThreadContext * tcBase(ThreadID tid)
Returns a pointer to a thread context.
Definition cpu.hh:515
void activateStage(const StageIdx idx)
Changes a stage's status to active within the activity recorder.
Definition cpu.hh:492
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition cpu.cc:1316
void setReq(const RequestPtr &_req)
Definition fetch.hh:141
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
Definition fetch.cc:1584
IcachePort(Fetch *_fetch, CPU *_cpu)
Default constructor.
Definition fetch.cc:78
virtual void recvReqRetry()
Handles doing a retry of a failed fetch.
Definition fetch.cc:1596
Fetch class handles both single threaded and SMT fetch.
Definition fetch.hh:79
gem5::o3::Fetch::FetchStatGroup fetchStats
bool wroteToTimeBuffer
Variable that tracks if fetch has written to the time buffer this cycle.
Definition fetch.hh:430
void deactivateThread(ThreadID tid)
For priority-based fetch policies, need to keep update priorityList.
Definition fetch.cc:476
FetchStatus
Overall fetch status.
Definition fetch.hh:163
std::list< ThreadID > * activeThreads
List of Active Threads.
Definition fetch.hh:505
TimeBuffer< TimeStruct >::wire fromCommit
Wire to get commit's information from backwards time buffer.
Definition fetch.hh:406
Cycles renameToFetchDelay
Rename to fetch delay.
Definition fetch.hh:449
StaticInstPtr macroop[MaxThreads]
Definition fetch.hh:419
void fetch(bool &status_change)
Does the actual fetching of instructions and passing them on to the next stage.
Definition fetch.cc:1073
void takeOverFrom()
Takes over from another CPU's thread.
Definition fetch.cc:426
uint8_t * fetchBuffer[MaxThreads]
The fetch data that is being fetched and buffered.
Definition fetch.hh:484
void doSquash(const PCStateBase &new_pc, const DynInstPtr squashInst, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:686
TimeBuffer< FetchStruct >::wire toDecode
Wire used to write any information heading to decode.
Definition fetch.hh:410
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets pointer to list of active threads.
Definition fetch.cc:250
bool lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &pc)
Looks up in the branch predictor to see if the next PC should be either next PC+=MachInst or a branch...
Definition fetch.cc:486
ThreadStatus fetchStatus[MaxThreads]
Per-thread status.
Definition fetch.hh:190
ThreadID numThreads
Number of threads.
Definition fetch.hh:508
TimeBuffer< TimeStruct >::wire fromDecode
Wire to get decode's information from backwards time buffer.
Definition fetch.hh:397
ProbePointArg< DynInstPtr > * ppFetch
Probe points.
Definition fetch.hh:199
TimeBuffer< TimeStruct >::wire fromRename
Wire to get rename's information from backwards time buffer.
Definition fetch.hh:400
void squash(const PCStateBase &new_pc, const InstSeqNum seq_num, DynInstPtr squashInst, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:804
void squashFromDecode(const PCStateBase &new_pc, const DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:738
FetchStatus updateFetchStatus()
Updates overall fetch stage status; to be called at the end of each cycle.
Definition fetch.cc:765
ThreadID getFetchingThread()
Returns the appropriate thread to fetch, given the fetch policy.
Definition fetch.cc:1364
bool fetchBufferValid[MaxThreads]
Whether or not the fetch buffer data is valid.
Definition fetch.hh:496
void startupStage()
Initialize stage.
Definition fetch.cc:263
void pipelineIcacheAccesses(ThreadID tid)
Pipeline the next I-cache access to the current one.
Definition fetch.cc:1506
std::string name() const
Returns the name of fetch.
Definition fetch.cc:148
void wakeFromQuiesce()
Tells fetch to wake up from a quiesce instruction.
Definition fetch.cc:443
void switchToActive()
Changes the status of this stage to active, and indicates this to the CPU.
Definition fetch.cc:452
void switchToInactive()
Changes the status of this stage to inactive, and indicates this to the CPU.
Definition fetch.cc:464
int numInst
Tracks how many instructions has been fetched this cycle.
Definition fetch.hh:433
bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
Fetches the cache line that contains the fetch PC.
Definition fetch.cc:530
Cycles decodeToFetchDelay
Decode to fetch delay.
Definition fetch.hh:446
bool issuePipelinedIfetch[MaxThreads]
Set to true if a pipelined I-cache request should be issued.
Definition fetch.hh:525
Addr fetchBufferAlignPC(Addr addr)
Align a PC to the start of a fetch buffer block.
Definition fetch.hh:352
FetchStatus _status
Fetch status.
Definition fetch.hh:187
bool delayedCommit[MaxThreads]
Can the fetch stage redirect from an interrupt on this instruction?
Definition fetch.hh:422
ThreadID threadFetched
Thread ID being fetched.
Definition fetch.hh:514
SMTFetchPolicy fetchPolicy
Fetch policy.
Definition fetch.hh:193
Addr cacheBlkSize
Cache block size.
Definition fetch.hh:473
branch_prediction::BPredUnit * branchPred
BPredUnit.
Definition fetch.hh:413
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition fetch.cc:379
unsigned fetchWidth
The width of fetch in instructions.
Definition fetch.hh:458
unsigned fetchQueueSize
The size of the fetch queue in micro-ops.
Definition fetch.hh:490
InstDecoder * decoder[MaxThreads]
The decoder.
Definition fetch.hh:358
TimeBuffer< TimeStruct >::wire fromIEW
Wire to get iew's information from backwards time buffer.
Definition fetch.hh:403
void regProbePoints()
Registers probes.
Definition fetch.cc:151
bool checkSignalsAndUpdate(ThreadID tid)
Checks all input signals and updates the status as necessary.
Definition fetch.cc:918
bool checkStall(ThreadID tid) const
Checks if a thread is stalled.
Definition fetch.cc:751
IcachePort icachePort
Instruction port.
Definition fetch.hh:522
@ IcacheAccessComplete
Definition fetch.hh:181
void setTimeBuffer(TimeBuffer< TimeStruct > *time_buffer)
Sets the main backwards communication time buffer pointer.
Definition fetch.cc:238
void processCacheCompletion(PacketPtr pkt)
Processes cache completion event.
Definition fetch.cc:327
ThreadID iqCount()
Returns the appropriate thread to fetch using the IQ count policy.
Definition fetch.cc:1428
Addr fetchBufferMask
Mask to align a fetch address to a fetch buffer boundary.
Definition fetch.hh:481
void recvReqRetry()
Handles retrying the fetch access.
Definition fetch.cc:1334
bool checkInterrupt(Addr pc)
Check if an interrupt is pending and that we need to handle.
Definition fetch.hh:305
Cycles iewToFetchDelay
IEW to fetch delay.
Definition fetch.hh:452
void resetStage()
Reset this pipeline stage.
Definition fetch.cc:293
Fetch(CPU *_cpu, const BaseO3CPUParams &params)
Fetch constructor.
Definition fetch.cc:83
void drainStall(ThreadID tid)
Stall the fetch stage after reaching a safe drain point.
Definition fetch.cc:434
Counter lastIcacheStall[MaxThreads]
Icache stall statistics.
Definition fetch.hh:502
int instSize
Size of instructions.
Definition fetch.hh:499
ProbePointArg< RequestPtr > * ppFetchRequestSent
To probe when a fetch request is successfully sent.
Definition fetch.hh:201
Cycles commitToFetchDelay
Commit to fetch delay.
Definition fetch.hh:455
RequestPtr memReq[MaxThreads]
Memory request used to access cache.
Definition fetch.hh:425
TimeBuffer< TimeStruct > * timeBuffer
Time buffer interface.
Definition fetch.hh:394
void profileStall(ThreadID tid)
Profile the reasons of fetch stall.
Definition fetch.cc:1535
ThreadID roundRobin()
Returns the appropriate thread to fetch using a round robin policy.
Definition fetch.cc:1399
Addr fetchBufferPC[MaxThreads]
The PC of the first instruction loaded into the fetch buffer.
Definition fetch.hh:487
void drainResume()
Resume after a drain.
Definition fetch.cc:370
void clearStates(ThreadID tid)
Clear all thread-specific states.
Definition fetch.cc:274
void finishTranslation(const Fault &fault, const RequestPtr &mem_req)
Definition fetch.cc:579
bool interruptPending
Checks if there is an interrupt pending.
Definition fetch.hh:519
std::unique_ptr< PCStateBase > pc[MaxThreads]
Definition fetch.hh:415
ThreadID lsqCount()
Returns the appropriate thread to fetch using the LSQ count policy.
Definition fetch.cc:1464
Stalls stalls[MaxThreads]
Tracks which stages are telling fetch to stall.
Definition fetch.hh:443
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst, StaticInstPtr curMacroop, const PCStateBase &this_pc, const PCStateBase &next_pc, bool trace)
Definition fetch.cc:1020
bool isDrained() const
Has the stage drained?
Definition fetch.cc:396
Addr fetchOffset[MaxThreads]
Definition fetch.hh:417
std::deque< DynInstPtr > fetchQueue[MaxThreads]
Queue of fetched instructions.
Definition fetch.hh:493
PacketPtr retryPkt
The packet that is waiting to be retried.
Definition fetch.hh:467
std::list< ThreadID > priorityList
List that has the threads organized by priority.
Definition fetch.hh:196
FinishTranslationEvent finishTranslationEvent
Event used to delay fault generation of translation faults.
Definition fetch.hh:528
ThreadID retryTid
The thread that is waiting on the cache to tell fetch to retry.
Definition fetch.hh:470
void tick()
Ticks the fetch stage, processing all inputs signals and fetching as many instructions as possible.
Definition fetch.cc:816
ThreadID numFetchingThreads
Number of threads that are actively fetching.
Definition fetch.hh:511
unsigned fetchBufferSize
The size of the fetch buffer in bytes.
Definition fetch.hh:478
void setFetchQueue(TimeBuffer< FetchStruct > *fq_ptr)
Sets pointer to time buffer used to communicate to the next stage.
Definition fetch.cc:256
CPU * cpu
Pointer to the O3CPU.
Definition fetch.hh:391
unsigned decodeWidth
The width of decode in instructions.
Definition fetch.hh:461
bool cacheBlocked
Is the cache blocked? If so no threads can access it.
Definition fetch.hh:464
ThreadID branchCount()
Returns the appropriate thread to fetch using the branch count policy.
Definition fetch.cc:1499
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Derived & prereq(const Stat &prereq)
Set the prerequisite stat and marks this stat to print at the end of simulation.
void sample(const U &v, int n=1)
Add a value to the distribtion n times.
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Statistics container.
Definition group.hh:93
virtual InstRecord * getInstRecord(Tick when, ThreadContext *tc, const StaticInstPtr staticInst, const PCStateBase &pc, const StaticInstPtr macroStaticInst=nullptr)=0
STL list class.
Definition stl.hh:51
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
Random random_mt
Definition random.cc:99
std::enable_if_t< std::is_integral_v< T >, T > random()
Use the SFINAE idiom to choose an implementation based on whether the type is integral or floating po...
Definition random.hh:90
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
ProbeManager * getProbeManager()
Get the probe manager for this object.
#define warn(...)
Definition logging.hh:256
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 12, 11 > set
static constexpr int MaxThreads
Definition limits.hh:38
static constexpr int MaxWidth
Definition limits.hh:37
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition info.hh:61
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< FaultBase > Fault
Definition types.hh:249
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
const ThreadID InvalidThreadID
Definition types.hh:236
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition root.cc:220
static bool isRomMicroPC(MicroPC upc)
Definition types.hh:166
StaticInstPtr nopStaticInstPtr
Pointer to a statically allocated generic "nop" instruction object.
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
Declaration of the Packet class.
statistics::Scalar icacheSquashes
Total number of outstanding icache accesses that were dropped due to a squash.
Definition fetch.hh:569
statistics::Scalar pendingDrainCycles
Total number of cycles spent in waiting for drains.
Definition fetch.hh:554
statistics::Scalar cacheLines
Stat for total number of fetched cache lines.
Definition fetch.hh:565
statistics::Scalar blockedCycles
Total number of cycles spent blocked.
Definition fetch.hh:550
statistics::Scalar idleCycles
Stat for total number of cycles spent blocked due to other stages in the pipeline.
Definition fetch.hh:548
statistics::Scalar predictedBranches
Stat for total number of predicted branches.
Definition fetch.hh:537
statistics::Scalar noActiveThreadStallCycles
Total number of stall cycles caused by no active threads to run.
Definition fetch.hh:556
statistics::Scalar pendingQuiesceStallCycles
Total number of stall cycles caused by pending quiesce instructions.
Definition fetch.hh:561
statistics::Scalar icacheWaitRetryStallCycles
Total number of stall cycles caused by I-cache wait retrys.
Definition fetch.hh:563
statistics::Scalar pendingTrapStallCycles
Total number of stall cycles caused by pending traps.
Definition fetch.hh:558
statistics::Scalar cycles
Stat for total number of cycles spent fetching.
Definition fetch.hh:539
statistics::Scalar miscStallCycles
Total number of cycles spent in any other state.
Definition fetch.hh:552
statistics::Scalar tlbCycles
Stat for total number of cycles spent waiting for translation.
Definition fetch.hh:543
statistics::Scalar squashCycles
Stat for total number of cycles spent squashing.
Definition fetch.hh:541
FetchStatGroup(CPU *cpu, Fetch *fetch)
Definition fetch.cc:159
statistics::Formula idleRate
Rate of how often fetch was idle.
Definition fetch.hh:577
statistics::Scalar tlbSquashes
Total number of outstanding tlb accesses that were dropped due to a squash.
Definition fetch.hh:573
statistics::Distribution nisnDist
Distribution of number of instructions fetched each cycle.
Definition fetch.hh:575
const std::string & name()
Definition trace.cc:48

Generated on Tue Jun 18 2024 16:24:01 for gem5 by doxygen 1.11.0