gem5 v24.1.0.1
Loading...
Searching...
No Matches
fetch.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2010-2014 ARM Limited
3 * Copyright (c) 2012-2013 AMD
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/fetch.hh"
43
44#include <algorithm>
45#include <cstring>
46#include <list>
47#include <map>
48#include <queue>
49
50#include "arch/generic/tlb.hh"
51#include "base/types.hh"
52#include "cpu/base.hh"
53#include "cpu/exetrace.hh"
55#include "cpu/o3/cpu.hh"
56#include "cpu/o3/dyn_inst.hh"
57#include "cpu/o3/limits.hh"
58#include "debug/Activity.hh"
59#include "debug/Drain.hh"
60#include "debug/Fetch.hh"
61#include "debug/O3CPU.hh"
62#include "debug/O3PipeView.hh"
63#include "mem/packet.hh"
64#include "params/BaseO3CPU.hh"
65#include "sim/byteswap.hh"
66#include "sim/core.hh"
67#include "sim/eventq.hh"
68#include "sim/full_system.hh"
69#include "sim/system.hh"
70
71namespace gem5
72{
73
74namespace o3
75{
76
78 RequestPort(_cpu->name() + ".icache_port"), fetch(_fetch)
79{}
80
81
// Construct the fetch stage for `_cpu` from the O3 CPU parameters.
//
// The constructor copies the fetch configuration out of `params`, aborts
// through fatal() when that configuration exceeds a compiled-in limit or is
// inconsistent with the cache block size, puts every one of the MaxThreads
// per-thread slots into the Idle state with cleared bookkeeping, and then
// attaches a decoder and allocates a fetch buffer for each of the
// numThreads threads that are configured.
//
// _cpu   - owning CPU; also used to name the icache port and the stats.
// params - O3 CPU parameters the stage is configured from.
82Fetch::Fetch(CPU *_cpu, const BaseO3CPUParams &params)
83 : fetchPolicy(params.smtFetchPolicy),
84 cpu(_cpu),
85 branchPred(nullptr),
90 fetchWidth(params.fetchWidth),
92 retryPkt(NULL),
94 cacheBlkSize(cpu->cacheLineSize()),
98 numThreads(params.numThreads),
99 numFetchingThreads(params.smtNumFetchingThreads),
100 icachePort(this, _cpu),
101 finishTranslationEvent(this), fetchStats(_cpu, this)
102{
// Configuration sanity checks. Each fatal() message below names the limit
// or relationship that was violated.
104 fatal("numThreads (%d) is larger than compiled limit (%d),\n"
105 "\tincrease MaxThreads in src/cpu/o3/limits.hh\n",
106 numThreads, static_cast<int>(MaxThreads));
107 if (fetchWidth > MaxWidth)
108 fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
109 "\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
110 fetchWidth, static_cast<int>(MaxWidth));
112 fatal("fetch buffer size (%u bytes) is greater than the cache "
113 "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
115 fatal("cache block (%u bytes) is not a multiple of the "
116 "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
117
// Initialize all MaxThreads slots, not only the numThreads that are in use.
118 for (int i = 0; i < MaxThreads; i++) {
119 fetchStatus[i] = Idle;
120 decoder[i] = nullptr;
// Every slot gets a new PC state object created by ISA 0 (params.isa[0]).
121 pc[i].reset(params.isa[0]->newPCState());
122 fetchOffset[i] = 0;
123 macroop[i] = nullptr;
124 delayedCommit[i] = false;
125 memReq[i] = nullptr;
126 stalls[i] = {false, false};
127 fetchBuffer[i] = NULL;
128 fetchBufferPC[i] = 0;
129 fetchBufferValid[i] = false;
130 lastIcacheStall[i] = 0;
131 issuePipelinedIfetch[i] = false;
132 }
133
// branchPred was set to nullptr in the initializer list; bind it to the
// configured predictor here.
134 branchPred = params.branchPred;
135
// Only the configured threads receive a decoder and a fetch buffer; the
// remaining slots keep the null values assigned in the loop above.
136 for (ThreadID tid = 0; tid < numThreads; tid++) {
137 decoder[tid] = params.decoder[tid];
138 // Create space to buffer the cache line data,
139 // which may not hold the entire cache line.
140 fetchBuffer[tid] = new uint8_t[fetchBufferSize];
141 }
142
143 // Get the size of an instruction.
145}
146
// Return this stage's name: the owning CPU's name with ".fetch" appended.
147std::string Fetch::name() const { return cpu->name() + ".fetch"; }
148
149void
157
159 : statistics::Group(cpu, "fetch"),
160 ADD_STAT(predictedBranches, statistics::units::Count::get(),
161 "Number of branches that fetch has predicted taken"),
162 ADD_STAT(cycles, statistics::units::Cycle::get(),
163 "Number of cycles fetch has run and was not squashing or "
164 "blocked"),
165 ADD_STAT(squashCycles, statistics::units::Cycle::get(),
166 "Number of cycles fetch has spent squashing"),
167 ADD_STAT(tlbCycles, statistics::units::Cycle::get(),
168 "Number of cycles fetch has spent waiting for tlb"),
169 ADD_STAT(idleCycles, statistics::units::Cycle::get(),
170 "Number of cycles fetch was idle"),
171 ADD_STAT(blockedCycles, statistics::units::Cycle::get(),
172 "Number of cycles fetch has spent blocked"),
173 ADD_STAT(miscStallCycles, statistics::units::Cycle::get(),
174 "Number of cycles fetch has spent waiting on interrupts, or bad "
175 "addresses, or out of MSHRs"),
176 ADD_STAT(pendingDrainCycles, statistics::units::Cycle::get(),
177 "Number of cycles fetch has spent waiting on pipes to drain"),
178 ADD_STAT(noActiveThreadStallCycles, statistics::units::Cycle::get(),
179 "Number of stall cycles due to no active thread to fetch from"),
180 ADD_STAT(pendingTrapStallCycles, statistics::units::Cycle::get(),
181 "Number of stall cycles due to pending traps"),
182 ADD_STAT(pendingQuiesceStallCycles, statistics::units::Cycle::get(),
183 "Number of stall cycles due to pending quiesce instructions"),
184 ADD_STAT(icacheWaitRetryStallCycles, statistics::units::Cycle::get(),
185 "Number of stall cycles due to full MSHR"),
186 ADD_STAT(cacheLines, statistics::units::Count::get(),
187 "Number of cache lines fetched"),
188 ADD_STAT(icacheSquashes, statistics::units::Count::get(),
189 "Number of outstanding Icache misses that were squashed"),
190 ADD_STAT(tlbSquashes, statistics::units::Count::get(),
191 "Number of outstanding ITLB misses that were squashed"),
192 ADD_STAT(nisnDist, statistics::units::Count::get(),
193 "Number of instructions fetched each cycle (Total)"),
194 ADD_STAT(idleRate, statistics::units::Ratio::get(),
195 "Ratio of cycles fetch was idle",
196 idleCycles / cpu->baseStats.numCycles)
197{
200 cycles
201 .prereq(cycles);
229 .init(/* base value */ 0,
230 /* last value */ fetch->fetchWidth,
231 /* bucket size */ 1)
235}
236void
238{
239 timeBuffer = time_buffer;
240
241 // Create wires to get information from proper places in time buffer.
246}
247
248void
253
254void
256{
257 // Create wire to write information to proper place in fetch time buf.
258 toDecode = ftb_ptr->getWire(0);
259}
260
261void
263{
264 assert(priorityList.empty());
265 resetStage();
266
267 // Fetch needs to start fetching instructions at the very beginning,
268 // so it must start up in active state.
270}
271
272void
274{
275 fetchStatus[tid] = Running;
276 set(pc[tid], cpu->pcState(tid));
277 fetchOffset[tid] = 0;
278 macroop[tid] = NULL;
279 delayedCommit[tid] = false;
280 memReq[tid] = NULL;
281 stalls[tid].decode = false;
282 stalls[tid].drain = false;
283 fetchBufferPC[tid] = 0;
284 fetchBufferValid[tid] = false;
285 fetchQueue[tid].clear();
286
287 // TODO not sure what to do with priorityList for now
288 // priorityList.push_back(tid);
289}
290
291void
293{
294 numInst = 0;
295 interruptPending = false;
296 cacheBlocked = false;
297
298 priorityList.clear();
299
300 // Setup PC and nextPC with initial state.
301 for (ThreadID tid = 0; tid < numThreads; ++tid) {
302 fetchStatus[tid] = Running;
303 set(pc[tid], cpu->pcState(tid));
304 fetchOffset[tid] = 0;
305 macroop[tid] = NULL;
306
307 delayedCommit[tid] = false;
308 memReq[tid] = NULL;
309
310 stalls[tid].decode = false;
311 stalls[tid].drain = false;
312
313 fetchBufferPC[tid] = 0;
314 fetchBufferValid[tid] = false;
315
316 fetchQueue[tid].clear();
317
318 priorityList.push_back(tid);
319 }
320
321 wroteToTimeBuffer = false;
323}
324
325void
327{
328 ThreadID tid = cpu->contextToThread(pkt->req->contextId());
329
330 DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid);
331 assert(!cpu->switchedOut());
332
333 // Only change the status if it's still waiting on the icache access
334 // to return.
335 if (fetchStatus[tid] != IcacheWaitResponse ||
336 pkt->req != memReq[tid]) {
338 delete pkt;
339 return;
340 }
341
342 memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
343 fetchBufferValid[tid] = true;
344
345 // Wake up the CPU (if it went to sleep and was waiting on
346 // this completion event).
347 cpu->wakeCPU();
348
349 DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n",
350 tid);
351
353
354 // Only switch to IcacheAccessComplete if we're not stalled as well.
355 if (checkStall(tid)) {
356 fetchStatus[tid] = Blocked;
357 } else {
359 }
360
361 pkt->req->setAccessLatency();
362 cpu->ppInstAccessComplete->notify(pkt);
363 // Reset the mem req to NULL.
364 delete pkt;
365 memReq[tid] = NULL;
366}
367
368void
370{
371 for (ThreadID i = 0; i < numThreads; ++i) {
372 stalls[i].decode = false;
373 stalls[i].drain = false;
374 }
375}
376
377void
379{
380 assert(isDrained());
381 assert(retryPkt == NULL);
382 assert(retryTid == InvalidThreadID);
383 assert(!cacheBlocked);
384 assert(!interruptPending);
385
386 for (ThreadID i = 0; i < numThreads; ++i) {
387 assert(!memReq[i]);
388 assert(fetchStatus[i] == Idle || stalls[i].drain);
389 }
390
392}
393
394bool
396{
397 /* Make sure that threads are either idle or that the commit stage
398 * has signaled that draining has completed by setting the drain
399 * stall flag. This effectively forces the pipeline to be disabled
400 * until the whole system is drained (simulation may continue to
401 * drain other components).
402 */
403 for (ThreadID i = 0; i < numThreads; ++i) {
404 // Verify fetch queues are drained
405 if (!fetchQueue[i].empty())
406 return false;
407
408 // Return false if not idle or drain stalled
409 if (fetchStatus[i] != Idle) {
410 if (fetchStatus[i] == Blocked && stalls[i].drain)
411 continue;
412 else
413 return false;
414 }
415 }
416
417 /* The pipeline might start up again in the middle of the drain
418 * cycle if the finish translation event is scheduled, so make
419 * sure that's not the case.
420 */
422}
423
424void
426{
427 assert(cpu->getInstPort().isConnected());
428 resetStage();
429
430}
431
432void
434{
435 assert(cpu->isDraining());
436 assert(!stalls[tid].drain);
437 DPRINTF(Drain, "%i: Thread drained.\n", tid);
438 stalls[tid].drain = true;
439}
440
441void
443{
444 DPRINTF(Fetch, "Waking up from quiesce\n");
445 // Hopefully this is safe
446 // @todo: Allow other threads to wake from quiesce.
447 fetchStatus[0] = Running;
448}
449
450void
452{
453 if (_status == Inactive) {
454 DPRINTF(Activity, "Activating stage.\n");
455
457
458 _status = Active;
459 }
460}
461
462void
464{
465 if (_status == Active) {
466 DPRINTF(Activity, "Deactivating stage.\n");
467
469
471 }
472}
473
474void
476{
477 // Update priority list
478 auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
479 if (thread_it != priorityList.end()) {
480 priorityList.erase(thread_it);
481 }
482}
483
484bool
486{
487 // Do branch prediction check here.
488 // A bit of a misnomer...next_PC is actually the current PC until
489 // this function updates it.
490 bool predict_taken;
491
492 if (!inst->isControl()) {
493 inst->staticInst->advancePC(next_pc);
494 inst->setPredTarg(next_pc);
495 inst->setPredTaken(false);
496 return false;
497 }
498
499 ThreadID tid = inst->threadNumber;
500 predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
501 next_pc, tid);
502
503 if (predict_taken) {
504 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
505 "predicted to be taken to %s\n",
506 tid, inst->seqNum, inst->pcState().instAddr(), next_pc);
507 } else {
508 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
509 "predicted to be not taken\n",
510 tid, inst->seqNum, inst->pcState().instAddr());
511 }
512
513 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
514 "predicted to go to %s\n",
515 tid, inst->seqNum, inst->pcState().instAddr(), next_pc);
516 inst->setPredTarg(next_pc);
517 inst->setPredTaken(predict_taken);
518
519 cpu->fetchStats[tid]->numBranches++;
520
521 if (predict_taken) {
523 }
524
525 return predict_taken;
526}
527
528bool
530{
531 Fault fault = NoFault;
532
533 assert(!cpu->switchedOut());
534
535 // @todo: not sure if these should block translation.
536 //AlphaDep
537 if (cacheBlocked) {
538 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
539 tid);
540 return false;
541 } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
542 // Hold off fetch from getting new instructions when:
543 // Cache is blocked, or
544 // while an interrupt is pending and we're not in PAL mode, or
545 // fetch is switched out.
546 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
547 tid);
548 return false;
549 }
550
551 // Align the fetch address to the start of a fetch buffer segment.
552 Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
553
554 DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
555 tid, fetchBufferBlockPC, vaddr);
556
557 // Setup the memReq to do a read of the first instruction's address.
558 // Set the appropriate read size and flags as well.
559 // Build request here.
560 RequestPtr mem_req = std::make_shared<Request>(
561 fetchBufferBlockPC, fetchBufferSize,
563 cpu->thread[tid]->contextId());
564
565 mem_req->taskId(cpu->taskId());
566
567 memReq[tid] = mem_req;
568
569 // Initiate translation of the icache block
570 fetchStatus[tid] = ItlbWait;
571 FetchTranslation *trans = new FetchTranslation(this);
572 cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(),
573 trans, BaseMMU::Execute);
574 return true;
575}
576
// Continue a fetch once the address translation of its request has finished.
//
// fault   - outcome of the translation; NoFault means it succeeded.
// mem_req - the request that was translated. It must still be the thread's
//           current memReq[tid] and the thread must still be in ItlbWait,
//           otherwise the completion is ignored.
//
// On success a read packet for the fetch buffer block is sent through the
// icache port, or kept in retryPkt when the port does not accept it. When
// the translation faulted, a nop instruction carrying the fault is built
// with buildInst() so that the fault can be sent on to commit.
577void
578Fetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req)
579{
580 ThreadID tid = cpu->contextToThread(mem_req->contextId());
581 Addr fetchBufferBlockPC = mem_req->getVaddr();
582
583 assert(!cpu->switchedOut());
584
585 // Wake up CPU if it was idle
586 cpu->wakeCPU();
587
// A squash clears memReq[tid] while a translation is outstanding (see
// doSquash), so a completion that no longer matches the thread's current
// request or status is stale and is dropped here.
588 if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
589 mem_req->getVaddr() != memReq[tid]->getVaddr()) {
590 DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
591 tid);
593 return;
594 }
595
596
597 // If translation was successful, attempt to read the icache block.
598 if (fault == NoFault) {
599 // Check that we're not going off into random memory
600 // If we have, just wait around for commit to squash something and put
601 // us on the right track
602 if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
603 warn("Address %#x is outside of physical memory, stopping fetch\n",
604 mem_req->getPaddr());
605 fetchStatus[tid] = NoGoodAddr;
606 memReq[tid] = NULL;
607 return;
608 }
609
610 // Build packet here.
// The packet is given a newly allocated buffer of fetchBufferSize bytes.
611 PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
612 data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
613
// Record which block is being fetched; the buffer is marked invalid for now.
614 fetchBufferPC[tid] = fetchBufferBlockPC;
615 fetchBufferValid[tid] = false;
616 DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
617
619
620 // Access the cache.
621 if (!icachePort.sendTimingReq(data_pkt)) {
// The port did not accept the packet: remember the packet and its thread in
// retryPkt/retryTid and flag the cache as blocked. Only one packet may be
// pending this way, which the two asserts enforce.
622 assert(retryPkt == NULL);
623 assert(retryTid == InvalidThreadID);
624 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
625
627 retryPkt = data_pkt;
628 retryTid = tid;
629 cacheBlocked = true;
630 } else {
631 DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid);
632 DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache "
633 "response.\n", tid);
// Timestamp of the moment the icache request was accepted.
634 lastIcacheStall[tid] = curTick();
636 // Notify Fetch Request probe when a packet containing a fetch
637 // request is successfully sent
638 ppFetchRequestSent->notify(mem_req);
639 }
640 } else {
641 // Don't send an instruction to decode if we can't handle it.
// That is the case when the fetch width is used up for this cycle or the
// thread's fetch queue is full.
// NOTE(review): the call whose last argument is visible below looks like a
// reschedule for the next CPU clock edge - confirm against the full source.
642 if (!(numInst < fetchWidth) ||
643 !(fetchQueue[tid].size() < fetchQueueSize)) {
648 cpu->clockEdge(Cycles(1)));
649 return;
650 }
652 "[tid:%i] Got back req with addr %#x but expected %#x\n",
653 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
654 // Translation faulted, icache request won't be sent.
655 memReq[tid] = NULL;
656
657 // Send the fault to commit. This thread will not do anything
658 // until commit handles the fault. The only other way it can
659 // wake up is if a squash comes along and changes the PC.
660 const PCStateBase &fetch_pc = *pc[tid];
661
662 DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
663 // We will use a nop in order to carry the fault.
// The current PC is passed as both this_pc and next_pc, and tracing is off.
664 DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
665 fetch_pc, fetch_pc, false);
666 instruction->setNotAnInst();
667
668 instruction->setPredTarg(fetch_pc);
669 instruction->fault = fault;
670 wroteToTimeBuffer = true;
671
672 DPRINTF(Activity, "Activity this cycle.\n");
674
676
677 DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid);
678 DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n",
679 tid, fault->name(), *pc[tid]);
680 }
682}
683
// Redirect fetch for thread `tid` to `new_pc` and discard its in-progress
// fetch state. Shared by the commit and decode squash entry points.
//
// new_pc     - PC that the thread's fetch PC is set to.
// squashInst - instruction associated with the squash; may be null.
// tid        - thread being squashed.
684void
685Fetch::doSquash(const PCStateBase &new_pc, const DynInstPtr squashInst,
686 ThreadID tid)
687{
688 DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
689 tid, new_pc);
690
691 set(pc[tid], new_pc);
692 fetchOffset[tid] = 0;
// Keep the squashing instruction's macro-op only when that instruction is
// located at the new PC and is not the last micro-op, presumably because
// fetch then resumes in the middle of that macro-op. Otherwise drop it.
693 if (squashInst && squashInst->pcState().instAddr() == new_pc.instAddr() &&
694 !squashInst->isLastMicroop())
695 macroop[tid] = squashInst->macroop;
696 else
697 macroop[tid] = NULL;
698 decoder[tid]->reset();
699
700 // Clear the icache miss if it's outstanding.
// Clearing memReq[tid] is what lets a late icache or ITLB completion be
// recognised as stale, because it no longer matches the thread's request.
701 if (fetchStatus[tid] == IcacheWaitResponse) {
702 DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n",
703 tid);
704 memReq[tid] = NULL;
705 } else if (fetchStatus[tid] == ItlbWait) {
706 DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n",
707 tid);
708 memReq[tid] = NULL;
709 }
710
711 // Get rid of the retrying packet if it was from this thread.
712 if (retryTid == tid) {
713 assert(cacheBlocked);
714 if (retryPkt) {
715 delete retryPkt;
716 }
717 retryPkt = NULL;
719 }
720
721 fetchStatus[tid] = Squashing;
722
723 // Empty fetch queue
724 fetchQueue[tid].clear();
725
726 // microops are being squashed, it is not known whether the
727 // youngest non-squashed microop was marked delayed commit
728 // or not. Setting the flag to true ensures that the
729 // interrupts are not handled when they cannot be, though
730 // some opportunities to handle interrupts may be missed.
731 delayedCommit[tid] = true;
732
734}
735
// Squash thread `tid` in response to a squash signalled by decode.
//
// new_pc     - PC fetch is redirected to (forwarded to doSquash()).
// squashInst - instruction associated with the squash (forwarded to
//              doSquash()).
// seq_num    - sequence number passed to CPU::removeInstsUntil().
// tid        - thread being squashed.
736void
737Fetch::squashFromDecode(const PCStateBase &new_pc, const DynInstPtr squashInst,
738 const InstSeqNum seq_num, ThreadID tid)
739{
740 DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
741
// Reset this stage's own per-thread fetch state first.
742 doSquash(new_pc, squashInst, tid);
743
744 // Tell the CPU to remove any instructions that are in flight between
745 // fetch and decode.
746 cpu->removeInstsUntil(seq_num, tid);
747}
748
749bool
751{
752 bool ret_val = false;
753
754 if (stalls[tid].drain) {
755 assert(cpu->isDraining());
756 DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid);
757 ret_val = true;
758 }
759
760 return ret_val;
761}
762
765{
766 //Check Running
769
770 while (threads != end) {
771 ThreadID tid = *threads++;
772
773 if (fetchStatus[tid] == Running ||
774 fetchStatus[tid] == Squashing ||
776
777 if (_status == Inactive) {
778 DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid);
779
780 if (fetchStatus[tid] == IcacheAccessComplete) {
781 DPRINTF(Activity, "[tid:%i] Activating fetch due to cache"
782 "completion\n",tid);
783 }
784
786 }
787
788 return Active;
789 }
790 }
791
792 // Stage is switching from active to inactive, notify CPU of it.
793 if (_status == Active) {
794 DPRINTF(Activity, "Deactivating stage.\n");
795
797 }
798
799 return Inactive;
800}
801
// Squash thread `tid` in response to a squash signalled by commit.
//
// new_pc     - PC fetch is redirected to (forwarded to doSquash()).
// seq_num    - sequence number supplied with the squash.
//              NOTE(review): no statement visible in this listing uses it;
//              presumably the statement announced by the last comment in
//              the body consumes it - confirm against the full source.
// squashInst - instruction associated with the squash (forwarded to
//              doSquash()).
// tid        - thread being squashed.
802void
803Fetch::squash(const PCStateBase &new_pc, const InstSeqNum seq_num,
804 DynInstPtr squashInst, ThreadID tid)
805{
806 DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
807
// Reset this stage's own per-thread fetch state first.
808 doSquash(new_pc, squashInst, tid);
809
810 // Tell the CPU to remove any instructions that are not in the ROB.
812}
813
814void
816{
819 bool status_change = false;
820
821 wroteToTimeBuffer = false;
822
823 for (ThreadID i = 0; i < numThreads; ++i) {
824 issuePipelinedIfetch[i] = false;
825 }
826
827 while (threads != end) {
828 ThreadID tid = *threads++;
829
830 // Check the signals for each thread to determine the proper status
831 // for each thread.
832 bool updated_status = checkSignalsAndUpdate(tid);
833 status_change = status_change || updated_status;
834 }
835
836 DPRINTF(Fetch, "Running stage.\n");
837
838 if (FullSystem) {
839 if (fromCommit->commitInfo[0].interruptPending) {
840 interruptPending = true;
841 }
842
843 if (fromCommit->commitInfo[0].clearInterrupt) {
844 interruptPending = false;
845 }
846 }
847
849 threadFetched++) {
850 // Fetch each of the actively fetching threads.
851 fetch(status_change);
852 }
853
854 // Record number of instructions fetched this cycle for distribution.
856
857 if (status_change) {
858 // Change the fetch stage status if there was a status change.
860 }
861
862 // Issue the next I-cache request if possible.
863 for (ThreadID i = 0; i < numThreads; ++i) {
864 if (issuePipelinedIfetch[i]) {
866 }
867 }
868
869 // Send instructions enqueued into the fetch queue to decode.
870 // Limit rate by fetchWidth. Stall if decode is stalled.
871 unsigned insts_to_decode = 0;
872 unsigned available_insts = 0;
873
874 for (auto tid : *activeThreads) {
875 if (!stalls[tid].decode) {
876 available_insts += fetchQueue[tid].size();
877 }
878 }
879
880 // Pick a random thread to start trying to grab instructions from
881 auto tid_itr = activeThreads->begin();
882 std::advance(tid_itr,
883 rng->random<uint8_t>(0, activeThreads->size() - 1));
884
885 while (available_insts != 0 && insts_to_decode < decodeWidth) {
886 ThreadID tid = *tid_itr;
887 if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
888 const auto& inst = fetchQueue[tid].front();
889 toDecode->insts[toDecode->size++] = inst;
890 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode "
891 "from fetch queue. Fetch queue size: %i.\n",
892 tid, inst->seqNum, fetchQueue[tid].size());
893
894 wroteToTimeBuffer = true;
895 fetchQueue[tid].pop_front();
896 insts_to_decode++;
897 available_insts--;
898 }
899
900 tid_itr++;
901 // Wrap around if at end of active threads list
902 if (tid_itr == activeThreads->end())
903 tid_itr = activeThreads->begin();
904 }
905
906 // If there was activity this cycle, inform the CPU of it.
907 if (wroteToTimeBuffer) {
908 DPRINTF(Activity, "Activity this cycle.\n");
910 }
911
912 // Reset the number of the instruction we've fetched.
913 numInst = 0;
914}
915
916bool
918{
919 // Update the per thread stall statuses.
920 if (fromDecode->decodeBlock[tid]) {
921 stalls[tid].decode = true;
922 }
923
924 if (fromDecode->decodeUnblock[tid]) {
925 assert(stalls[tid].decode);
926 assert(!fromDecode->decodeBlock[tid]);
927 stalls[tid].decode = false;
928 }
929
930 // Check squash signals from commit.
931 if (fromCommit->commitInfo[tid].squash) {
932
933 DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
934 "from commit.\n",tid);
935 // In any case, squash.
936 squash(*fromCommit->commitInfo[tid].pc,
937 fromCommit->commitInfo[tid].doneSeqNum,
938 fromCommit->commitInfo[tid].squashInst, tid);
939
940 // If it was a branch mispredict on a control instruction, update the
941 // branch predictor with that instruction, otherwise just discard the
942 // invalid state generated after that sequence number.
943 if (fromCommit->commitInfo[tid].mispredictInst &&
944 fromCommit->commitInfo[tid].mispredictInst->isControl()) {
945 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
946 *fromCommit->commitInfo[tid].pc,
947 fromCommit->commitInfo[tid].branchTaken, tid);
948 } else {
949 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
950 tid);
951 }
952
953 return true;
954 } else if (fromCommit->commitInfo[tid].doneSeqNum) {
955 // Update the branch predictor if it wasn't a squashed instruction
956 // that was broadcast.
957 branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
958 }
959
960 // Check squash signals from decode.
961 if (fromDecode->decodeInfo[tid].squash) {
962 DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
963 "from decode.\n",tid);
964
965 // Update the branch predictor.
966 if (fromDecode->decodeInfo[tid].branchMispredict) {
967 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
968 *fromDecode->decodeInfo[tid].nextPC,
969 fromDecode->decodeInfo[tid].branchTaken, tid);
970 } else {
971 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
972 tid);
973 }
974
975 if (fetchStatus[tid] != Squashing) {
976
977 DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
978 *fromDecode->decodeInfo[tid].nextPC);
979 // Squash unless we're already squashing
980 squashFromDecode(*fromDecode->decodeInfo[tid].nextPC,
981 fromDecode->decodeInfo[tid].squashInst,
982 fromDecode->decodeInfo[tid].doneSeqNum,
983 tid);
984
985 return true;
986 }
987 }
988
989 if (checkStall(tid) &&
992 fetchStatus[tid] != ItlbWait &&
993 fetchStatus[tid] != QuiescePending) {
994 DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid);
995
996 fetchStatus[tid] = Blocked;
997
998 return true;
999 }
1000
1001 if (fetchStatus[tid] == Blocked ||
1002 fetchStatus[tid] == Squashing) {
1003 // Switch status to running if fetch isn't being told to block or
1004 // squash this cycle.
1005 DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n",
1006 tid);
1007
1008 fetchStatus[tid] = Running;
1009
1010 return true;
1011 }
1012
1013 // If we've reached this point, we have not gotten any signals that
1014 // cause fetch to change its status. Fetch remains the same as before.
1015 return false;
1016}
1017
1020 StaticInstPtr curMacroop, const PCStateBase &this_pc,
1021 const PCStateBase &next_pc, bool trace)
1022{
1023 // Get a sequence number.
1025
1026 DynInst::Arrays arrays;
1027 arrays.numSrcs = staticInst->numSrcRegs();
1028 arrays.numDests = staticInst->numDestRegs();
1029
1030 // Create a new DynInst from the instruction fetched.
1031 DynInstPtr instruction = new (arrays) DynInst(
1032 arrays, staticInst, curMacroop, this_pc, next_pc, seq, cpu);
1033 instruction->setTid(tid);
1034
1035 instruction->setThreadState(cpu->thread[tid]);
1036
1037 DPRINTF(Fetch, "[tid:%i] Instruction PC %s created [sn:%lli].\n",
1038 tid, this_pc, seq);
1039
1040 DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid,
1041 instruction->staticInst->disassemble(this_pc.instAddr()));
1042
1043#if TRACING_ON
1044 if (trace) {
1045 instruction->traceData =
1047 instruction->staticInst, this_pc, curMacroop);
1048 }
1049#else
1050 instruction->traceData = NULL;
1051#endif
1052
1053 // Add instruction to the CPU's list of instructions.
1054 instruction->setInstListIt(cpu->addInst(instruction));
1055
1056 // Write the instruction to the first slot in the queue
1057 // that heads to decode.
1058 assert(numInst < fetchWidth);
1059 fetchQueue[tid].push_back(instruction);
1060 assert(fetchQueue[tid].size() <= fetchQueueSize);
1061 DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n",
1062 tid, fetchQueue[tid].size(), fetchQueueSize);
1063 //toDecode->insts[toDecode->size++] = instruction;
1064
1065 // Keep track of if we can take an interrupt at this boundary
1066 delayedCommit[tid] = instruction->isDelayedCommit();
1067
1068 return instruction;
1069}
1070
1071void
1072Fetch::fetch(bool &status_change)
1073{
1075 // Start actual fetch
1078
1079 assert(!cpu->switchedOut());
1080
1081 if (tid == InvalidThreadID) {
1082 // Breaks looping condition in tick()
1084
1085 if (numThreads == 1) { // @todo Per-thread stats
1086 profileStall(0);
1087 }
1088
1089 return;
1090 }
1091
1092 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1093
1094 // The current PC.
1095 PCStateBase &this_pc = *pc[tid];
1096
1097 Addr pcOffset = fetchOffset[tid];
1098 Addr fetchAddr = (this_pc.instAddr() + pcOffset) & decoder[tid]->pcMask();
1099
1100 bool inRom = isRomMicroPC(this_pc.microPC());
1101
1102 // If returning from the delay of a cache miss, then update the status
1103 // to running, otherwise do the cache access. Possibly move this up
1104 // to tick() function.
1105 if (fetchStatus[tid] == IcacheAccessComplete) {
1106 DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid);
1107
1108 fetchStatus[tid] = Running;
1109 status_change = true;
1110 } else if (fetchStatus[tid] == Running) {
1111 // Align the fetch PC so it's at the start of a fetch buffer segment.
1112 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1113
1114 // If buffer is no longer valid or fetchAddr has moved to point
1115 // to the next cache block, AND we have no remaining ucode
1116 // from a macro-op, then start fetch from icache.
1117 if (!(fetchBufferValid[tid] &&
1118 fetchBufferBlockPC == fetchBufferPC[tid]) && !inRom &&
1119 !macroop[tid]) {
1120 DPRINTF(Fetch, "[tid:%i] Attempting to translate and read "
1121 "instruction, starting at PC %s.\n", tid, this_pc);
1122
1123 fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
1124
1125 if (fetchStatus[tid] == IcacheWaitResponse) {
1126 cpu->fetchStats[tid]->icacheStallCycles++;
1127 }
1128 else if (fetchStatus[tid] == ItlbWait)
1130 else
1132 return;
1133 } else if (checkInterrupt(this_pc.instAddr()) &&
1134 !delayedCommit[tid]) {
1135 // Stall CPU if an interrupt is posted and we're not issuing
1136 // a delayed commit micro-op currently (delayed commit
1137 // instructions are not interruptible by interrupts, only faults)
1139 DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid);
1140 return;
1141 }
1142 } else {
1143 if (fetchStatus[tid] == Idle) {
1145 DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid);
1146 }
1147
1148 // Status is Idle, so fetch should do nothing.
1149 return;
1150 }
1151
1153
1154 std::unique_ptr<PCStateBase> next_pc(this_pc.clone());
1155
1156 StaticInstPtr staticInst = NULL;
1157 StaticInstPtr curMacroop = macroop[tid];
1158
1159 // If the read of the first instruction was successful, then grab the
1160 // instructions from the rest of the cache line and put them into the
1161 // queue heading to decode.
1162
1163 DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to "
1164 "decode.\n", tid);
1165
1166 // Need to keep track of whether or not a predicted branch
1167 // ended this fetch block.
1168 bool predictedBranch = false;
1169
1170 // Need to halt fetch if quiesce instruction detected
1171 bool quiesce = false;
1172
1173 const unsigned numInsts = fetchBufferSize / instSize;
1174 unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1175
1176 auto *dec_ptr = decoder[tid];
1177 const Addr pc_mask = dec_ptr->pcMask();
1178
1179 // Loop through instruction memory from the cache.
1180 // Keep issuing while fetchWidth is available and branch is not
1181 // predicted taken
1182 while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
1183 && !predictedBranch && !quiesce) {
1184 // We need to process more memory if we aren't going to get a
1185 // StaticInst from the rom, the current macroop, or what's already
1186 // in the decoder.
1187 bool needMem = !inRom && !curMacroop && !dec_ptr->instReady();
1188 fetchAddr = (this_pc.instAddr() + pcOffset) & pc_mask;
1189 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1190
1191 if (needMem) {
1192 // If buffer is no longer valid or fetchAddr has moved to point
1193 // to the next cache block then start fetch from icache.
1194 if (!fetchBufferValid[tid] ||
1195 fetchBufferBlockPC != fetchBufferPC[tid])
1196 break;
1197
1198 if (blkOffset >= numInsts) {
1199 // We need to process more memory, but we've run out of the
1200 // current block.
1201 break;
1202 }
1203
1204 memcpy(dec_ptr->moreBytesPtr(),
1205 fetchBuffer[tid] + blkOffset * instSize, instSize);
1206 decoder[tid]->moreBytes(this_pc, fetchAddr);
1207
1208 if (dec_ptr->needMoreBytes()) {
1209 blkOffset++;
1210 fetchAddr += instSize;
1211 pcOffset += instSize;
1212 }
1213 }
1214
1215 // Extract as many instructions and/or microops as we can from
1216 // the memory we've processed so far.
1217 do {
1218 if (!(curMacroop || inRom)) {
1219 if (dec_ptr->instReady()) {
1220 staticInst = dec_ptr->decode(this_pc);
1221
1222 // Increment stat of fetched instructions.
1223 cpu->fetchStats[tid]->numInsts++;
1224
1225 if (staticInst->isMacroop()) {
1226 curMacroop = staticInst;
1227 } else {
1228 pcOffset = 0;
1229 }
1230 } else {
1231 // We need more bytes for this instruction so blkOffset and
1232 // pcOffset will be updated
1233 break;
1234 }
1235 }
1236 // Whether we're moving to a new macroop because we're at the
1237 // end of the current one, or the branch predictor incorrectly
1238 // thinks we are...
1239 bool newMacro = false;
1240 if (curMacroop || inRom) {
1241 if (inRom) {
1242 staticInst = dec_ptr->fetchRomMicroop(
1243 this_pc.microPC(), curMacroop);
1244 } else {
1245 staticInst = curMacroop->fetchMicroop(this_pc.microPC());
1246 }
1247 newMacro |= staticInst->isLastMicroop();
1248 }
1249
1250 DynInstPtr instruction = buildInst(
1251 tid, staticInst, curMacroop, this_pc, *next_pc, true);
1252
1253 ppFetch->notify(instruction);
1254 numInst++;
1255
1256#if TRACING_ON
1257 if (debug::O3PipeView) {
1258 instruction->fetchTick = curTick();
1259 }
1260#endif
1261
1262 set(next_pc, this_pc);
1263
1264 // If we're branching after this instruction, quit fetching
1265 // from the same block.
1266 predictedBranch |= this_pc.branching();
1267 predictedBranch |= lookupAndUpdateNextPC(instruction, *next_pc);
1268 if (predictedBranch) {
1269 DPRINTF(Fetch, "Branch detected with PC = %s\n", this_pc);
1270 }
1271
1272 newMacro |= this_pc.instAddr() != next_pc->instAddr();
1273
1274 // Move to the next instruction, unless we have a branch.
1275 set(this_pc, *next_pc);
1276 inRom = isRomMicroPC(this_pc.microPC());
1277
1278 if (newMacro) {
1279 fetchAddr = this_pc.instAddr() & pc_mask;
1280 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1281 pcOffset = 0;
1282 curMacroop = NULL;
1283 }
1284
1285 if (instruction->isQuiesce()) {
1286 DPRINTF(Fetch,
1287 "Quiesce instruction encountered, halting fetch!\n");
1289 status_change = true;
1290 quiesce = true;
1291 break;
1292 }
1293 } while ((curMacroop || dec_ptr->instReady()) &&
1294 numInst < fetchWidth &&
1295 fetchQueue[tid].size() < fetchQueueSize);
1296
1297 // Re-evaluate whether the next instruction to fetch is in micro-op ROM
1298 // or not.
1299 inRom = isRomMicroPC(this_pc.microPC());
1300 }
1301
1302 if (predictedBranch) {
1303 DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch "
1304 "instruction encountered.\n", tid);
1305 } else if (numInst >= fetchWidth) {
1306 DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth "
1307 "for this cycle.\n", tid);
1308 } else if (blkOffset >= fetchBufferSize) {
1309 DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the"
1310 "fetch buffer.\n", tid);
1311 }
1312
1313 macroop[tid] = curMacroop;
1314 fetchOffset[tid] = pcOffset;
1315
1316 if (numInst > 0) {
1317 wroteToTimeBuffer = true;
1318 }
1319
1320 // pipeline a fetch if we're crossing a fetch buffer boundary and not in
1321 // a state that would preclude fetching
1322 fetchAddr = (this_pc.instAddr() + pcOffset) & pc_mask;
1323 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1324 issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
1326 fetchStatus[tid] != ItlbWait &&
1327 fetchStatus[tid] != IcacheWaitRetry &&
1328 fetchStatus[tid] != QuiescePending &&
1329 !curMacroop;
1330}
1331
// Fetch::recvReqRetry -- handles retrying the fetch access.
// NOTE(review): this listing omits original lines 1333 (the signature), 1338,
// 1340-1341, 1344 and 1346. Going by the unmatched closing brace at 1348, one
// of the omitted lines presumably opens an inner block -- consult the full
// file before editing.
1332void
1334{
// A packet is waiting to be retried: the cache must be marked blocked and a
// retrying thread must be recorded.
1335 if (retryPkt != NULL) {
1336 assert(cacheBlocked);
1337 assert(retryTid != InvalidThreadID);
1339
1342 // Notify Fetch Request probe when a retryPkt is successfully sent.
1343 // Note that notify must be called before retryPkt is set to NULL.
// Clear the pending packet and the blocked flag.
1345 retryPkt = NULL;
1347 cacheBlocked = false;
1348 }
1349 } else {
// No packet is pending, so no thread may be recorded as retrying.
1350 assert(retryTid == InvalidThreadID);
1351 // Access has been squashed since it was sent out. Just clear
1352 // the cache being blocked.
1353 cacheBlocked = false;
1354 }
1355}
1356
1358// //
1359// SMT FETCH POLICY MAINTAINED HERE //
1360// //
// Fetch::getFetchingThread -- returns the appropriate thread to fetch, given
// the fetch policy, or InvalidThreadID when no thread qualifies.
// NOTE(review): original lines 1362-1363 (the signature), 1379 and 1387 are
// absent from this listing.
1364{
// With more than one thread, delegate to the routine of the configured policy;
// an unrecognised policy yields InvalidThreadID.
1365 if (numThreads > 1) {
1366 switch (fetchPolicy) {
1367 case SMTFetchPolicy::RoundRobin:
1368 return roundRobin();
1369 case SMTFetchPolicy::IQCount:
1370 return iqCount();
1371 case SMTFetchPolicy::LSQCount:
1372 return lsqCount();
1373 case SMTFetchPolicy::Branch:
1374 return branchCount();
1375 default:
1376 return InvalidThreadID;
1377 }
1378 } else {
// Single-thread path. `thread` is declared on an omitted line; judging by the
// end() comparison it is an iterator into activeThreads -- confirm.
1380 if (thread == activeThreads->end()) {
1381 return InvalidThreadID;
1382 }
1383
1384 ThreadID tid = *thread;
1385
// Return the thread only when its status is one of those tested below
// (one operand of this condition sits on the omitted line 1387).
1386 if (fetchStatus[tid] == Running ||
1388 fetchStatus[tid] == Idle) {
1389 return tid;
1390 } else {
1391 return InvalidThreadID;
1392 }
1393 }
1394}
1395
1396
// Fetch::roundRobin -- returns the appropriate thread to fetch using a round
// robin policy, or InvalidThreadID when no candidate qualifies.
// NOTE(review): original lines 1397-1398 (the signature) and 1400-1401 (the
// declarations of pri_iter and end) are absent from this listing.
1399{
1402
1403 ThreadID high_pri;
1404
// Visit the candidates in order and pick the first whose status is Running,
// IcacheAccessComplete or Idle.
1405 while (pri_iter != end) {
1406 high_pri = *pri_iter;
1407
// NOTE(review): high_pri indexes fetchStatus just below, so `<` rather than
// `<=` looks intended (thread IDs presumably run 0..numThreads-1) -- confirm.
1408 assert(high_pri <= numThreads);
1409
1410 if (fetchStatus[high_pri] == Running ||
1411 fetchStatus[high_pri] == IcacheAccessComplete ||
1412 fetchStatus[high_pri] == Idle) {
1413
// Move the chosen thread to the back of priorityList, presumably so the
// remaining threads are tried ahead of it on the next call.
1414 priorityList.erase(pri_iter);
1415 priorityList.push_back(high_pri);
1416
1417 return high_pri;
1418 }
1419
1420 pri_iter++;
1421 }
1422
1423 return InvalidThreadID;
1424}
1425
// Fetch::iqCount -- returns the appropriate thread to fetch using the IQ count
// policy: the qualifying thread with the smallest reported iqCount, or
// InvalidThreadID when none qualifies.
// NOTE(review): original lines 1426-1427 (the signature) and 1434-1435 (the
// declarations of threads and end) are absent from this listing.
1428{
1429 //sorted from lowest->highest
// std::greater makes PQ.top() the smallest count pushed so far.
1430 std::priority_queue<unsigned, std::vector<unsigned>,
1431 std::greater<unsigned> > PQ;
// Maps a count back to the thread that reported it.
1432 std::map<unsigned, ThreadID> threadMap;
1433
1436
// Record each thread's count as read from the IEW wire.
1437 while (threads != end) {
1438 ThreadID tid = *threads++;
1439 unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1440
1441 //we can potentially get tid collisions if two threads
1442 //have the same iqCount, but this should be rare.
// On such a collision the later thread overwrites the earlier map entry, so
// the earlier thread cannot be returned by this call.
1443 PQ.push(iqCount);
1444 threadMap[iqCount] = tid;
1445 }
1446
// Try the counts in ascending order until a thread's status qualifies.
1447 while (!PQ.empty()) {
1448 ThreadID high_pri = threadMap[PQ.top()];
1449
1450 if (fetchStatus[high_pri] == Running ||
1451 fetchStatus[high_pri] == IcacheAccessComplete ||
1452 fetchStatus[high_pri] == Idle)
1453 return high_pri;
1454 else
1455 PQ.pop();
1456
1457 }
1458
1459 return InvalidThreadID;
1460}
1461
// Fetch::lsqCount -- returns the appropriate thread to fetch using the LSQ
// count policy: the qualifying thread with the smallest reported ldstqCount,
// or InvalidThreadID when none qualifies.
// NOTE(review): original lines 1462-1463 (the signature) and 1470-1471 (the
// declarations of threads and end) are absent from this listing.
1464{
1465 //sorted from lowest->highest
// std::greater makes PQ.top() the smallest count pushed so far.
1466 std::priority_queue<unsigned, std::vector<unsigned>,
1467 std::greater<unsigned> > PQ;
// Maps a count back to the thread that reported it.
1468 std::map<unsigned, ThreadID> threadMap;
1469
1472
// Record each thread's count as read from the IEW wire.
1473 while (threads != end) {
1474 ThreadID tid = *threads++;
1475 unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1476
1477 //we can potentially get tid collisions if two threads
1478 //have the same ldstqCount, but this should be rare.
// On such a collision the later thread overwrites the earlier map entry, so
// the earlier thread cannot be returned by this call.
1479 PQ.push(ldstqCount);
1480 threadMap[ldstqCount] = tid;
1481 }
1482
// Try the counts in ascending order until a thread's status qualifies.
1483 while (!PQ.empty()) {
1484 ThreadID high_pri = threadMap[PQ.top()];
1485
1486 if (fetchStatus[high_pri] == Running ||
1487 fetchStatus[high_pri] == IcacheAccessComplete ||
1488 fetchStatus[high_pri] == Idle)
1489 return high_pri;
1490 else
1491 PQ.pop();
1492 }
1493
1494 return InvalidThreadID;
1495}
1496
// Fetch::branchCount -- thread selection for the branch count policy. The
// policy is not implemented: the body calls panic() with a message saying so.
// NOTE(review): original lines 1497-1498 (the signature) are absent from this
// listing.
1499{
1500 panic("Branch Count Fetch policy unimplemented\n");
// Follows the panic() call; presumably only reached if panic() returns.
1501 return InvalidThreadID;
1502}
1503
// Fetch::pipelineIcacheAccesses -- pipeline the next I-cache access to the
// current one. Does nothing unless issuePipelinedIfetch[tid] is set and the
// thread's PC is not a ROM micro-PC.
// NOTE(review): original line 1505 (the signature) is absent from this
// listing; `tid` is presumably its ThreadID parameter.
1504void
1506{
1507 if (!issuePipelinedIfetch[tid]) {
1508 return;
1509 }
1510
1511 // The next PC to access.
1512 const PCStateBase &this_pc = *pc[tid];
1513
// Nothing to request while the PC points into the micro-op ROM.
1514 if (isRomMicroPC(this_pc.microPC())) {
1515 return;
1516 }
1517
// Fetch address = instruction address plus the saved per-thread offset,
// masked with the decoder's PC mask.
1518 Addr pcOffset = fetchOffset[tid];
1519 Addr fetchAddr = (this_pc.instAddr() + pcOffset) & decoder[tid]->pcMask();
1520
1521 // Align the fetch PC so it's at the start of a fetch buffer segment.
1522 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1523
1524 // Unless buffer already got the block, fetch it from icache.
1525 if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1526 DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, "
1527 "starting at PC %s.\n", tid, this_pc);
1528
1529 fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
1530 }
1531}
1532
// Fetch::profileStall -- profile the reasons of fetch stall. Walks a chain of
// conditions for thread `tid` and emits one debug message naming the first
// stall reason that matches.
// NOTE(review): original lines 1534 (the signature), 1541, 1544, 1547, 1550,
// 1557, 1561, 1565 and 1569 are absent from this listing. Each gap sits at the
// top of a branch, so it presumably holds that branch's stall counter update
// -- confirm against the full file.
1533void
1535{
1536 DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1537
1538 // @todo Per-thread stats
1539
// Order matters: a pending drain and an empty active-thread list are tested
// before any per-thread status.
1540 if (stalls[tid].drain) {
1542 DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1543 } else if (activeThreads->empty()) {
1545 DPRINTF(Fetch, "Fetch has no active thread!\n");
1546 } else if (fetchStatus[tid] == Blocked) {
1548 DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid);
1549 } else if (fetchStatus[tid] == Squashing) {
1551 DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
1552 } else if (fetchStatus[tid] == IcacheWaitResponse) {
// This branch updates the per-thread counter kept on the CPU object.
1553 cpu->fetchStats[tid]->icacheStallCycles++;
1554 DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
1555 tid);
1556 } else if (fetchStatus[tid] == ItlbWait) {
1558 DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to "
1559 "finish!\n", tid);
1560 } else if (fetchStatus[tid] == TrapPending) {
1562 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n",
1563 tid);
1564 } else if (fetchStatus[tid] == QuiescePending) {
1566 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce "
1567 "instruction!\n", tid);
1568 } else if (fetchStatus[tid] == IcacheWaitRetry) {
1570 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n",
1571 tid);
1572 } else if (fetchStatus[tid] == NoGoodAddr) {
1573 DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n",
1574 tid);
1575 } else {
// Fallback for any status not handled above; the raw status value is logged.
1576 DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason "
1577 "(Status: %i)\n",
1578 tid, fetchStatus[tid]);
1579 }
1580}
1581
// Fetch::IcachePort::recvTimingResp -- timing version of receive. Hands the
// response packet to the fetch stage and always returns true.
// NOTE(review): original line 1583 (the signature) is absent from this
// listing; `pkt` is presumably its PacketPtr parameter.
1582bool
1584{
1585 DPRINTF(O3CPU, "Fetch unit received timing\n");
1586 // We shouldn't ever get a cacheable block in Modified state
// i.e. unless the request is uncacheable, the packet must not be both
// cache-responding and without sharers.
1587 assert(pkt->req->isUncacheable() ||
1588 !(pkt->cacheResponding() && !pkt->hasSharers()));
1589 fetch->processCacheCompletion(pkt);
1590
1591 return true;
1592}
1593
// Fetch::IcachePort::recvReqRetry -- handles doing a retry of a failed fetch
// by forwarding the notification to the owning fetch stage.
// NOTE(review): original line 1595 (the signature) is absent from this
// listing.
1594void
1596{
1597 fetch->recvReqRetry();
1598}
1599
1600} // namespace o3
1601} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
RequestorID instRequestorId() const
Reads this CPU's unique instruction requestor ID.
Definition base.hh:220
uint32_t taskId() const
Get cpu task id.
Definition base.hh:236
trace::InstTracer * getTracer()
Provide access to the tracer pointer.
Definition base.hh:299
std::vector< std::unique_ptr< FetchCPUStats > > fetchStats
Definition base.hh:846
ThreadID contextToThread(ContextID cid)
Convert ContextID to threadID.
Definition base.hh:326
bool switchedOut() const
Determine if the CPU is switched out.
Definition base.hh:400
virtual void translateTiming(const RequestPtr &req, ThreadContext *tc, Translation *translation, Mode mode)
Definition mmu.cc:118
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
size_t moreBytesSize() const
Definition decoder.hh:96
virtual void reset()
Definition decoder.hh:63
virtual void moreBytes(const PCStateBase &pc, Addr fetchPC)=0
Feed data to the decoder.
Addr pcMask() const
Definition decoder.hh:97
virtual std::string name() const
Definition named.hh:47
virtual bool branching() const =0
MicroPC microPC() const
Returns the current micropc.
Definition pcstate.hh:119
Addr instAddr() const
Returns the memory address of the instruction this PC points to.
Definition pcstate.hh:108
virtual PCStateBase * clone() const =0
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
const T * getConstPtr() const
Definition packet.hh:1234
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
bool cacheResponding() const
Definition packet.hh:659
bool hasSharers() const
Definition packet.hh:686
bool isConnected() const
Is this port currently connected to a peer?
Definition port.hh:133
ProbePointArg generates a point for the class of Arg.
Definition probe.hh:264
void notify(const Arg &arg)
called at the ProbePoint call site, passes arg to each listener.
Definition probe.hh:313
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition port.hh:136
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition port.hh:603
@ INST_FETCH
The request was an instruction fetch.
Definition request.hh:115
uint8_t numSrcRegs() const
Number of source registers.
virtual StaticInstPtr fetchMicroop(MicroPC upc) const
Return the microop that goes with a particular micropc.
uint8_t numDestRegs() const
Number of destination registers.
bool isMacroop() const
bool isLastMicroop() const
bool isMemAddr(Addr addr) const
Check if a physical address is within a range of a memory that is part of the global address map.
Definition system.cc:288
wire getWire(int idx)
Definition timebuf.hh:232
void update(const InstSeqNum &done_sn, ThreadID tid)
Tells the branch predictor to commit any updates until the given sequence number.
bool predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, PCStateBase &pc, ThreadID tid)
Predicts whether or not the instruction is a taken branch, and the target of the branch if it is take...
Definition bpred_unit.cc:99
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition bpred_unit.cc:89
void squash(const InstSeqNum &squashed_sn, ThreadID tid)
Squashes all outstanding updates until a given sequence number.
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
ListIt addInst(const DynInstPtr &inst)
Function to add instruction onto the head of the list of the instructions.
Definition cpu.cc:1133
ProbePointArg< PacketPtr > * ppInstAccessComplete
Definition cpu.hh:176
std::vector< ThreadState * > thread
Pointers to all of the threads in the CPU.
Definition cpu.hh:533
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition cpu.hh:488
Port & getInstPort() override
Used by the fetch unit to get a hold of the instruction port.
Definition cpu.hh:564
void removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
Remove all instructions younger than the given sequence number.
Definition cpu.cc:1217
bool isDraining() const
Is the CPU draining?
Definition cpu.hh:238
void removeInstsNotInROB(ThreadID tid)
Remove all instructions that are not currently in the ROB.
Definition cpu.cc:1173
void deactivateStage(const StageIdx idx)
Changes a stage's status to inactive within the activity recorder.
Definition cpu.hh:499
BaseMMU * mmu
Definition cpu.hh:110
System * system
Pointer to the system.
Definition cpu.hh:530
InstSeqNum getAndIncrementInstSeq()
Get the current instruction sequence number, and increment it.
Definition cpu.hh:284
void pcState(const PCStateBase &new_pc_state, ThreadID tid)
Sets the commit PC state of a specific thread.
Definition cpu.cc:1120
gem5::ThreadContext * tcBase(ThreadID tid)
Returns a pointer to a thread context.
Definition cpu.hh:515
void activateStage(const StageIdx idx)
Changes a stage's status to active within the activity recorder.
Definition cpu.hh:492
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition cpu.cc:1316
void setReq(const RequestPtr &_req)
Definition fetch.hh:142
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
Definition fetch.cc:1583
IcachePort(Fetch *_fetch, CPU *_cpu)
Default constructor.
Definition fetch.cc:77
virtual void recvReqRetry()
Handles doing a retry of a failed fetch.
Definition fetch.cc:1595
Fetch class handles both single threaded and SMT fetch.
Definition fetch.hh:80
gem5::o3::Fetch::FetchStatGroup fetchStats
bool wroteToTimeBuffer
Variable that tracks if fetch has written to the time buffer this cycle.
Definition fetch.hh:433
void deactivateThread(ThreadID tid)
For priority-based fetch policies, need to keep priorityList updated.
Definition fetch.cc:475
FetchStatus
Overall fetch status.
Definition fetch.hh:164
std::list< ThreadID > * activeThreads
List of Active Threads.
Definition fetch.hh:508
TimeBuffer< TimeStruct >::wire fromCommit
Wire to get commit's information from backwards time buffer.
Definition fetch.hh:409
Cycles renameToFetchDelay
Rename to fetch delay.
Definition fetch.hh:452
StaticInstPtr macroop[MaxThreads]
Definition fetch.hh:422
void fetch(bool &status_change)
Does the actual fetching of instructions and passing them on to the next stage.
Definition fetch.cc:1072
void takeOverFrom()
Takes over from another CPU's thread.
Definition fetch.cc:425
uint8_t * fetchBuffer[MaxThreads]
The fetch data that is being fetched and buffered.
Definition fetch.hh:487
void doSquash(const PCStateBase &new_pc, const DynInstPtr squashInst, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:685
Random::RandomPtr rng
Definition fetch.hh:204
TimeBuffer< FetchStruct >::wire toDecode
Wire used to write any information heading to decode.
Definition fetch.hh:413
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets pointer to list of active threads.
Definition fetch.cc:249
bool lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &pc)
Looks up in the branch predictor to see if the next PC should be either next PC+=MachInst or a branch...
Definition fetch.cc:485
ThreadStatus fetchStatus[MaxThreads]
Per-thread status.
Definition fetch.hh:191
ThreadID numThreads
Number of threads.
Definition fetch.hh:511
TimeBuffer< TimeStruct >::wire fromDecode
Wire to get decode's information from backwards time buffer.
Definition fetch.hh:400
ProbePointArg< DynInstPtr > * ppFetch
Probe points.
Definition fetch.hh:200
TimeBuffer< TimeStruct >::wire fromRename
Wire to get rename's information from backwards time buffer.
Definition fetch.hh:403
void squash(const PCStateBase &new_pc, const InstSeqNum seq_num, DynInstPtr squashInst, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:803
void squashFromDecode(const PCStateBase &new_pc, const DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:737
FetchStatus updateFetchStatus()
Updates overall fetch stage status; to be called at the end of each cycle.
Definition fetch.cc:764
ThreadID getFetchingThread()
Returns the appropriate thread to fetch, given the fetch policy.
Definition fetch.cc:1363
bool fetchBufferValid[MaxThreads]
Whether or not the fetch buffer data is valid.
Definition fetch.hh:499
void startupStage()
Initialize stage.
Definition fetch.cc:262
void pipelineIcacheAccesses(ThreadID tid)
Pipeline the next I-cache access to the current one.
Definition fetch.cc:1505
std::string name() const
Returns the name of fetch.
Definition fetch.cc:147
void wakeFromQuiesce()
Tells fetch to wake up from a quiesce instruction.
Definition fetch.cc:442
void switchToActive()
Changes the status of this stage to active, and indicates this to the CPU.
Definition fetch.cc:451
void switchToInactive()
Changes the status of this stage to inactive, and indicates this to the CPU.
Definition fetch.cc:463
int numInst
Tracks how many instructions have been fetched this cycle.
Definition fetch.hh:436
bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
Fetches the cache line that contains the fetch PC.
Definition fetch.cc:529
Cycles decodeToFetchDelay
Decode to fetch delay.
Definition fetch.hh:449
bool issuePipelinedIfetch[MaxThreads]
Set to true if a pipelined I-cache request should be issued.
Definition fetch.hh:528
Addr fetchBufferAlignPC(Addr addr)
Align a PC to the start of a fetch buffer block.
Definition fetch.hh:355
FetchStatus _status
Fetch status.
Definition fetch.hh:188
bool delayedCommit[MaxThreads]
Can the fetch stage redirect from an interrupt on this instruction?
Definition fetch.hh:425
ThreadID threadFetched
Thread ID being fetched.
Definition fetch.hh:517
SMTFetchPolicy fetchPolicy
Fetch policy.
Definition fetch.hh:194
Addr cacheBlkSize
Cache block size.
Definition fetch.hh:476
branch_prediction::BPredUnit * branchPred
BPredUnit.
Definition fetch.hh:416
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition fetch.cc:378
unsigned fetchWidth
The width of fetch in instructions.
Definition fetch.hh:461
unsigned fetchQueueSize
The size of the fetch queue in micro-ops.
Definition fetch.hh:493
InstDecoder * decoder[MaxThreads]
The decoder.
Definition fetch.hh:361
TimeBuffer< TimeStruct >::wire fromIEW
Wire to get iew's information from backwards time buffer.
Definition fetch.hh:406
void regProbePoints()
Registers probes.
Definition fetch.cc:150
bool checkSignalsAndUpdate(ThreadID tid)
Checks all input signals and updates the status as necessary.
Definition fetch.cc:917
bool checkStall(ThreadID tid) const
Checks if a thread is stalled.
Definition fetch.cc:750
IcachePort icachePort
Instruction port.
Definition fetch.hh:525
@ IcacheAccessComplete
Definition fetch.hh:182
void setTimeBuffer(TimeBuffer< TimeStruct > *time_buffer)
Sets the main backwards communication time buffer pointer.
Definition fetch.cc:237
void processCacheCompletion(PacketPtr pkt)
Processes cache completion event.
Definition fetch.cc:326
ThreadID iqCount()
Returns the appropriate thread to fetch using the IQ count policy.
Definition fetch.cc:1427
Addr fetchBufferMask
Mask to align a fetch address to a fetch buffer boundary.
Definition fetch.hh:484
void recvReqRetry()
Handles retrying the fetch access.
Definition fetch.cc:1333
bool checkInterrupt(Addr pc)
Check if an interrupt is pending and that we need to handle.
Definition fetch.hh:308
Cycles iewToFetchDelay
IEW to fetch delay.
Definition fetch.hh:455
void resetStage()
Reset this pipeline stage.
Definition fetch.cc:292
Fetch(CPU *_cpu, const BaseO3CPUParams &params)
Fetch constructor.
Definition fetch.cc:82
void drainStall(ThreadID tid)
Stall the fetch stage after reaching a safe drain point.
Definition fetch.cc:433
Counter lastIcacheStall[MaxThreads]
Icache stall statistics.
Definition fetch.hh:505
int instSize
Size of instructions.
Definition fetch.hh:502
ProbePointArg< RequestPtr > * ppFetchRequestSent
To probe when a fetch request is successfully sent.
Definition fetch.hh:202
Cycles commitToFetchDelay
Commit to fetch delay.
Definition fetch.hh:458
RequestPtr memReq[MaxThreads]
Memory request used to access cache.
Definition fetch.hh:428
TimeBuffer< TimeStruct > * timeBuffer
Time buffer interface.
Definition fetch.hh:397
void profileStall(ThreadID tid)
Profile the reasons of fetch stall.
Definition fetch.cc:1534
ThreadID roundRobin()
Returns the appropriate thread to fetch using a round robin policy.
Definition fetch.cc:1398
Addr fetchBufferPC[MaxThreads]
The PC of the first instruction loaded into the fetch buffer.
Definition fetch.hh:490
void drainResume()
Resume after a drain.
Definition fetch.cc:369
void clearStates(ThreadID tid)
Clear all thread-specific states.
Definition fetch.cc:273
void finishTranslation(const Fault &fault, const RequestPtr &mem_req)
Definition fetch.cc:578
bool interruptPending
Checks if there is an interrupt pending.
Definition fetch.hh:522
std::unique_ptr< PCStateBase > pc[MaxThreads]
Definition fetch.hh:418
ThreadID lsqCount()
Returns the appropriate thread to fetch using the LSQ count policy.
Definition fetch.cc:1463
Stalls stalls[MaxThreads]
Tracks which stages are telling fetch to stall.
Definition fetch.hh:446
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst, StaticInstPtr curMacroop, const PCStateBase &this_pc, const PCStateBase &next_pc, bool trace)
Definition fetch.cc:1019
bool isDrained() const
Has the stage drained?
Definition fetch.cc:395
Addr fetchOffset[MaxThreads]
Definition fetch.hh:420
std::deque< DynInstPtr > fetchQueue[MaxThreads]
Queue of fetched instructions.
Definition fetch.hh:496
PacketPtr retryPkt
The packet that is waiting to be retried.
Definition fetch.hh:470
std::list< ThreadID > priorityList
List that has the threads organized by priority.
Definition fetch.hh:197
FinishTranslationEvent finishTranslationEvent
Event used to delay fault generation of translation faults.
Definition fetch.hh:531
ThreadID retryTid
The thread that is waiting on the cache to tell fetch to retry.
Definition fetch.hh:473
void tick()
Ticks the fetch stage, processing all inputs signals and fetching as many instructions as possible.
Definition fetch.cc:815
ThreadID numFetchingThreads
Number of threads that are actively fetching.
Definition fetch.hh:514
unsigned fetchBufferSize
The size of the fetch buffer in bytes.
Definition fetch.hh:481
void setFetchQueue(TimeBuffer< FetchStruct > *fq_ptr)
Sets pointer to time buffer used to communicate to the next stage.
Definition fetch.cc:255
CPU * cpu
Pointer to the O3CPU.
Definition fetch.hh:394
unsigned decodeWidth
The width of decode in instructions.
Definition fetch.hh:464
bool cacheBlocked
Is the cache blocked? If so no threads can access it.
Definition fetch.hh:467
ThreadID branchCount()
Returns the appropriate thread to fetch using the branch count policy.
Definition fetch.cc:1498
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Derived & prereq(const Stat &prereq)
Set the prerequisite stat and marks this stat to print at the end of simulation.
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Statistics container.
Definition group.hh:93
virtual InstRecord * getInstRecord(Tick when, ThreadContext *tc, const StaticInstPtr staticInst, const PCStateBase &pc, const StaticInstPtr macroStaticInst=nullptr)=0
STL list class.
Definition stl.hh:51
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
ProbeManager * getProbeManager()
Get the probe manager for this object.
#define warn(...)
Definition logging.hh:256
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 12, 11 > set
static constexpr int MaxThreads
Definition limits.hh:38
static constexpr int MaxWidth
Definition limits.hh:37
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition info.hh:61
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< FaultBase > Fault
Definition types.hh:249
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
const ThreadID InvalidThreadID
Definition types.hh:236
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition root.cc:220
static bool isRomMicroPC(MicroPC upc)
Definition types.hh:166
StaticInstPtr nopStaticInstPtr
Pointer to a statically allocated generic "nop" instruction object.
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
Declaration of the Packet class.
statistics::Scalar icacheSquashes
Total number of outstanding icache accesses that were dropped due to a squash.
Definition fetch.hh:572
statistics::Scalar pendingDrainCycles
Total number of cycles spent in waiting for drains.
Definition fetch.hh:557
statistics::Scalar cacheLines
Stat for total number of fetched cache lines.
Definition fetch.hh:568
statistics::Scalar blockedCycles
Total number of cycles spent blocked.
Definition fetch.hh:553
statistics::Scalar idleCycles
Stat for total number of cycles spent blocked due to other stages in the pipeline.
Definition fetch.hh:551
statistics::Scalar predictedBranches
Stat for total number of predicted branches.
Definition fetch.hh:540
statistics::Scalar noActiveThreadStallCycles
Total number of stall cycles caused by no active threads to run.
Definition fetch.hh:559
statistics::Scalar pendingQuiesceStallCycles
Total number of stall cycles caused by pending quiesce instructions.
Definition fetch.hh:564
statistics::Scalar icacheWaitRetryStallCycles
Total number of stall cycles caused by I-cache wait retries.
Definition fetch.hh:566
statistics::Scalar pendingTrapStallCycles
Total number of stall cycles caused by pending traps.
Definition fetch.hh:561
statistics::Scalar cycles
Stat for total number of cycles spent fetching.
Definition fetch.hh:542
statistics::Scalar miscStallCycles
Total number of cycles spent in any other state.
Definition fetch.hh:555
statistics::Scalar tlbCycles
Stat for total number of cycles spent waiting for translation.
Definition fetch.hh:546
statistics::Scalar squashCycles
Stat for total number of cycles spent squashing.
Definition fetch.hh:544
FetchStatGroup(CPU *cpu, Fetch *fetch)
Definition fetch.cc:158
statistics::Formula idleRate
Rate of how often fetch was idle.
Definition fetch.hh:580
statistics::Scalar tlbSquashes
Total number of outstanding tlb accesses that were dropped due to a squash.
Definition fetch.hh:576
statistics::Distribution nisnDist
Distribution of number of instructions fetched each cycle.
Definition fetch.hh:578
const std::string & name()
Definition trace.cc:48

Generated on Mon Jan 13 2025 04:28:31 for gem5 by doxygen 1.9.8