gem5 [DEVELOP-FOR-25.0]
Loading...
Searching...
No Matches
fetch.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2010-2014 ARM Limited
3 * Copyright (c) 2012-2013 AMD
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/fetch.hh"
43
44#include <algorithm>
45#include <cstring>
46#include <list>
47#include <map>
48#include <queue>
49
50#include "arch/generic/tlb.hh"
51#include "base/types.hh"
52#include "cpu/base.hh"
53#include "cpu/exetrace.hh"
55#include "cpu/o3/cpu.hh"
56#include "cpu/o3/dyn_inst.hh"
57#include "cpu/o3/limits.hh"
58#include "debug/Activity.hh"
59#include "debug/Drain.hh"
60#include "debug/Fetch.hh"
61#include "debug/O3CPU.hh"
62#include "debug/O3PipeView.hh"
63#include "mem/packet.hh"
64#include "params/BaseO3CPU.hh"
65#include "sim/byteswap.hh"
66#include "sim/core.hh"
67#include "sim/eventq.hh"
68#include "sim/full_system.hh"
69#include "sim/system.hh"
70
71namespace gem5
72{
73
74namespace o3
75{
76
// NOTE(review): the constructor signature line (doxygen 77, presumably
// Fetch::IcachePort::IcachePort(Fetch *_fetch, CPU *_cpu) :) was lost in
// extraction. Visible tail: the request port is named "<cpu>.icache_port"
// and the owning fetch stage pointer is stored.
78 RequestPort(_cpu->name() + ".icache_port"), fetch(_fetch)
79{}
80
81
// Fetch stage constructor: copies widths/policies out of the CPU params,
// sanity-checks them against the compiled limits, zeroes per-thread state
// for every MaxThreads slot, then wires up the per-thread decoders and
// allocates one fetchBufferSize-byte fetch buffer per real thread.
// NOTE(review): extraction dropped several initializer-list entries
// (doxygen 86-89, 91, 93, 95-97) and the `if` guards preceding each
// fatal() (103, 111, 114) -- restore from the upstream file.
82Fetch::Fetch(CPU *_cpu, const BaseO3CPUParams &params)
83 : fetchPolicy(params.smtFetchPolicy),
84 cpu(_cpu),
85 branchPred(nullptr),
90 fetchWidth(params.fetchWidth),
92 retryPkt(NULL),
94 cacheBlkSize(cpu->cacheLineSize()),
98 numThreads(params.numThreads),
99 numFetchingThreads(params.smtNumFetchingThreads),
100 icachePort(this, _cpu),
101 finishTranslationEvent(this), fetchStats(_cpu, this)
102{
// NOTE(review): missing guard -- presumably `if (numThreads > MaxThreads)`.
104 fatal("numThreads (%d) is larger than compiled limit (%d),\n"
105 "\tincrease MaxThreads in src/cpu/o3/limits.hh\n"
106 numThreads, static_cast<int>(MaxThreads));
107 if (fetchWidth > MaxWidth)
108 fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
109 "\tincrease MaxWidth in src/cpu/o3/limits.hh\n"
110 fetchWidth, static_cast<int>(MaxWidth));
// NOTE(review): missing guard -- presumably a fetchBufferSize > cacheBlkSize check.
112 fatal("fetch buffer size (%u bytes) is greater than the cache "
113 "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
// NOTE(review): missing guard -- presumably a cacheBlkSize % fetchBufferSize check.
115 fatal("cache block (%u bytes) is not a multiple of the "
116 "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
117
// Initialize every per-thread slot up to the compiled maximum so unused
// slots hold benign values.
118 for (int i = 0; i < MaxThreads; i++) {
119 fetchStatus[i] = Idle;
120 decoder[i] = nullptr;
121 pc[i].reset(params.isa[0]->newPCState());
122 fetchOffset[i] = 0;
123 macroop[i] = nullptr;
124 delayedCommit[i] = false;
125 memReq[i] = nullptr;
126 stalls[i] = {false, false};
127 fetchBuffer[i] = NULL;
128 fetchBufferPC[i] = 0;
129 fetchBufferValid[i] = false;
130 lastIcacheStall[i] = 0;
131 issuePipelinedIfetch[i] = false;
132 }
133
134 branchPred = params.branchPred;
135
// Only the real threads get a decoder and a heap-allocated fetch buffer.
136 for (ThreadID tid = 0; tid < numThreads; tid++) {
137 decoder[tid] = params.decoder[tid];
138 // Create space to buffer the cache line data,
139 // which may not hold the entire cache line.
140 fetchBuffer[tid] = new uint8_t[fetchBufferSize];
141 }
142
143 // Get the size of an instruction.
144 instSize = decoder[0]->moreBytesSize();
145}
146
147std::string Fetch::name() const { return cpu->name() + ".fetch"; }
148
// Creates the "Fetch" (per-DynInst) and "FetchRequest" (per-RequestPtr)
// probe points on the CPU's probe manager.
// NOTE(review): the signature line (doxygen 150, presumably
// Fetch::regProbePoints()) was lost in extraction.
149void
151{
152 ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
153 ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
154 "FetchRequest");
155
156}
157
// FetchStatGroup constructor: registers this stage's statistics under the
// "fetch" group, sets prerequisites so stats only print when their
// prerequisite is nonzero, and configures the per-cycle instruction
// distribution and the idle-rate formula.
// NOTE(review): the constructor signature (doxygen 158) and many ADD_STAT
// name lines / stat-object lines in front of the .prereq()/.init() chains
// were lost in extraction; only fragments of the registration list remain.
159 : statistics::Group(cpu, "fetch"),
161 "Number of branches that fetch has predicted taken"),
162 ADD_STAT(cycles, statistics::units::Cycle::get(),
163 "Number of cycles fetch has run and was not squashing or "
164 "blocked"),
165 ADD_STAT(squashCycles, statistics::units::Cycle::get(),
166 "Number of cycles fetch has spent squashing"),
167 ADD_STAT(tlbCycles, statistics::units::Cycle::get(),
168 "Number of cycles fetch has spent waiting for tlb"),
169 ADD_STAT(idleCycles, statistics::units::Cycle::get(),
170 "Number of cycles fetch was idle"),
171 ADD_STAT(blockedCycles, statistics::units::Cycle::get(),
172 "Number of cycles fetch has spent blocked"),
174 "Number of cycles fetch has spent waiting on interrupts, or bad "
175 "addresses, or out of MSHRs"),
177 "Number of cycles fetch has spent waiting on pipes to drain"),
179 "Number of stall cycles due to no active thread to fetch from"),
181 "Number of stall cycles due to pending traps"),
183 "Number of stall cycles due to pending quiesce instructions"),
185 "Number of stall cycles due to full MSHR"),
186 ADD_STAT(cacheLines, statistics::units::Count::get(),
187 "Number of cache lines fetched"),
188 ADD_STAT(icacheSquashes, statistics::units::Count::get(),
189 "Number of outstanding Icache misses that were squashed"),
190 ADD_STAT(tlbSquashes, statistics::units::Count::get(),
191 "Number of outstanding ITLB misses that were squashed"),
192 ADD_STAT(nisnDist, statistics::units::Count::get(),
193 "Number of instructions fetched each cycle (Total)"),
194 ADD_STAT(idleRate, statistics::units::Ratio::get(),
195 "Ratio of cycles fetch was idle",
196 idleCycles / cpu->baseStats.numCycles)
197{
// NOTE(review): each .prereq() chain below lost the stat-object line that
// preceded it (e.g. `predictedBranches` on doxygen 198).
199 .prereq(predictedBranches);
200 cycles
201 .prereq(cycles);
203 .prereq(squashCycles);
205 .prereq(tlbCycles);
207 .prereq(idleCycles);
209 .prereq(blockedCycles);
211 .prereq(cacheLines);
213 .prereq(miscStallCycles);
215 .prereq(pendingDrainCycles);
219 .prereq(pendingTrapStallCycles);
225 .prereq(icacheSquashes);
227 .prereq(tlbSquashes);
// The per-cycle fetched-instruction histogram covers 0..fetchWidth.
229 .init(/* base value */ 0,
230 /* last value */ fetch->fetchWidth,
231 /* bucket size */ 1)
232 .flags(statistics::pdf);
234 .prereq(idleRate);
235}
// Stores the pointer to the main backwards-communication time buffer.
// NOTE(review): the signature line (doxygen 237) and the wire-creation
// statements (242-245, which set up the getWire() handles referenced in
// the comment below) were lost in extraction.
236void
238{
239 timeBuffer = time_buffer;
240
241 // Create wires to get information from proper places in time buffer.
246}
247
// NOTE(review): this function's signature and body (doxygen 249-252) were
// lost in extraction; only the return-type line and a trailing blank-line
// marker remain. Restore from the upstream file before compiling.
248void
253
// Grabs wire 0 of the fetch-to-decode time buffer; fetch writes the
// instructions it produces each cycle through this wire.
// NOTE(review): the signature line (doxygen 255) was lost in extraction.
254void
256{
257 // Create wire to write information to proper place in fetch time buf.
258 toDecode = ftb_ptr->getWire(0);
259}
260
// One-time startup: the priority list must still be empty, then all
// per-thread state is reset.
// NOTE(review): the signature line (doxygen 262) and the activation call
// on line 269 (presumably switchToActive()) were lost in extraction.
261void
263{
264 assert(priorityList.empty());
265 resetStage();
266
267 // Fetch needs to start fetching instructions at the very beginning,
268 // so it must start up in active state.
270}
271
// Resets one thread's fetch state: status back to Running at the CPU's
// current architectural PC, all buffers/stalls cleared, and any of the
// thread's instructions still sitting in the fetch-to-decode comm queue
// removed.
// NOTE(review): the signature line (doxygen 273, presumably
// Fetch::clearStates(ThreadID tid)) was lost in extraction.
272void
274{
275 fetchStatus[tid] = Running;
276 set(pc[tid], cpu->pcState(tid));
277 fetchOffset[tid] = 0;
278 macroop[tid] = NULL;
279 delayedCommit[tid] = false;
280 memReq[tid] = NULL;
281 stalls[tid].decode = false;
282 stalls[tid].drain = false;
283 fetchBufferPC[tid] = 0;
284 fetchBufferValid[tid] = false;
285 fetchQueue[tid].clear();
286
287 // TODO not sure what to do with priorityList for now
288 // priorityList.push_back(tid);
289
290 // Clear out any of this thread's instructions being sent to decode.
291 for (int i = -cpu->fetchQueue.getPast();
292 i <= cpu->fetchQueue.getFuture(); ++i) {
293 FetchStruct& fetch_struct = cpu->fetchQueue[i];
294 removeCommThreadInsts(tid, fetch_struct);
295 }
296}
297
// Full stage reset: clears global fetch state and then, for every real
// thread, restarts fetch at the CPU's current PC with empty buffers and
// queues, rebuilding the thread priority list.
// NOTE(review): the signature line (doxygen 299) and the final statement
// on line 329 (presumably `_status = Inactive;`) were lost in extraction.
298void
300{
301 numInst = 0;
302 interruptPending = false;
303 cacheBlocked = false;
304
305 priorityList.clear();
306
307 // Setup PC and nextPC with initial state.
308 for (ThreadID tid = 0; tid < numThreads; ++tid) {
309 fetchStatus[tid] = Running;
310 set(pc[tid], cpu->pcState(tid));
311 fetchOffset[tid] = 0;
312 macroop[tid] = NULL;
313
314 delayedCommit[tid] = false;
315 memReq[tid] = NULL;
316
317 stalls[tid].decode = false;
318 stalls[tid].drain = false;
319
320 fetchBufferPC[tid] = 0;
321 fetchBufferValid[tid] = false;
322
323 fetchQueue[tid].clear();
324
325 priorityList.push_back(tid);
326 }
327
328 wroteToTimeBuffer = false;
330}
331
// Handles an icache response: if the thread is still waiting on exactly
// this request, copies the returned data into the thread's fetch buffer,
// wakes the CPU, and updates the thread's fetch status. Responses for
// squashed requests are counted and dropped.
// NOTE(review): extraction lost the signature line (doxygen 333,
// presumably Fetch::processCacheCompletion(PacketPtr pkt)), line 359
// (presumably switchToActive()) and line 365 (presumably
// `fetchStatus[tid] = IcacheAccessComplete;`).
332void
334{
335 ThreadID tid = cpu->contextToThread(pkt->req->contextId());
336
337 DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid);
338 assert(!cpu->switchedOut());
339
340 // Only change the status if it's still waiting on the icache access
341 // to return.
342 if (fetchStatus[tid] != IcacheWaitResponse ||
343 pkt->req != memReq[tid]) {
344 ++fetchStats.icacheSquashes;
345 delete pkt;
346 return;
347 }
348
349 memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
350 fetchBufferValid[tid] = true;
351
352 // Wake up the CPU (if it went to sleep and was waiting on
353 // this completion event).
354 cpu->wakeCPU();
355
356 DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n",
357 tid);
358
360
361 // Only switch to IcacheAccessComplete if we're not stalled as well.
362 if (checkStall(tid)) {
363 fetchStatus[tid] = Blocked;
364 } else {
366 }
367
368 pkt->req->setAccessLatency();
369 cpu->ppInstAccessComplete->notify(pkt);
370 // Reset the mem req to NULL.
371 delete pkt;
372 memReq[tid] = NULL;
373}
374
// Clears the decode and drain stall flags for every thread when the
// simulator resumes from a drain.
// NOTE(review): the signature line (doxygen 376) was lost in extraction.
375void
377{
378 for (ThreadID i = 0; i < numThreads; ++i) {
379 stalls[i].decode = false;
380 stalls[i].drain = false;
381 }
382}
383
// Asserts all invariants that must hold once the stage is fully drained:
// no pending retry packet, no blocked cache, no pending interrupt, no
// outstanding memory requests, and every thread either Idle or
// drain-stalled. Also checks the branch predictor.
// NOTE(review): the signature line (doxygen 385) was lost in extraction.
384void
386{
387 assert(isDrained());
388 assert(retryPkt == NULL);
389 assert(retryTid == InvalidThreadID);
390 assert(!cacheBlocked);
391 assert(!interruptPending);
392
393 for (ThreadID i = 0; i < numThreads; ++i) {
394 assert(!memReq[i]);
395 assert(fetchStatus[i] == Idle || stalls[i].drain);
396 }
397
398 branchPred->drainSanityCheck();
399}
400
// Returns true when the stage is drained: every per-thread fetch queue is
// empty, every thread is Idle (or Blocked with the drain stall set), and
// no delayed finish-translation event is still scheduled.
// NOTE(review): the signature line (doxygen 402) was lost in extraction.
401bool
403{
404 /* Make sure that threads are either idle of that the commit stage
405 * has signaled that draining has completed by setting the drain
406 * stall flag. This effectively forces the pipeline to be disabled
407 * until the whole system is drained (simulation may continue to
408 * drain other components).
409 */
410 for (ThreadID i = 0; i < numThreads; ++i) {
411 // Verify fetch queues are drained
412 if (!fetchQueue[i].empty())
413 return false;
414
415 // Return false if not idle or drain stalled
416 if (fetchStatus[i] != Idle) {
417 if (fetchStatus[i] == Blocked && stalls[i].drain)
418 continue;
419 else
420 return false;
421 }
422 }
423
424 /* The pipeline might start up again in the middle of the drain
425 * cycle if the finish translation event is scheduled, so make
426 * sure that's not the case.
427 */
428 return !finishTranslationEvent.scheduled();
429}
430
// Takeover from another CPU: requires a connected instruction port, then
// resets all stage state.
// NOTE(review): the signature line (doxygen 432) was lost in extraction.
431void
433{
434 assert(cpu->getInstPort().isConnected());
435 resetStage();
436
437}
438
// Marks one thread as drain-stalled (called while the CPU is draining);
// the flag must not already be set.
// NOTE(review): the signature line (doxygen 440, presumably
// Fetch::drainStall(ThreadID tid)) was lost in extraction.
439void
441{
442 assert(cpu->isDraining());
443 assert(!stalls[tid].drain);
444 DPRINTF(Drain, "%i: Thread drained.\n", tid);
445 stalls[tid].drain = true;
446}
447
// Wakes thread 0 out of a quiesce by forcing its status back to Running.
// NOTE(review): the signature line (doxygen 449) was lost in extraction.
448void
450{
451 DPRINTF(Fetch, "Waking up from quiesce\n");
452 // Hopefully this is safe
453 // @todo: Allow other threads to wake from quiesce.
454 fetchStatus[0] = Running;
455}
456
// Transitions the stage from Inactive to Active and notifies the CPU;
// a no-op if the stage is already active.
// NOTE(review): the signature line (doxygen 458) was lost in extraction.
457void
459{
460 if (_status == Inactive) {
461 DPRINTF(Activity, "Activating stage.\n");
462
463 cpu->activateStage(CPU::FetchIdx);
464
465 _status = Active;
466 }
467}
468
// Transitions the stage from Active to Inactive and notifies the CPU;
// a no-op if the stage is already inactive.
// NOTE(review): extraction lost the signature line (doxygen 470) and line
// 477 (presumably `_status = Inactive;`).
469void
471{
472 if (_status == Active) {
473 DPRINTF(Activity, "Deactivating stage.\n");
474
475 cpu->deactivateStage(CPU::FetchIdx);
476
478 }
479}
480
// Removes the given thread from the fetch priority list, if present.
// NOTE(review): the signature line (doxygen 482, presumably
// Fetch::deactivateThread(ThreadID tid)) was lost in extraction.
481void
483{
484 // Update priority list
485 auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
486 if (thread_it != priorityList.end()) {
487 priorityList.erase(thread_it);
488 }
489}
490
// Predicts the next PC for a just-fetched instruction. Non-control
// instructions simply advance the PC; control instructions consult the
// branch predictor, record the predicted target/taken-ness on the
// instruction, and bump the branch stats. Returns whether the branch was
// predicted taken.
// NOTE(review): the signature line (doxygen 492) was lost in extraction;
// from the body, the parameters are the instruction and a mutable
// next_pc PCState.
491bool
493{
494 // Do branch prediction check here.
495 // A bit of a misnomer...next_PC is actually the current PC until
496 // this function updates it.
497 bool predict_taken;
498
499 if (!inst->isControl()) {
500 inst->staticInst->advancePC(next_pc);
501 inst->setPredTarg(next_pc);
502 inst->setPredTaken(false);
503 return false;
504 }
505
506 ThreadID tid = inst->threadNumber;
507 predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
508 next_pc, tid);
509
510 if (predict_taken) {
511 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
512 "predicted to be taken to %s\n",
513 tid, inst->seqNum, inst->pcState().instAddr(), next_pc);
514 } else {
515 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
516 "predicted to be not taken\n",
517 tid, inst->seqNum, inst->pcState().instAddr());
518 }
519
520 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
521 "predicted to go to %s\n",
522 tid, inst->seqNum, inst->pcState().instAddr(), next_pc);
523 inst->setPredTarg(next_pc);
524 inst->setPredTaken(predict_taken);
525
526 cpu->fetchStats[tid]->numBranches++;
527
528 if (predict_taken) {
529 ++fetchStats.predictedBranches;
530 }
531
532 return predict_taken;
533}
534
// Starts a timing fetch of one fetch-buffer-sized block: bails out if the
// cache is blocked or an interrupt is pending, otherwise builds an
// INST_FETCH request aligned to the fetch buffer, records it in
// memReq[tid], marks the thread ItlbWait, and kicks off timing
// translation. Returns whether a fetch was initiated.
// NOTE(review): the signature line (doxygen 536, presumably
// Fetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)) was lost in
// extraction.
535bool
537{
538 Fault fault = NoFault;
539
540 assert(!cpu->switchedOut());
541
542 // @todo: not sure if these should block translation.
543 //AlphaDep
544 if (cacheBlocked) {
545 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
546 tid);
547 return false;
548 } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
549 // Hold off fetch from getting new instructions when:
550 // Cache is blocked, or
551 // while an interrupt is pending and we're not in PAL mode, or
552 // fetch is switched out.
553 DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
554 tid);
555 return false;
556 }
557
558 // Align the fetch address to the start of a fetch buffer segment.
559 Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
560
561 DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
562 tid, fetchBufferBlockPC, vaddr);
563
564 // Setup the memReq to do a read of the first instruction's address.
565 // Set the appropriate read size and flags as well.
566 // Build request here.
567 RequestPtr mem_req = std::make_shared<Request>(
568 fetchBufferBlockPC, fetchBufferSize,
569 Request::INST_FETCH, cpu->instRequestorId(), pc,
570 cpu->thread[tid]->contextId());
571
572 mem_req->taskId(cpu->taskId());
573
574 memReq[tid] = mem_req;
575
576 // Initiate translation of the icache block
577 fetchStatus[tid] = ItlbWait;
578 FetchTranslation *trans = new FetchTranslation(this);
579 cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(),
580 trans, BaseMMU::Execute);
581 return true;
582}
583
// Completion callback for the ITLB translation started by
// fetchCacheLine(). Stale completions (thread squashed meanwhile) are
// counted and dropped. On NoFault the icache read packet is built and
// sent (or saved as retryPkt if the port is busy). On a fault, the fault
// is attached to a nop DynInst so commit can handle the trap; if the
// fetch queue cannot accept the nop this cycle, the event reschedules
// itself one clock edge later.
// NOTE(review): extraction lost lines 633 (presumably
// `fetchStatus[tid] = IcacheWaitRetry;`), 642 (presumably
// `fetchStatus[tid] = IcacheWaitResponse;`), 658 (the DPRINTF head of the
// "Got back req ..." message) and 682 (presumably
// `fetchStatus[tid] = TrapPending;`).
584void
585Fetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req)
586{
587 ThreadID tid = cpu->contextToThread(mem_req->contextId());
588 Addr fetchBufferBlockPC = mem_req->getVaddr();
589
590 assert(!cpu->switchedOut());
591
592 // Wake up CPU if it was idle
593 cpu->wakeCPU();
594
595 if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
596 mem_req->getVaddr() != memReq[tid]->getVaddr()) {
597 DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
598 tid);
599 ++fetchStats.tlbSquashes;
600 return;
601 }
602
603
604 // If translation was successful, attempt to read the icache block.
605 if (fault == NoFault) {
606 // Check that we're not going off into random memory
607 // If we have, just wait around for commit to squash something and put
608 // us on the right track
609 if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
610 warn("Address %#x is outside of physical memory, stopping fetch\n",
611 mem_req->getPaddr());
612 fetchStatus[tid] = NoGoodAddr;
613 memReq[tid] = NULL;
614 return;
615 }
616
617 // Build packet here.
618 PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
619 data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
620
621 fetchBufferPC[tid] = fetchBufferBlockPC;
622 fetchBufferValid[tid] = false;
623 DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
624
625 fetchStats.cacheLines++;
626
627 // Access the cache.
628 if (!icachePort.sendTimingReq(data_pkt)) {
629 assert(retryPkt == NULL);
630 assert(retryTid == InvalidThreadID);
631 DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
632
634 retryPkt = data_pkt;
635 retryTid = tid;
636 cacheBlocked = true;
637 } else {
638 DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid);
639 DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache "
640 "response.\n", tid);
641 lastIcacheStall[tid] = curTick();
643 // Notify Fetch Request probe when a packet containing a fetch
644 // request is successfully sent
645 ppFetchRequestSent->notify(mem_req);
646 }
647 } else {
648 // Don't send an instruction to decode if we can't handle it.
649 if (!(numInst < fetchWidth) ||
650 !(fetchQueue[tid].size() < fetchQueueSize)) {
651 assert(!finishTranslationEvent.scheduled());
652 finishTranslationEvent.setFault(fault);
653 finishTranslationEvent.setReq(mem_req);
654 cpu->schedule(finishTranslationEvent,
655 cpu->clockEdge(Cycles(1)));
656 return;
657 }
659 "[tid:%i] Got back req with addr %#x but expected %#x\n",
660 tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
661 // Translation faulted, icache request won't be sent.
662 memReq[tid] = NULL;
663
664 // Send the fault to commit. This thread will not do anything
665 // until commit handles the fault. The only other way it can
666 // wake up is if a squash comes along and changes the PC.
667 const PCStateBase &fetch_pc = *pc[tid];
668
669 DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
670 // We will use a nop in ordier to carry the fault.
671 DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
672 fetch_pc, fetch_pc, false);
673 instruction->setNotAnInst();
674
675 instruction->setPredTarg(fetch_pc);
676 instruction->fault = fault;
677 wroteToTimeBuffer = true;
678
679 DPRINTF(Activity, "Activity this cycle.\n");
680 cpu->activityThisCycle();
681
683
684 DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid);
685 DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n",
686 tid, fault->name(), *pc[tid]);
687 }
689}
690
// Common squash worker used by both the commit- and decode-initiated
// squash paths: redirects the thread's PC, restores (or clears) the
// current macroop, resets the decoder, abandons any outstanding
// icache/ITLB request and pending retry packet, empties the fetch queue
// and moves the thread to Squashing.
// NOTE(review): line 725 (presumably `retryTid = InvalidThreadID;`) was
// lost in extraction.
691void
692Fetch::doSquash(const PCStateBase &new_pc, const DynInstPtr squashInst,
693 ThreadID tid)
694{
695 DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
696 tid, new_pc);
697
698 set(pc[tid], new_pc);
699 fetchOffset[tid] = 0;
700 if (squashInst && squashInst->pcState().instAddr() == new_pc.instAddr() &&
701 !squashInst->isLastMicroop())
702 macroop[tid] = squashInst->macroop;
703 else
704 macroop[tid] = NULL;
705 decoder[tid]->reset();
706
707 // Clear the icache miss if it's outstanding.
708 if (fetchStatus[tid] == IcacheWaitResponse) {
709 DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n",
710 tid);
711 memReq[tid] = NULL;
712 } else if (fetchStatus[tid] == ItlbWait) {
713 DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n",
714 tid);
715 memReq[tid] = NULL;
716 }
717
718 // Get rid of the retrying packet if it was from this thread.
719 if (retryTid == tid) {
720 assert(cacheBlocked);
721 if (retryPkt) {
722 delete retryPkt;
723 }
724 retryPkt = NULL;
726 }
727
728 fetchStatus[tid] = Squashing;
729
730 // Empty fetch queue
731 fetchQueue[tid].clear();
732
733 // microops are being squashed, it is not known wheather the
734 // youngest non-squashed microop was marked delayed commit
735 // or not. Setting the flag to true ensures that the
736 // interrupts are not handled when they cannot be, though
737 // some opportunities to handle interrupts may be missed.
738 delayedCommit[tid] = true;
739
740 ++fetchStats.squashCycles;
741}
742
743void
744Fetch::squashFromDecode(const PCStateBase &new_pc, const DynInstPtr squashInst,
745 const InstSeqNum seq_num, ThreadID tid)
746{
747 DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
748
749 doSquash(new_pc, squashInst, tid);
750
751 // Tell the CPU to remove any instructions that are in flight between
752 // fetch and decode.
753 cpu->removeInstsUntil(seq_num, tid);
754}
755
// Returns true if the given thread is stalled; the only stall source
// checked here is the drain stall, which may only be set while the CPU is
// draining.
// NOTE(review): the signature line (doxygen 757, presumably
// Fetch::checkStall(ThreadID tid) const) was lost in extraction.
756bool
758{
759 bool ret_val = false;
760
761 if (stalls[tid].drain) {
762 assert(cpu->isDraining());
763 DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid);
764 ret_val = true;
765 }
766
767 return ret_val;
768}
769
// Computes the stage-wide status: Active if any active thread is in a
// state that does work this cycle (notifying the CPU on an
// Inactive->Active edge), otherwise Inactive (notifying the CPU on an
// Active->Inactive edge).
// NOTE(review): extraction lost the return-type/signature lines (doxygen
// 770-771, presumably `Fetch::FetchStatus Fetch::updateFetchStatus()`)
// and line 777, the last term of the condition (presumably
// `fetchStatus[tid] == IcacheAccessComplete) {`).
772{
773 //Check Running
774 for (ThreadID tid : *activeThreads) {
775 if (fetchStatus[tid] == Running ||
776 fetchStatus[tid] == Squashing ||
778
779 if (_status == Inactive) {
780 DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid);
781
782 if (fetchStatus[tid] == IcacheAccessComplete) {
783 DPRINTF(Activity, "[tid:%i] Activating fetch due to cache"
784 "completion\n",tid);
785 }
786
787 cpu->activateStage(CPU::FetchIdx);
788 }
789
790 return Active;
791 }
792 }
793
794 // Stage is switching from active to inactive, notify CPU of it.
795 if (_status == Active) {
796 DPRINTF(Activity, "Deactivating stage.\n");
797
798 cpu->deactivateStage(CPU::FetchIdx);
799 }
800
801 return Inactive;
802}
803
804void
805Fetch::squash(const PCStateBase &new_pc, const InstSeqNum seq_num,
806 DynInstPtr squashInst, ThreadID tid)
807{
808 DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
809
810 doSquash(new_pc, squashInst, tid);
811
812 // Tell the CPU to remove any instructions that are not in the ROB.
813 cpu->removeInstsNotInROB(tid);
814}
815
// Per-cycle main loop of the fetch stage: refreshes per-thread status
// from the time-buffer signals, latches interrupt state (full system
// only), runs fetch() for each fetching thread, samples the fetched-insts
// distribution, issues pipelined icache accesses, and finally drains up
// to decodeWidth instructions from the per-thread fetch queues to decode,
// starting at a randomly chosen active thread for fairness.
// NOTE(review): extraction lost the signature line (doxygen 817), line
// 846 (presumably the `for (threadFetched = 0; threadFetched <
// numFetchingThreads;` loop head), line 857 (presumably `_status =
// updateFetchStatus();`) and line 863 (presumably
// `pipelineIcacheAccesses(i);`).
816void
818{
819 bool status_change = false;
820
821 wroteToTimeBuffer = false;
822
823 for (ThreadID i = 0; i < numThreads; ++i) {
824 issuePipelinedIfetch[i] = false;
825 }
826
827 for (ThreadID tid : *activeThreads) {
828 // Check the signals for each thread to determine the proper status
829 // for each thread.
830 bool updated_status = checkSignalsAndUpdate(tid);
831 status_change = status_change || updated_status;
832 }
833
834 DPRINTF(Fetch, "Running stage.\n");
835
836 if (FullSystem) {
837 if (fromCommit->commitInfo[0].interruptPending) {
838 interruptPending = true;
839 }
840
841 if (fromCommit->commitInfo[0].clearInterrupt) {
842 interruptPending = false;
843 }
844 }
845
847 threadFetched++) {
848 // Fetch each of the actively fetching threads.
849 fetch(status_change);
850 }
851
852 // Record number of instructions fetched this cycle for distribution.
853 fetchStats.nisnDist.sample(numInst);
854
855 if (status_change) {
856 // Change the fetch stage status if there was a status change.
858 }
859
860 // Issue the next I-cache request if possible.
861 for (ThreadID i = 0; i < numThreads; ++i) {
862 if (issuePipelinedIfetch[i]) {
864 }
865 }
866
867 // Send instructions enqueued into the fetch queue to decode.
868 // Limit rate by fetchWidth. Stall if decode is stalled.
869 unsigned insts_to_decode = 0;
870 unsigned available_insts = 0;
871
872 for (auto tid : *activeThreads) {
873 if (!stalls[tid].decode) {
874 available_insts += fetchQueue[tid].size();
875 }
876 }
877
878 // Pick a random thread to start trying to grab instructions from
879 auto tid_itr = activeThreads->begin();
880 std::advance(tid_itr,
881 rng->random<uint8_t>(0, activeThreads->size() - 1));
882
883 while (available_insts != 0 && insts_to_decode < decodeWidth) {
884 ThreadID tid = *tid_itr;
885 if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
886 const auto& inst = fetchQueue[tid].front();
887 toDecode->insts[toDecode->size++] = inst;
888 DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode "
889 "from fetch queue. Fetch queue size: %i.\n",
890 tid, inst->seqNum, fetchQueue[tid].size());
891
892 wroteToTimeBuffer = true;
893 fetchQueue[tid].pop_front();
894 insts_to_decode++;
895 available_insts--;
896 }
897
898 tid_itr++;
899 // Wrap around if at end of active threads list
900 if (tid_itr == activeThreads->end())
901 tid_itr = activeThreads->begin();
902 }
903
904 // If there was activity this cycle, inform the CPU of it.
905 if (wroteToTimeBuffer) {
906 DPRINTF(Activity, "Activity this cycle.\n");
907 cpu->activityThisCycle();
908 }
909
910 // Reset the number of the instruction we've fetched.
911 numInst = 0;
912}
913
// Examines the time-buffer signals for one thread and updates its fetch
// status accordingly: latches decode block/unblock stalls, handles squash
// requests from commit and from decode (updating or squashing the branch
// predictor as appropriate), applies drain stalls, and lets a
// Blocked/Squashing thread return to Running when nothing blocks it.
// Returns true iff the thread's status changed this cycle.
// NOTE(review): extraction lost the signature line (doxygen 915,
// presumably Fetch::checkSignalsAndUpdate(ThreadID tid)) and lines
// 988-989, the middle terms of the stall condition (presumably
// `fetchStatus[tid] != IcacheWaitResponse &&` /
// `fetchStatus[tid] != IcacheWaitRetry &&`).
914bool
916{
917 // Update the per thread stall statuses.
918 if (fromDecode->decodeBlock[tid]) {
919 stalls[tid].decode = true;
920 }
921
922 if (fromDecode->decodeUnblock[tid]) {
923 assert(stalls[tid].decode);
924 assert(!fromDecode->decodeBlock[tid]);
925 stalls[tid].decode = false;
926 }
927
928 // Check squash signals from commit.
929 if (fromCommit->commitInfo[tid].squash) {
930
931 DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
932 "from commit.\n",tid);
933 // In any case, squash.
934 squash(*fromCommit->commitInfo[tid].pc,
935 fromCommit->commitInfo[tid].doneSeqNum,
936 fromCommit->commitInfo[tid].squashInst, tid);
937
938 // If it was a branch mispredict on a control instruction, update the
939 // branch predictor with that instruction, otherwise just kill the
940 // invalid state we generated in after sequence number
941 if (fromCommit->commitInfo[tid].mispredictInst &&
942 fromCommit->commitInfo[tid].mispredictInst->isControl()) {
943 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
944 *fromCommit->commitInfo[tid].pc,
945 fromCommit->commitInfo[tid].branchTaken, tid);
946 } else {
947 branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
948 tid);
949 }
950
951 return true;
952 } else if (fromCommit->commitInfo[tid].doneSeqNum) {
953 // Update the branch predictor if it wasn't a squashed instruction
954 // that was broadcasted.
955 branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
956 }
957
958 // Check squash signals from decode.
959 if (fromDecode->decodeInfo[tid].squash) {
960 DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
961 "from decode.\n",tid);
962
963 // Update the branch predictor.
964 if (fromDecode->decodeInfo[tid].branchMispredict) {
965 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
966 *fromDecode->decodeInfo[tid].nextPC,
967 fromDecode->decodeInfo[tid].branchTaken, tid);
968 } else {
969 branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
970 tid);
971 }
972
973 if (fetchStatus[tid] != Squashing) {
974
975 DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
976 *fromDecode->decodeInfo[tid].nextPC);
977 // Squash unless we're already squashing
978 squashFromDecode(*fromDecode->decodeInfo[tid].nextPC,
979 fromDecode->decodeInfo[tid].squashInst,
980 fromDecode->decodeInfo[tid].doneSeqNum,
981 tid);
982
983 return true;
984 }
985 }
986
987 if (checkStall(tid) &&
990 fetchStatus[tid] != ItlbWait &&
991 fetchStatus[tid] != QuiescePending) {
992 DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid);
993
994 fetchStatus[tid] = Blocked;
995
996 return true;
997 }
998
999 if (fetchStatus[tid] == Blocked ||
1000 fetchStatus[tid] == Squashing) {
1001 // Switch status to running if fetch isn't being told to block or
1002 // squash this cycle.
1003 DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n",
1004 tid);
1005
1006 fetchStatus[tid] = Running;
1007
1008 return true;
1009 }
1010
1011 // If we've reached this point, we have not gotten any signals that
1012 // cause fetch to change its status. Fetch remains the same as before.
1013 return false;
1014}
// Wraps a decoded StaticInst in a new DynInst: assigns it the next global
// sequence number, attaches thread state and (if tracing) a trace record,
// registers it on the CPU's instruction list, and appends it to this
// thread's fetch queue. Also remembers whether an interrupt may be taken
// at this instruction boundary. Returns the new DynInst.
// NOTE(review): the return-type and signature lines (doxygen 1016-1017,
// presumably `DynInstPtr Fetch::buildInst(ThreadID tid, StaticInstPtr
// staticInst,`) were lost in extraction.
1015
1018 StaticInstPtr curMacroop, const PCStateBase &this_pc,
1019 const PCStateBase &next_pc, bool trace)
1020{
1021 // Get a sequence number.
1022 InstSeqNum seq = cpu->getAndIncrementInstSeq();
1023
1024 DynInst::Arrays arrays;
1025 arrays.numSrcs = staticInst->numSrcRegs();
1026 arrays.numDests = staticInst->numDestRegs();
1027
1028 // Create a new DynInst from the instruction fetched.
1029 DynInstPtr instruction = new (arrays) DynInst(
1030 arrays, staticInst, curMacroop, this_pc, next_pc, seq, cpu);
1031 instruction->setTid(tid);
1032
1033 instruction->setThreadState(cpu->thread[tid]);
1034
1035 DPRINTF(Fetch, "[tid:%i] Instruction PC %s created [sn:%lli].\n",
1036 tid, this_pc, seq);
1037
1038 DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid,
1039 instruction->staticInst->disassemble(this_pc.instAddr()));
1040
1041#if TRACING_ON
1042 if (trace) {
1043 instruction->traceData =
1044 cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
1045 instruction->staticInst, this_pc, curMacroop);
1046 }
1047#else
1048 instruction->traceData = NULL;
1049#endif
1050
1051 // Add instruction to the CPU's list of instructions.
1052 instruction->setInstListIt(cpu->addInst(instruction));
1053
1054 // Write the instruction to the first slot in the queue
1055 // that heads to decode.
1056 assert(numInst < fetchWidth);
1057 fetchQueue[tid].push_back(instruction);
1058 assert(fetchQueue[tid].size() <= fetchQueueSize);
1059 DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n",
1060 tid, fetchQueue[tid].size(), fetchQueueSize);
1061 //toDecode->insts[toDecode->size++] = instruction;
1062
1063 // Keep track of if we can take an interrupt at this boundary
1064 delayedCommit[tid] = instruction->isDelayedCommit();
1065
1066 return instruction;
1067}
1068
1069void
1070Fetch::fetch(bool &status_change)
1071{
1073 // Start actual fetch
1076
1077 assert(!cpu->switchedOut());
1078
1079 if (tid == InvalidThreadID) {
1080 // Breaks looping condition in tick()
1082
// NOTE(review): this is the TAIL of Fetch::fetch(bool &status_change) — per
// the doxygen index the definition starts at fetch.cc:1070, before this view.
// The numeric prefix on each line is the original source line number left
// behind by the doc extraction; it is not part of the real code.
1083 if (numThreads == 1) { // @todo Per-thread stats
1084 profileStall(0);
1085 }
1086
// No fetchable thread this cycle: record the stall reason and bail out.
1087 return;
1088 }
1089
1090 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
1091
1092 // The current PC.
1093 PCStateBase &this_pc = *pc[tid];
1094
// pcOffset tracks how many bytes of the current (possibly partially
// decoded) instruction have already been consumed.
1095 Addr pcOffset = fetchOffset[tid];
1096 Addr fetchAddr = (this_pc.instAddr() + pcOffset) & decoder[tid]->pcMask();
1097
1098 bool inRom = isRomMicroPC(this_pc.microPC());
1099
1100 // If returning from the delay of a cache miss, then update the status
1101 // to running, otherwise do the cache access. Possibly move this up
1102 // to tick() function.
1103 if (fetchStatus[tid] == IcacheAccessComplete) {
1104 DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid);
1105
1106 fetchStatus[tid] = Running;
1107 status_change = true;
1108 } else if (fetchStatus[tid] == Running) {
1109 // Align the fetch PC so it's at the start of a fetch buffer segment.
1110 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1111
1112 // If buffer is no longer valid or fetchAddr has moved to point
1113 // to the next cache block, AND we have no remaining ucode
1114 // from a macro-op, then start fetch from icache.
1115 if (!(fetchBufferValid[tid] &&
1116 fetchBufferBlockPC == fetchBufferPC[tid]) && !inRom &&
1117 !macroop[tid]) {
1118 DPRINTF(Fetch, "[tid:%i] Attempting to translate and read "
1119 "instruction, starting at PC %s.\n", tid, this_pc);
1120
1121 fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
1122
// Attribute this stalled cycle to the state fetchCacheLine() left us in.
1123 if (fetchStatus[tid] == IcacheWaitResponse) {
1124 cpu->fetchStats[tid]->icacheStallCycles++;
1125 }
1126 else if (fetchStatus[tid] == ItlbWait)
1127 ++fetchStats.tlbCycles;
1128 else
1129 ++fetchStats.miscStallCycles;
1130 return;
1131 } else if (checkInterrupt(this_pc.instAddr()) &&
1132 !delayedCommit[tid]) {
1133 // Stall CPU if an interrupt is posted and we're not issuing
1134 // a delayed commit micro-op currently (delayed commit
1135 // instructions are not interruptable by interrupts, only faults)
1136 ++fetchStats.miscStallCycles;
1137 DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid);
1138 return;
1139 }
1140 } else {
1141 if (fetchStatus[tid] == Idle) {
1142 ++fetchStats.idleCycles;
1143 DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid);
1144 }
1145
1146 // Status is Idle, so fetch should do nothing.
1147 return;
1148 }
1149
1150 ++fetchStats.cycles;
1151
// Working copy of the next PC; updated by the branch predictor below.
1152 std::unique_ptr<PCStateBase> next_pc(this_pc.clone());
1153
1154 StaticInstPtr staticInst = NULL;
1155 StaticInstPtr curMacroop = macroop[tid];
1156
1157 // If the read of the first instruction was successful, then grab the
1158 // instructions from the rest of the cache line and put them into the
1159 // queue heading to decode.
1160
1161 DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to "
1162 "decode.\n", tid);
1163
1164 // Need to keep track of whether or not a predicted branch
1165 // ended this fetch block.
1166 bool predictedBranch = false;
1167
1168 // Need to halt fetch if quiesce instruction detected
1169 bool quiesce = false;
1170
// Number of instSize-sized slots in the fetch buffer, and the slot the
// current fetchAddr falls into.
1171 const unsigned numInsts = fetchBufferSize / instSize;
1172 unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1173
1174 auto *dec_ptr = decoder[tid];
1175 const Addr pc_mask = dec_ptr->pcMask();
1176
1177 // Loop through instruction memory from the cache.
1178 // Keep issuing while fetchWidth is available and branch is not
1179 // predicted taken
1180 while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
1181 && !predictedBranch && !quiesce) {
1182 // We need to process more memory if we aren't going to get a
1183 // StaticInst from the rom, the current macroop, or what's already
1184 // in the decoder.
1185 bool needMem = !inRom && !curMacroop && !dec_ptr->instReady();
1186 fetchAddr = (this_pc.instAddr() + pcOffset) & pc_mask;
1187 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1188
1189 if (needMem) {
1190 // If buffer is no longer valid or fetchAddr has moved to point
1191 // to the next cache block then start fetch from icache.
1192 if (!fetchBufferValid[tid] ||
1193 fetchBufferBlockPC != fetchBufferPC[tid])
1194 break;
1195
1196 if (blkOffset >= numInsts) {
1197 // We need to process more memory, but we've run out of the
1198 // current block.
1199 break;
1200 }
1201
// Feed the decoder one instSize chunk from the fetch buffer.
1202 memcpy(dec_ptr->moreBytesPtr(),
1203 fetchBuffer[tid] + blkOffset * instSize, instSize);
1204 decoder[tid]->moreBytes(this_pc, fetchAddr);
1205
1206 if (dec_ptr->needMoreBytes()) {
1207 blkOffset++;
1208 fetchAddr += instSize;
1209 pcOffset += instSize;
1210 }
1211 }
1212
1213 // Extract as many instructions and/or microops as we can from
1214 // the memory we've processed so far.
1215 do {
1216 if (!(curMacroop || inRom)) {
1217 if (dec_ptr->instReady()) {
1218 staticInst = dec_ptr->decode(this_pc);
1219
1220 // Increment stat of fetched instructions.
1221 cpu->fetchStats[tid]->numInsts++;
1222
1223 if (staticInst->isMacroop()) {
1224 curMacroop = staticInst;
1225 } else {
1226 pcOffset = 0;
1227 }
1228 } else {
1229 // We need more bytes for this instruction so blkOffset and
1230 // pcOffset will be updated
1231 break;
1232 }
1233 }
1234 // Whether we're moving to a new macroop because we're at the
1235 // end of the current one, or the branch predictor incorrectly
1236 // thinks we are...
1237 bool newMacro = false;
1238 if (curMacroop || inRom) {
1239 if (inRom) {
1240 staticInst = dec_ptr->fetchRomMicroop(
1241 this_pc.microPC(), curMacroop);
1242 } else {
1243 staticInst = curMacroop->fetchMicroop(this_pc.microPC());
1244 }
1245 newMacro |= staticInst->isLastMicroop();
1246 }
1247
// buildInst() creates the DynInst and pushes it onto fetchQueue[tid].
1248 DynInstPtr instruction = buildInst(
1249 tid, staticInst, curMacroop, this_pc, *next_pc, true);
1250
1251 ppFetch->notify(instruction);
1252 numInst++;
1253
1254#if TRACING_ON
1255 if (debug::O3PipeView) {
1256 instruction->fetchTick = curTick();
1257 }
1258#endif
1259
1260 set(next_pc, this_pc);
1261
1262 // If we're branching after this instruction, quit fetching
1263 // from the same block.
1264 predictedBranch |= this_pc.branching();
1265 predictedBranch |= lookupAndUpdateNextPC(instruction, *next_pc);
1266 if (predictedBranch) {
1267 DPRINTF(Fetch, "Branch detected with PC = %s\n", this_pc);
1268 }
1269
1270 newMacro |= this_pc.instAddr() != next_pc->instAddr();
1271
1272 // Move to the next instruction, unless we have a branch.
1273 set(this_pc, *next_pc);
1274 inRom = isRomMicroPC(this_pc.microPC());
1275
1276 if (newMacro) {
// Starting a new macro-op: realign the fetch address and buffer slot.
1277 fetchAddr = this_pc.instAddr() & pc_mask;
1278 blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
1279 pcOffset = 0;
1280 curMacroop = NULL;
1281 }
1282
1283 if (instruction->isQuiesce()) {
1284 DPRINTF(Fetch,
1285 "Quiesce instruction encountered, halting fetch!\n");
// NOTE(review): the extraction dropped original line 1286 here — most
// likely the fetchStatus[tid] update to QuiescePending; verify against
// the upstream fetch.cc.
1287 status_change = true;
1288 quiesce = true;
1289 break;
1290 }
1291 } while ((curMacroop || dec_ptr->instReady()) &&
1292 numInst < fetchWidth &&
1293 fetchQueue[tid].size() < fetchQueueSize);
1294
1295 // Re-evaluate whether the next instruction to fetch is in micro-op ROM
1296 // or not.
1297 inRom = isRomMicroPC(this_pc.microPC());
1298 }
1299
1300 if (predictedBranch) {
1301 DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch "
1302 "instruction encountered.\n", tid);
1303 } else if (numInst >= fetchWidth) {
1304 DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth "
1305 "for this cycle.\n", tid);
1306 } else if (blkOffset >= fetchBufferSize) {
// NOTE(review): the DPRINTF below is missing a space between "the" and
// "fetch buffer" in its output (string spans two literals).
1307 DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the"
1308 "fetch buffer.\n", tid);
1309 }
1310
// Persist per-thread decode state for the next fetch cycle.
1311 macroop[tid] = curMacroop;
1312 fetchOffset[tid] = pcOffset;
1313
1314 if (numInst > 0) {
1315 wroteToTimeBuffer = true;
1316 }
1317
1318 // pipeline a fetch if we're crossing a fetch buffer boundary and not in
1319 // a state that would preclude fetching
1320 fetchAddr = (this_pc.instAddr() + pcOffset) & pc_mask;
1321 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
// NOTE(review): the extraction dropped original line 1323 from this
// condition — presumably another fetchStatus[tid] != ... comparison
// (likely IcacheWaitResponse); verify against the upstream fetch.cc.
1322 issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
1324 fetchStatus[tid] != ItlbWait &&
1325 fetchStatus[tid] != IcacheWaitRetry &&
1326 fetchStatus[tid] != QuiescePending &&
1327 !curMacroop;
1328}
1329
1330 void
// Fetch::recvReqRetry(): the I-cache port is telling fetch it may retry the
// request that previously failed to send.  If a retry packet is pending,
// re-issue it; otherwise the access was squashed and we only unblock.
// NOTE(review): the extraction dropped original lines 1331 (the signature),
// 1336, 1339 and 1344 — likely the fetchStatus[retryTid] assertion/update
// and the retryTid reset; verify against the upstream fetch.cc.
1332 {
1333 if (retryPkt != NULL) {
// A pending retry implies the cache was blocked on a valid thread.
1334 assert(cacheBlocked);
1335 assert(retryTid != InvalidThreadID);
1337
1338 if (icachePort.sendTimingReq(retryPkt)) {
1340 // Notify Fetch Request probe when a retryPkt is successfully sent.
1341 // Note that notify must be called before retryPkt is set to NULL.
1342 ppFetchRequestSent->notify(retryPkt->req);
1343 retryPkt = NULL;
1345 cacheBlocked = false;
1346 }
1347 } else {
1348 assert(retryTid == InvalidThreadID);
1349 // Access has been squashed since it was sent out. Just clear
1350 // the cache being blocked.
1351 cacheBlocked = false;
1352 }
1353 }
1354
1356// //
1357// SMT FETCH POLICY MAINTAINED HERE //
1358// //
1362{
1363 if (numThreads > 1) {
1364 switch (fetchPolicy) {
1365 case SMTFetchPolicy::RoundRobin:
1366 return roundRobin();
1367 case SMTFetchPolicy::IQCount:
1368 return iqCount();
1369 case SMTFetchPolicy::LSQCount:
1370 return lsqCount();
1371 case SMTFetchPolicy::Branch:
1372 return branchCount();
1373 default:
1374 return InvalidThreadID;
1375 }
1376 } else {
1377 auto thread = activeThreads->begin();
1378 if (thread == activeThreads->end()) {
1379 return InvalidThreadID;
1380 }
1381
1382 ThreadID tid = *thread;
1383
1384 if (fetchStatus[tid] == Running ||
1386 fetchStatus[tid] == Idle) {
1387 return tid;
1388 } else {
1389 return InvalidThreadID;
1390 }
1391 }
1392}
1393
1394
1397{
1398 auto pri_iter = priorityList.begin();
1399 auto end = priorityList.end();
1400
1401 ThreadID high_pri;
1402
1403 while (pri_iter != end) {
1404 high_pri = *pri_iter;
1405
1406 assert(high_pri <= numThreads);
1407
1408 if (fetchStatus[high_pri] == Running ||
1409 fetchStatus[high_pri] == IcacheAccessComplete ||
1410 fetchStatus[high_pri] == Idle) {
1411
1412 priorityList.erase(pri_iter);
1413 priorityList.push_back(high_pri);
1414
1415 return high_pri;
1416 }
1417
1418 pri_iter++;
1419 }
1420
1421 return InvalidThreadID;
1422}
1423
1426{
1427 //sorted from lowest->highest
1428 std::priority_queue<unsigned, std::vector<unsigned>,
1429 std::greater<unsigned> > PQ;
1430 std::map<unsigned, ThreadID> threadMap;
1431
1432 for (ThreadID tid : *activeThreads) {
1433 unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
1434
1435 //we can potentially get tid collisions if two threads
1436 //have the same iqCount, but this should be rare.
1437 PQ.push(iqCount);
1438 threadMap[iqCount] = tid;
1439 }
1440
1441 while (!PQ.empty()) {
1442 ThreadID high_pri = threadMap[PQ.top()];
1443
1444 if (fetchStatus[high_pri] == Running ||
1445 fetchStatus[high_pri] == IcacheAccessComplete ||
1446 fetchStatus[high_pri] == Idle)
1447 return high_pri;
1448 else
1449 PQ.pop();
1450
1451 }
1452
1453 return InvalidThreadID;
1454}
1455
1458{
1459 //sorted from lowest->highest
1460 std::priority_queue<unsigned, std::vector<unsigned>,
1461 std::greater<unsigned> > PQ;
1462 std::map<unsigned, ThreadID> threadMap;
1463
1464 for (ThreadID tid : *activeThreads) {
1465 unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
1466
1467 //we can potentially get tid collisions if two threads
1468 //have the same iqCount, but this should be rare.
1469 PQ.push(ldstqCount);
1470 threadMap[ldstqCount] = tid;
1471 }
1472
1473 while (!PQ.empty()) {
1474 ThreadID high_pri = threadMap[PQ.top()];
1475
1476 if (fetchStatus[high_pri] == Running ||
1477 fetchStatus[high_pri] == IcacheAccessComplete ||
1478 fetchStatus[high_pri] == Idle)
1479 return high_pri;
1480 else
1481 PQ.pop();
1482 }
1483
1484 return InvalidThreadID;
1485}
1486
1489{
1490 panic("Branch Count Fetch policy unimplemented\n");
1491 return InvalidThreadID;
1492}
1493
1494 void
// Fetch::pipelineIcacheAccesses(tid): if the main fetch pass flagged that
// the next access crosses into a new fetch buffer block, start that I-cache
// access now so it overlaps with decoding the current block.  No-op when
// not flagged or when fetching from the micro-op ROM.
// NOTE(review): the extraction dropped the signature line (original 1495).
1496 {
1497 if (!issuePipelinedIfetch[tid]) {
1498 return;
1499 }
1500
1501 // The next PC to access.
1502 const PCStateBase &this_pc = *pc[tid];
1503
1504 if (isRomMicroPC(this_pc.microPC())) {
1505 return;
1506 }
1507
1508 Addr pcOffset = fetchOffset[tid];
1509 Addr fetchAddr = (this_pc.instAddr() + pcOffset) & decoder[tid]->pcMask();
1510
1511 // Align the fetch PC so it's at the start of a fetch buffer segment.
1512 Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
1513
1514 // Unless buffer already got the block, fetch it from icache.
1515 if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
1516 DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, "
1517 "starting at PC %s.\n", tid, this_pc);
1518
1519 fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
1520 }
1521 }
1522
1523 void
// Fetch::profileStall(tid): called when fetch could not make progress this
// cycle; inspects stall/drain flags and fetchStatus[tid] to bump exactly one
// stall-reason statistic, with a debug trace for each reason.
// NOTE(review): the extraction dropped the signature line (original 1524).
1525 {
1526 DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
1527
1528 // @todo Per-thread stats
1529
1530 if (stalls[tid].drain) {
1531 ++fetchStats.pendingDrainCycles;
1532 DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
1533 } else if (activeThreads->empty()) {
1534 ++fetchStats.noActiveThreadStallCycles;
1535 DPRINTF(Fetch, "Fetch has no active thread!\n");
1536 } else if (fetchStatus[tid] == Blocked) {
1537 ++fetchStats.blockedCycles;
1538 DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid);
1539 } else if (fetchStatus[tid] == Squashing) {
1540 ++fetchStats.squashCycles;
1541 DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
1542 } else if (fetchStatus[tid] == IcacheWaitResponse) {
// I-cache stalls are tracked per-thread on the CPU's stat group.
1543 cpu->fetchStats[tid]->icacheStallCycles++;
1544 DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
1545 tid);
1546 } else if (fetchStatus[tid] == ItlbWait) {
1547 ++fetchStats.tlbCycles;
1548 DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to "
1549 "finish!\n", tid);
1550 } else if (fetchStatus[tid] == TrapPending) {
1551 ++fetchStats.pendingTrapStallCycles;
1552 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n",
1553 tid);
1554 } else if (fetchStatus[tid] == QuiescePending) {
1555 ++fetchStats.pendingQuiesceStallCycles;
1556 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce "
1557 "instruction!\n", tid);
1558 } else if (fetchStatus[tid] == IcacheWaitRetry) {
1559 ++fetchStats.icacheWaitRetryStallCycles;
1560 DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n",
1561 tid);
1562 } else if (fetchStatus[tid] == NoGoodAddr) {
// No counter for this case — trace only.
1563 DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n",
1564 tid);
1565 } else {
1566 DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason "
1567 "(Status: %i)\n",
1568 tid, fetchStatus[tid]);
1569 }
1570 }
1571
1572 bool
// Fetch::IcachePort::recvTimingResp(pkt): timing response from the I-cache;
// forwards the packet to the fetch stage and always accepts it.
// NOTE(review): the extraction dropped the signature line (original 1573).
1574 {
1575 DPRINTF(O3CPU, "Fetch unit received timing\n");
1576 // We shouldn't ever get a cacheable block in Modified state
1577 assert(pkt->req->isUncacheable() ||
1578 !(pkt->cacheResponding() && !pkt->hasSharers()));
1579 fetch->processCacheCompletion(pkt);
1580
1581 return true;
1582 }
1583
1584 void
// Fetch::IcachePort::recvReqRetry(): thin port-level forwarder to the fetch
// stage's retry handler.
// NOTE(review): the extraction dropped the signature line (original 1585).
1586 {
1587 fetch->recvReqRetry();
1588 }
1590} // namespace o3
1591} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
virtual bool branching() const =0
MicroPC microPC() const
Returns the current micropc.
Definition pcstate.hh:119
Addr instAddr() const
Returns the memory address of the instruction this PC points to.
Definition pcstate.hh:108
virtual PCStateBase * clone() const =0
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
RequestPtr req
A pointer to the original request.
Definition packet.hh:377
const T * getConstPtr() const
Definition packet.hh:1234
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition packet.hh:1213
bool cacheResponding() const
Definition packet.hh:659
bool hasSharers() const
Definition packet.hh:686
ProbePointArg generates a point for the class of Arg.
Definition probe.hh:273
RequestPort(const std::string &name, SimObject *_owner, PortID id=InvalidPortID)
Request port.
Definition port.cc:125
@ INST_FETCH
The request was an instruction fetch.
Definition request.hh:115
uint8_t numSrcRegs() const
Number of source registers.
virtual StaticInstPtr fetchMicroop(MicroPC upc) const
Return the microop that goes with a particular micropc.
uint8_t numDestRegs() const
Number of destination registers.
bool isMacroop() const
bool isLastMicroop() const
wire getWire(int idx)
Definition timebuf.hh:232
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
Fetch * fetch
Pointer to fetch.
Definition fetch.hh:89
virtual bool recvTimingResp(PacketPtr pkt)
Timing version of receive.
Definition fetch.cc:1573
IcachePort(Fetch *_fetch, CPU *_cpu)
Default constructor.
Definition fetch.cc:77
virtual void recvReqRetry()
Handles doing a retry of a failed fetch.
Definition fetch.cc:1585
gem5::o3::Fetch::FetchStatGroup fetchStats
bool wroteToTimeBuffer
Variable that tracks if fetch has written to the time buffer this cycle.
Definition fetch.hh:433
void deactivateThread(ThreadID tid)
For priority-based fetch policies, need to keep update priorityList.
Definition fetch.cc:482
FetchStatus
Overall fetch status.
Definition fetch.hh:164
std::list< ThreadID > * activeThreads
List of Active Threads.
Definition fetch.hh:508
TimeBuffer< TimeStruct >::wire fromCommit
Wire to get commit's information from backwards time buffer.
Definition fetch.hh:409
Cycles renameToFetchDelay
Rename to fetch delay.
Definition fetch.hh:452
StaticInstPtr macroop[MaxThreads]
Definition fetch.hh:422
void fetch(bool &status_change)
Does the actual fetching of instructions and passing them on to the next stage.
Definition fetch.cc:1070
void takeOverFrom()
Takes over from another CPU's thread.
Definition fetch.cc:432
uint8_t * fetchBuffer[MaxThreads]
The fetch data that is being fetched and buffered.
Definition fetch.hh:487
void doSquash(const PCStateBase &new_pc, const DynInstPtr squashInst, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:692
Random::RandomPtr rng
Definition fetch.hh:204
TimeBuffer< FetchStruct >::wire toDecode
Wire used to write any information heading to decode.
Definition fetch.hh:413
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets pointer to list of active threads.
Definition fetch.cc:249
bool lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &pc)
Looks up in the branch predictor to see if the next PC should be either next PC+=MachInst or a branch...
Definition fetch.cc:492
ThreadStatus fetchStatus[MaxThreads]
Per-thread status.
Definition fetch.hh:191
ThreadID numThreads
Number of threads.
Definition fetch.hh:511
TimeBuffer< TimeStruct >::wire fromDecode
Wire to get decode's information from backwards time buffer.
Definition fetch.hh:400
ProbePointArg< DynInstPtr > * ppFetch
Probe points.
Definition fetch.hh:200
TimeBuffer< TimeStruct >::wire fromRename
Wire to get rename's information from backwards time buffer.
Definition fetch.hh:403
void squash(const PCStateBase &new_pc, const InstSeqNum seq_num, DynInstPtr squashInst, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:805
void squashFromDecode(const PCStateBase &new_pc, const DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid)
Squashes a specific thread and resets the PC.
Definition fetch.cc:744
FetchStatus updateFetchStatus()
Updates overall fetch stage status; to be called at the end of each cycle.
Definition fetch.cc:771
ThreadID getFetchingThread()
Returns the appropriate thread to fetch, given the fetch policy.
Definition fetch.cc:1361
bool fetchBufferValid[MaxThreads]
Whether or not the fetch buffer data is valid.
Definition fetch.hh:499
void startupStage()
Initialize stage.
Definition fetch.cc:262
void pipelineIcacheAccesses(ThreadID tid)
Pipeline the next I-cache access to the current one.
Definition fetch.cc:1495
std::string name() const
Returns the name of fetch.
Definition fetch.cc:147
void wakeFromQuiesce()
Tells fetch to wake up from a quiesce instruction.
Definition fetch.cc:449
void switchToActive()
Changes the status of this stage to active, and indicates this to the CPU.
Definition fetch.cc:458
void switchToInactive()
Changes the status of this stage to inactive, and indicates this to the CPU.
Definition fetch.cc:470
int numInst
Tracks how many instructions has been fetched this cycle.
Definition fetch.hh:436
bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
Fetches the cache line that contains the fetch PC.
Definition fetch.cc:536
Cycles decodeToFetchDelay
Decode to fetch delay.
Definition fetch.hh:449
bool issuePipelinedIfetch[MaxThreads]
Set to true if a pipelined I-cache request should be issued.
Definition fetch.hh:528
Addr fetchBufferAlignPC(Addr addr)
Align a PC to the start of a fetch buffer block.
Definition fetch.hh:355
FetchStatus _status
Fetch status.
Definition fetch.hh:188
bool delayedCommit[MaxThreads]
Can the fetch stage redirect from an interrupt on this instruction?
Definition fetch.hh:425
ThreadID threadFetched
Thread ID being fetched.
Definition fetch.hh:517
SMTFetchPolicy fetchPolicy
Fetch policy.
Definition fetch.hh:194
Addr cacheBlkSize
Cache block size.
Definition fetch.hh:476
branch_prediction::BPredUnit * branchPred
BPredUnit.
Definition fetch.hh:416
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition fetch.cc:385
unsigned fetchWidth
The width of fetch in instructions.
Definition fetch.hh:461
unsigned fetchQueueSize
The size of the fetch queue in micro-ops.
Definition fetch.hh:493
InstDecoder * decoder[MaxThreads]
The decoder.
Definition fetch.hh:361
TimeBuffer< TimeStruct >::wire fromIEW
Wire to get iew's information from backwards time buffer.
Definition fetch.hh:406
void regProbePoints()
Registers probes.
Definition fetch.cc:150
bool checkSignalsAndUpdate(ThreadID tid)
Checks all input signals and updates the status as necessary.
Definition fetch.cc:915
bool checkStall(ThreadID tid) const
Checks if a thread is stalled.
Definition fetch.cc:757
IcachePort icachePort
Instruction port.
Definition fetch.hh:525
@ IcacheAccessComplete
Definition fetch.hh:182
void setTimeBuffer(TimeBuffer< TimeStruct > *time_buffer)
Sets the main backwards communication time buffer pointer.
Definition fetch.cc:237
void processCacheCompletion(PacketPtr pkt)
Processes cache completion event.
Definition fetch.cc:333
ThreadID iqCount()
Returns the appropriate thread to fetch using the IQ count policy.
Definition fetch.cc:1425
Addr fetchBufferMask
Mask to align a fetch address to a fetch buffer boundary.
Definition fetch.hh:484
void recvReqRetry()
Handles retrying the fetch access.
Definition fetch.cc:1331
bool checkInterrupt(Addr pc)
Check if an interrupt is pending and that we need to handle.
Definition fetch.hh:308
Cycles iewToFetchDelay
IEW to fetch delay.
Definition fetch.hh:455
void resetStage()
Reset this pipeline stage.
Definition fetch.cc:299
Fetch(CPU *_cpu, const BaseO3CPUParams &params)
Fetch constructor.
Definition fetch.cc:82
void drainStall(ThreadID tid)
Stall the fetch stage after reaching a safe drain point.
Definition fetch.cc:440
Counter lastIcacheStall[MaxThreads]
Icache stall statistics.
Definition fetch.hh:505
int instSize
Size of instructions.
Definition fetch.hh:502
ProbePointArg< RequestPtr > * ppFetchRequestSent
To probe when a fetch request is successfully sent.
Definition fetch.hh:202
Cycles commitToFetchDelay
Commit to fetch delay.
Definition fetch.hh:458
RequestPtr memReq[MaxThreads]
Memory request used to access cache.
Definition fetch.hh:428
TimeBuffer< TimeStruct > * timeBuffer
Time buffer interface.
Definition fetch.hh:397
void profileStall(ThreadID tid)
Profile the reasons of fetch stall.
Definition fetch.cc:1524
ThreadID roundRobin()
Returns the appropriate thread to fetch using a round robin policy.
Definition fetch.cc:1396
Addr fetchBufferPC[MaxThreads]
The PC of the first instruction loaded into the fetch buffer.
Definition fetch.hh:490
void drainResume()
Resume after a drain.
Definition fetch.cc:376
void clearStates(ThreadID tid)
Clear all thread-specific states.
Definition fetch.cc:273
void finishTranslation(const Fault &fault, const RequestPtr &mem_req)
Definition fetch.cc:585
bool interruptPending
Checks if there is an interrupt pending.
Definition fetch.hh:522
std::unique_ptr< PCStateBase > pc[MaxThreads]
Definition fetch.hh:418
ThreadID lsqCount()
Returns the appropriate thread to fetch using the LSQ count policy.
Definition fetch.cc:1457
Stalls stalls[MaxThreads]
Tracks which stages are telling fetch to stall.
Definition fetch.hh:446
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst, StaticInstPtr curMacroop, const PCStateBase &this_pc, const PCStateBase &next_pc, bool trace)
Definition fetch.cc:1017
bool isDrained() const
Has the stage drained?
Definition fetch.cc:402
Addr fetchOffset[MaxThreads]
Definition fetch.hh:420
std::deque< DynInstPtr > fetchQueue[MaxThreads]
Queue of fetched instructions.
Definition fetch.hh:496
PacketPtr retryPkt
The packet that is waiting to be retried.
Definition fetch.hh:470
std::list< ThreadID > priorityList
List that has the threads organized by priority.
Definition fetch.hh:197
FinishTranslationEvent finishTranslationEvent
Event used to delay fault generation of translation faults.
Definition fetch.hh:531
ThreadID retryTid
The thread that is waiting on the cache to tell fetch to retry.
Definition fetch.hh:473
void tick()
Ticks the fetch stage, processing all inputs signals and fetching as many instructions as possible.
Definition fetch.cc:817
ThreadID numFetchingThreads
Number of threads that are actively fetching.
Definition fetch.hh:514
unsigned fetchBufferSize
The size of the fetch buffer in bytes.
Definition fetch.hh:481
void setFetchQueue(TimeBuffer< FetchStruct > *fq_ptr)
Sets pointer to time buffer used to communicate to the next stage.
Definition fetch.cc:255
CPU * cpu
Pointer to the O3CPU.
Definition fetch.hh:394
unsigned decodeWidth
The width of decode in instructions.
Definition fetch.hh:464
bool cacheBlocked
Is the cache blocked?
Definition fetch.hh:467
ThreadID branchCount()
Returns the appropriate thread to fetch using the branch count policy.
Definition fetch.cc:1488
STL list class.
Definition stl.hh:51
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:232
#define warn(...)
Definition logging.hh:288
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 12, 11 > set
static constexpr int MaxThreads
Definition limits.hh:38
void removeCommThreadInsts(ThreadID tid, CommStruct &comm_struct)
Remove instructions belonging to given thread from the given comm struct's instruction array.
Definition comm.hh:235
RefCountingPtr< DynInst > DynInstPtr
static constexpr int MaxWidth
Definition limits.hh:37
Units for Stats.
Definition units.hh:113
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition info.hh:61
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< FaultBase > Fault
Definition types.hh:249
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
const ThreadID InvalidThreadID
Definition types.hh:236
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition root.cc:220
Packet * PacketPtr
RefCountingPtr< StaticInst > StaticInstPtr
static bool isRomMicroPC(MicroPC upc)
Definition types.hh:166
StaticInstPtr nopStaticInstPtr
Pointer to a statically allocated generic "nop" instruction object.
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
Declaration of the Packet class.
Struct that defines the information passed from fetch to decode.
Definition comm.hh:62
statistics::Scalar icacheSquashes
Total number of outstanding icache accesses that were dropped due to a squash.
Definition fetch.hh:572
statistics::Scalar pendingDrainCycles
Total number of cycles spent in waiting for drains.
Definition fetch.hh:557
statistics::Scalar cacheLines
Stat for total number of fetched cache lines.
Definition fetch.hh:568
statistics::Scalar blockedCycles
Total number of cycles spent blocked.
Definition fetch.hh:553
statistics::Scalar idleCycles
Stat for total number of cycles spent blocked due to other stages in the pipeline.
Definition fetch.hh:551
statistics::Scalar predictedBranches
Stat for total number of predicted branches.
Definition fetch.hh:540
statistics::Scalar noActiveThreadStallCycles
Total number of stall cycles caused by no active threads to run.
Definition fetch.hh:559
statistics::Scalar pendingQuiesceStallCycles
Total number of stall cycles caused by pending quiesce instructions.
Definition fetch.hh:564
statistics::Scalar icacheWaitRetryStallCycles
Total number of stall cycles caused by I-cache wait retrys.
Definition fetch.hh:566
statistics::Scalar pendingTrapStallCycles
Total number of stall cycles caused by pending traps.
Definition fetch.hh:561
statistics::Scalar cycles
Stat for total number of cycles spent fetching.
Definition fetch.hh:542
statistics::Scalar miscStallCycles
Total number of cycles spent in any other state.
Definition fetch.hh:555
statistics::Scalar tlbCycles
Stat for total number of cycles spent waiting for translation.
Definition fetch.hh:546
statistics::Scalar squashCycles
Stat for total number of cycles spent squashing.
Definition fetch.hh:544
FetchStatGroup(CPU *cpu, Fetch *fetch)
Definition fetch.cc:158
statistics::Formula idleRate
Rate of how often fetch was idle.
Definition fetch.hh:580
statistics::Scalar tlbSquashes
Total number of outstanding tlb accesses that were dropped due to a squash.
Definition fetch.hh:576
statistics::Distribution nisnDist
Distribution of number of instructions fetched each cycle.
Definition fetch.hh:578

Generated on Mon May 26 2025 09:19:08 for gem5 by doxygen 1.13.2