gem5 [DEVELOP-FOR-25.1]
bac.cc
1/*
2 * Copyright (c) 2022-2023 The University of Edinburgh
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "cpu/o3/bac.hh"
39
40#include <algorithm>
41
43#include "base/trace.hh"
44#include "cpu/inst_seq.hh"
45#include "cpu/o3/dyn_inst.hh"
46#include "cpu/o3/ftq.hh"
47#include "cpu/o3/limits.hh"
48#include "debug/Activity.hh"
49#include "debug/BAC.hh"
50#include "debug/Branch.hh"
51#include "debug/Drain.hh"
52#include "debug/FTQ.hh"
53#include "debug/Fetch.hh"
54#include "debug/O3PipeView.hh"
55#include "params/BaseO3CPU.hh"
56#include "sim/full_system.hh"
57
58using namespace gem5::branch_prediction;
59
60namespace gem5
61{
62
63namespace o3
64{
65
66// clang-format off
67std::string BAC::BACStats::statusStrings[ThreadStatusMax] = {
68 "Idle",
69 "Running",
70 "Squashing",
71 "Blocked",
72 "FTQFull",
73 "FTQLocked",
74};
75// clang-format on
76
77BAC::BAC(CPU *_cpu, const BaseO3CPUParams &params)
78 : cpu(_cpu),
79 bpu(params.branchPred),
80 ftq(nullptr),
81 wroteToTimeBuffer(false),
87 cacheBlkSize(cpu->cacheLineSize()),
90 numThreads(params.numThreads),
93 stats(_cpu, this)
94{
95 fatal_if(decoupledFrontEnd && (fetchTargetWidth < params.fetchBufferSize),
96 "Fetch target width should be larger than fetch buffer size!");
97 fatal_if(decoupledFrontEnd && (numThreads > 1),
98 "More than 1 thread has not been tested with the decoupled "
99 "front end");
100 fatal_if(bpu == nullptr, "Branch predictor not configured");
101
102 for (int i = 0; i < MaxThreads; i++) {
103 bacPC[i].reset(params.isa[0]->newPCState());
104 stalls[i] = {false, false, false};
105 }
106}
107
108std::string
109BAC::name() const
110{
111 return cpu->name() + ".bac";
112}
113
114void
115BAC::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
116{
117 timeBuffer = time_buffer;
118
119 // Create wires to get information from proper places in time buffer.
123}
124
125void
126BAC::setActiveThreads(std::list<ThreadID> *at_ptr)
127{
128 activeThreads = at_ptr;
129}
130
131void
132BAC::setFetchTargetQueue(FTQ *_ptr)
133{
134 // Set pointer to the fetch target queue
135 ftq = _ptr;
136}
137
138void
139BAC::startupStage()
140{
141 resetStage();
142 // For the decoupled BPU the BAC needs to start together with fetch,
143 // so it must start up in the active state.
144 switchToActive();
145}
146
147void
148BAC::clearStates(ThreadID tid)
149{
150 bacStatus[tid] = Running;
151 set(bacPC[tid], cpu->pcState(tid));
152
153 stalls[tid].fetch = false;
154 stalls[tid].drain = false;
155 stalls[tid].bpu = false;
156
157 assert(ftq != nullptr);
158 ftq->resetState(tid);
159}
160
161void
162BAC::resetStage()
163{
164 // Setup PC and nextPC with initial state.
165 for (ThreadID tid = 0; tid < numThreads; ++tid) {
166 clearStates(tid);
167 }
168
169 wroteToTimeBuffer = false;
171}
172
173void
174BAC::drainResume()
175{
176 DPRINTF(Drain, "Resume from draining.\n");
177 for (ThreadID i = 0; i < numThreads; ++i) {
178 stalls[i].drain = false;
179 }
180}
181
182void
183BAC::drainSanityCheck() const
184{
185 assert(isDrained());
186
187 for (ThreadID i = 0; i < numThreads; ++i) {
188 assert(bacStatus[i] == Idle || stalls[i].drain);
189 assert(ftq->isEmpty(i));
190 }
191
192 bpu->drainSanityCheck();
193}
194
195bool
196BAC::isDrained() const
197{
198 // Make sure the FTQ is empty and the state of all threads is idle.
199 for (ThreadID i = 0; i < numThreads; ++i) {
200 // Verify the FTQ is drained
201 if (!ftq->isEmpty(i)) {
202 return false;
203 }
204
205 // Return false if not in an idle state
206 if (bacStatus[i] != Idle) {
207 return false;
208 }
209 }
210 return true;
211}
212
213void
214BAC::drainStall(ThreadID tid)
215{
216 assert(cpu->isDraining());
217 assert(!stalls[tid].drain);
218 DPRINTF(Drain, "%i: Thread drained.\n", tid);
219 stalls[tid].drain = true;
220}
221
222void
223BAC::switchToActive()
224{
225 if (_status == Inactive) {
226 DPRINTF(Activity, "Activating stage.\n");
227 cpu->activateStage(CPU::BACIdx);
228 _status = Active;
229 }
230}
231
232void
233BAC::switchToInactive()
234{
235 if (_status == Active) {
236 DPRINTF(Activity, "Deactivating stage.\n");
237 cpu->deactivateStage(CPU::BACIdx);
238 _status = Inactive;
239 }
240}
241
242bool
243BAC::checkStall(ThreadID tid) const
244{
245 bool ret_val = false;
246
247 if (stalls[tid].fetch) {
248 DPRINTF(BAC, "[tid:%i] Fetch stall detected.\n", tid);
249 ret_val = true;
250 }
251
252 if (stalls[tid].bpu) {
253 DPRINTF(BAC, "[tid:%i] BPU stall detected.\n", tid);
254 ret_val = true;
255 }
256
257 return ret_val;
258}
259
260void
261BAC::updateBACStatus()
262{
263 // Check Running
264 for (ThreadID tid : *activeThreads) {
265
266 if (bacStatus[tid] == Running || bacStatus[tid] == Squashing) {
267
268 if (_status == Inactive) {
269 DPRINTF(Activity, "[tid:%i] Activating stage.\n", tid);
270
271 cpu->activateStage(CPU::BACIdx);
272 }
273
274 _status = Active;
275 return;
276 }
277 }
278
279 // Stage is switching from active to inactive, notify CPU of it.
280 if (_status == Active) {
281 DPRINTF(Activity, "Deactivating stage.\n");
282
283 cpu->deactivateStage(CPU::BACIdx);
284 }
285
287}
288
289bool
290BAC::checkAndUpdateBPUSignals(ThreadID tid)
291{
292 // Check squash signals from commit.
293 if (fromCommit->commitInfo[tid].squash) {
294
295 DPRINTF(BAC, "[tid:%i] Squashing from commit. PC = %s\n", tid,
296 *fromCommit->commitInfo[tid].pc);
297
298 // In any case, squash the FTQ and the branch histories in the
299 // FTQ first.
300 squashBpuHistories(tid);
301 squash(*fromCommit->commitInfo[tid].pc, tid);
302
303 // If it was a branch mispredict on a control instruction, update the
304 // branch predictor with that instruction, otherwise just kill the
305 // invalid state we generated after the sequence number.
306 if (fromCommit->commitInfo[tid].mispredictInst &&
307 fromCommit->commitInfo[tid].mispredictInst->isControl()) {
308
309 bpu->squash(fromCommit->commitInfo[tid].doneSeqNum,
310 *fromCommit->commitInfo[tid].pc,
311 fromCommit->commitInfo[tid].branchTaken, tid, true);
312 stats.branchMisspredict++;
313 stats.squashBranchCommit++;
314 } else {
315 bpu->squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
316 if (fromCommit->commitInfo[tid].mispredictInst) {
317 DPRINTF(BAC,
318 "[tid:%i] Squashing due to mispredict of "
319 "non-control instruction: %s\n",
320 tid,
321 fromCommit->commitInfo[tid]
322 .mispredictInst->staticInst->disassemble(
323 fromCommit->commitInfo[tid]
324 .mispredictInst->pcState()
325 .instAddr()));
326 }
327 stats.noBranchMisspredict++;
328 }
329 return true;
330
331 } else if (fromCommit->commitInfo[tid].doneSeqNum) {
332 // Update the branch predictor if it wasn't a squashed instruction
333 // that was broadcasted.
334 bpu->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
335 }
336
337 // Check squash signals from decode.
338 if (fromDecode->decodeInfo[tid].squash) {
339 DPRINTF(Fetch, "[tid:%i] Squashing from decode. PC = %s\n", tid,
340 *fromDecode->decodeInfo[tid].nextPC);
341
342 // Squash.
343 squashBpuHistories(tid);
344 squash(*fromDecode->decodeInfo[tid].nextPC, tid);
345
346 // Update the branch predictor.
347 if (fromDecode->decodeInfo[tid].branchMispredict) {
348
349 bpu->squash(fromDecode->decodeInfo[tid].doneSeqNum,
350 *fromDecode->decodeInfo[tid].nextPC,
351 fromDecode->decodeInfo[tid].branchTaken, tid, false);
352 stats.branchMisspredict++;
353 stats.squashBranchDecode++;
354 } else {
355 bpu->squash(fromDecode->decodeInfo[tid].doneSeqNum, tid);
356 stats.noBranchMisspredict++;
357 }
358 return true;
359 }
360
361 // Check squash signals from fetch.
362 if (fromFetch->fetchInfo[tid].squash && bacStatus[tid] != Squashing) {
363 DPRINTF(BAC, "Squashing from fetch with PC = %s\n",
364 *fromFetch->fetchInfo[tid].nextPC);
365
366 // Squash unless we're already squashing.
367 squashBpuHistories(tid);
368 squash(*fromFetch->fetchInfo[tid].nextPC, tid);
369 return true;
370 }
371 return false;
372}
373
374bool
375BAC::checkSignalsAndUpdate(ThreadID tid)
376{
377 // Check if there's a squash signal, squash if there is.
378 // Check stall signals, block if necessary.
379 if (checkAndUpdateBPUSignals(tid)) {
380 return true;
381 }
382
383 // Check stalls
384 if (stalls[tid].drain) {
385 assert(cpu->isDraining());
386 DPRINTF(BAC, "[tid:%i] Drain stall detected.\n", tid);
387 // Squash BPU histories and disable the FTQ.
388 squashBpuHistories(tid);
389 ftq->squash(tid);
390
391 bacStatus[tid] = Idle;
392 return true;
393 }
394
395 if (checkStall(tid)) {
396 bacStatus[tid] = Blocked;
397 return true;
398 }
399
400 // If at this point the FTQ is still invalid, we need to wait for
401 // a resteer/squash signal.
402 if (!ftq->isReady(tid) && bacStatus[tid] != Idle) {
403 DPRINTF(BAC, "[tid:%i] FTQ is invalid. Wait for resteer.\n", tid);
404
405 bacStatus[tid] = Idle;
406 return true;
407 }
408
409 // Check if the FTQ got blocked or unblocked
410 if ((bacStatus[tid] == Running) && ftq->isLocked(tid)) {
411
412 DPRINTF(BAC, "[tid:%i] FTQ is locked\n", tid);
413 bacStatus[tid] = FTQLocked;
414 return true;
415 }
416 if ((bacStatus[tid] == FTQLocked) && !ftq->isLocked(tid)) {
417
418 DPRINTF(BAC, "[tid:%i] FTQ not locked anymore -> Running\n", tid);
419 bacStatus[tid] = Running;
420 return true;
421 }
422
423 // Check if the FTQ became free in that cycle.
424 if ((bacStatus[tid] == FTQFull) && !ftq->isFull(tid)) {
425
426 DPRINTF(BAC, "[tid:%i] FTQ not full anymore -> Running\n", tid);
427 bacStatus[tid] = Running;
428 return true;
429 }
430
431 if (bacStatus[tid] == Squashing) {
432
433 // Switch status to running after squashing FTQ and setting the PC.
434 DPRINTF(BAC, "[tid:%i] Done squashing, switching to running.\n", tid);
435 bacStatus[tid] = Running;
436 return true;
437 }
438
439 if (ftq->isFull(tid)) {
440 // If the FTQ is full, we need to block the BAC.
441 if (bacStatus[tid] != FTQFull) {
442 DPRINTF(BAC, "[tid:%i] FTQ is full. Blocking BAC.\n", tid);
443 bacStatus[tid] = FTQFull;
444 }
445 return true;
446 }
447
448 // Now all stall/squash conditions are checked.
449 // Attempt to run the BAC if not already running.
450 if (ftq->isReady(tid) &&
451 ((bacStatus[tid] == Idle) || (bacStatus[tid] == Blocked))) {
452
453 DPRINTF(BAC, "[tid:%i] Attempt to run\n", tid);
454 bacStatus[tid] = Running;
455 return true;
456 }
457
458 // If we've reached this point, we have not gotten any signals that
459 // cause BAC to change its status. BAC remains the same as before.
460 return false;
461}
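// In summary, the checks above are evaluated in priority order:
// squashes from commit/decode/fetch first, then drain, then fetch/BPU
// stalls, then an invalid FTQ, then the FTQ lock/unlock and full/free
// transitions, then the Squashing -> Running transition, and finally
// Idle/Blocked -> Running once the FTQ is ready again.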
462
463void
464BAC::squashBpuHistories(ThreadID tid)
465{
466 if (!decoupledFrontEnd) {
467 return;
468 }
469
470 DPRINTF(BAC, "%s(tid:%i): FTQ sz: %i\n", __func__, tid, ftq->size(tid));
471
472 // Iterate over the FTQ in reverse order to
473 // revert all predictions made.
474 ftq->forAllBackward(tid, [this, tid](FetchTargetPtr &ft) {
475 if (ft->bpuHistory) {
476 bpu->squashHistory(tid, ft->bpuHistory);
477 assert(ft->bpuHistory == nullptr);
478 ft->bpuHistory = nullptr;
479 }
480 });
481}
482
483void
484BAC::squash(const PCStateBase &new_pc, ThreadID tid)
485{
486 if (!decoupledFrontEnd) {
487 return;
488 }
489
490 DPRINTF(BAC, "[tid:%i] Squashing FTQ.\n", tid);
491
492 // Set status to squashing.
493 bacStatus[tid] = Squashing;
494
495 // Set the new PC
496 set(bacPC[tid], new_pc);
497
498 // Then squash all fetch targets
499 ftq->squash(tid);
500}
501
502void
503BAC::tick()
504{
505 bool activity = false;
506 bool status_change = false;
507
508 if (decoupledFrontEnd) {
509 // FDP ---------------------------------------------
510 // In the decoupled frontend the BAC stage is active like all the
511 // other stages. Its main purpose is to generate fetch targets by
512 // using the branch prediction unit.
513
514 for (const ThreadID tid : *activeThreads) {
515 // Check stall and squash signals first.
516 status_change = status_change || checkSignalsAndUpdate(tid);
517
518 // Generate fetch targets if BAC is in running state
519 if (bacStatus[tid] == Running) {
520 generateFetchTargets(tid, status_change);
521 activity = true;
522 }
523 stats.status[bacStatus[tid]]++;
524 }
525
526 } else {
527 // No FDP -------------------------------------------
528 // In the non-decoupled frontend the BAC stage is passive and
529 // only manages the branch prediction unit.
530 // It is always idle.
531
532 for (const ThreadID tid : *activeThreads) {
533 // Update only the branch prediction signals.
534 checkAndUpdateBPUSignals(tid);
535 if (bacStatus[tid] != Idle) {
536 bacStatus[tid] = Idle;
537 status_change = true;
538 }
539 }
540 } // ----------------------------------------------------
541
542 if (status_change) {
543 updateBACStatus();
544 }
545
546 if (activity) {
547 DPRINTF(Activity, "Activity this cycle.\n");
548
549 cpu->activityThisCycle();
550 }
551}
552
553FetchTargetPtr
554BAC::newFetchTarget(ThreadID tid, const PCStateBase &start_pc)
555{
556 auto ft = std::make_shared<FetchTarget>(tid, start_pc,
557 cpu->getAndIncrementFTSeq());
558
559 DPRINTF(BAC, "Create new fetch target ftn:%llu\n", ft->ftNum());
560 stats.fetchTargets++;
561 return ft;
562}
563
564bool
565BAC::predict(ThreadID tid, const StaticInstPtr &inst,
566 const FetchTargetPtr &ft, PCStateBase &pc)
567{
568
577 assert(ft->bpuHistory == nullptr);
578 bool taken = bpu->predict(inst, ft->ftNum(), pc, tid, ft->bpuHistory);
579
580 DPRINTF(Branch, "[tid:%i, ftn:%llu] History added.\n", tid, ft->ftNum());
581 return taken;
582}
583
584void
585BAC::generateFetchTargets(ThreadID tid, bool &status_change)
586{
615
616 PCStateBase &cur_pc = *bacPC[tid];
617 int num_ft = 0;
618 int num_taken = 0;
619
620 while ((num_ft < maxFTPerCycle) && (num_taken < maxTakenPredPerCycle)) {
621 // Get a reference to the current PC state for this thread.
622 // The search itself is done on the instruction address to speed up
623 // simulation time.
624 Addr search_addr = cur_pc.instAddr();
625 Addr start_addr = search_addr;
626
627 // Create a new fetch target starting with the current PC.
628 FetchTargetPtr curFT = newFetchTarget(tid, cur_pc);
629 num_ft++;
630
631 bool branch_found = false;
632 bool predict_taken = false;
633
634 // Scan through the instruction stream and search for branches.
635 // The BTB contains only branches that were taken at least once.
636 // The search stops either because a branch was found in the instruction
637 // stream or because the maximum search width per cycle was reached.
638 // In the first case make the branch prediction; in the latter,
639 // advance the PC to start the search at the following address.
640 while (true) {
641
642 // Check if the current search address can be found in the BTB,
643 // indicating a branch and thus the end of the fetch target.
644 branch_found = bpu->BTBValid(tid, search_addr);
645
646 // If it's a branch, stop searching.
647 if (branch_found) {
648 break;
649 }
650
651 // If it's not a branch, check if the maximum search width is
652 // reached. If so, stop searching.
653 if ((search_addr - start_addr) >= fetchTargetWidth) {
654 break;
655 }
656
657 // Continue searching.
658 search_addr += minInstSize;
659 }
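// Worked example with assumed values (minInstSize = 4,
// fetchTargetWidth = 64, start_addr = 0x1000): the loop above probes
// the BTB at 0x1000, 0x1004, 0x1008, ... and stops either at the first
// BTB hit or once search_addr has advanced fetchTargetWidth bytes past
// start_addr (i.e. at 0x1040).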
660
661 // Update the current PC to point to the last instruction
662 // in the fetch target
663 cur_pc.set(search_addr);
664
665 // Make a copy of the current PC since the BPU will update it.
666 std::unique_ptr<PCStateBase> next_pc(cur_pc.clone());
667 StaticInstPtr staticInst = nullptr;
668
669 if (branch_found) {
670 // Branch found in instruction stream. As the current
671 // BPU implementation requires the static instruction, we need to
672 // look it up in the BTB.
673 staticInst = bpu->BTBGetInst(tid, cur_pc.instAddr());
674 assert(staticInst);
675
676 // Now make the actual prediction. Note the BPU will advance
677 // the PC to the next instruction.
678 predict_taken = predict(tid, staticInst, curFT, *next_pc);
679
680 DPRINTF(BAC,
681 "[tid:%i, ftn:%llu] Branch found at PC %#x "
682 "taken?:%i, target:%#x\n",
683 tid, curFT->ftNum(), cur_pc.instAddr(), predict_taken,
684 next_pc->instAddr());
685
686 stats.branches++;
687 if (predict_taken) {
688 stats.predTakenBranches++;
689 num_taken++;
690 }
691 }
692
693 if (!predict_taken) {
694 // Not predicted taken. Start the next FT at the next address.
695 next_pc->set(cur_pc.instAddr() + minInstSize);
696 }
697
698 // x86 has some complex instructions, like string copy, where the branch
699 // is not the last micro-op, or which have several branches within the same
700 // instruction. Those branches always jump to themselves. This messes up
701 // the searching approach and will result in an infinite loop until the
702 // branch is squashed.
703 // We handle this by assuming only one branch per instruction and go
704 // straight to the next address/instruction/fetch target. In case the
705 // decoder finds more branches in this instruction we squash the FTQ.
706 // (see postFetch())
707 // This could be circumvented by using not only the PC but also the
708 // microPC to make predictions. However, since such instructions are
709 // rare this is not implemented.
710 // Note that Arm does not microcode branches and correspondingly does
711 // not set the `IsLastMicroop` flag, which would mislead the decoupling for
712 // Arm. Therefore, we check whether an instruction is microcoded in the
713 // first place.
714 if (staticInst && staticInst->isMicroop() &&
715 !staticInst->isLastMicroop()) {
716 stats.branchesNotLastuOp++;
717 // The target is always itself, no matter whether it's taken or not.
718 // assert(next_pc->instAddr() == search_addr);
719 DPRINTF(BAC,
720 "Branch detected which is not the last uOp %s. "
721 "Continue with next address.\n",
722 cur_pc);
723
724 next_pc->set(cur_pc.instAddr() + staticInst->size());
725 }
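// Example with assumed values: for a micro-coded x86 branch of
// staticInst->size() == 3 bytes at 0x2000 that targets itself, the
// fixup above forces next_pc to 0x2003, so the next fetch target starts
// past the self-targeting branch instead of looping on it forever.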
726
727 // Complete the fetch target if
728 // - a branch is found
729 // - or the maximum fetch bandwidth is reached.
730 curFT->finalize(cur_pc, branch_found, predict_taken, *next_pc);
731
732 ftq->insert(tid, curFT);
733 wroteToTimeBuffer = true;
734
735 DPRINTF(BAC,
736 "[tid:%i] [fn:%llu] %i addresses searched. "
737 "Branch found:%i. Continue with PC:%s in next cycle\n",
738 tid, curFT->ftNum(), (search_addr - start_addr), branch_found,
739 *next_pc);
740
741 stats.ftSizeDist.sample(search_addr - start_addr);
742
743 // Finally set the BPU PC to the next FT in the next cycle
744 set(cur_pc, *next_pc);
745
746 if (debug::FTQ) {
747 ftq->printFTQ(tid);
748 }
749 // Check whether the FTQ became full. In that case block until
750 // fetch has consumed one.
751 if (ftq->isFull(tid)) {
752 DPRINTF(BAC, "FTQ full\n");
753 bacStatus[tid] = FTQFull;
754 status_change = true;
755 break;
756 }
757 }
758 stats.ftNumber.sample(num_ft);
759}
760
762
763bool
764BAC::updatePreDecode(ThreadID tid, const InstSeqNum seqNum,
765 const StaticInstPtr &inst, PCStateBase &pc,
766 const FetchTargetPtr &ft)
767{
768 assert(ft != nullptr);
769 // The PC must be in the range of the fetch target.
770 assert(ft->inRange(pc.instAddr()));
771
772 assert(ft->ftNum() == ftq->readHead(tid)->ftNum());
773 BranchType brType = getBranchType(inst);
774 stats.preDecUpdate[brType]++;
775
776 DPRINTF(BAC,
777 "%s(tid:%i, sn:%lu, inst: %s, PC:%#x, FT[%llu, taken=%i, "
778 "end=%#x])\n",
779 __func__, tid, seqNum, branch_prediction::toString(brType),
780 pc.instAddr(), ft->ftNum(), ft->predTaken(), ft->endAddress());
781
782 bool target_set = false;
783 BPredUnit::PredictorHistory *hist = nullptr;
784
785 // The fetch stage will call this function after pre-decoding has
786 // identified a branch instruction. Check if this is the exit
787 // branch.
788 if (ft->isExitBranch(pc.instAddr()) && ft->bpuHistory != nullptr) {
789
790 // Pop the history from the FTQ to move it later to the
791 // history buffer.
792 std::swap(hist, ft->bpuHistory);
793
794 DPRINTF(BAC,
795 "Pop history from FT:%llu => sn:%llu, PC:%#x, taken:%i, "
796 "target:%#x\n",
797 ft->ftNum(), seqNum, hist->pc, hist->predTaken,
798 hist->target->instAddr());
799 }
800
801 // Special cases ------------------------------------------------
802 // We need to handle two corner cases for complex instructions
803 // 1. For complex instructions it can happen that several branches with
804 // different types exist in the same instruction. If the branch type
805 // does not match the type of the prediction history, the history is
806 // invalid. We squash the history so that we can make a fresh
807 // prediction.
808 if (hist && (hist->type != brType)) {
809 DPRINTF(Branch, "[tid:%i] Branch types don't match. Delete history\n", tid);
810 stats.typeMissmatch++;
811
812 // Push the history back to the FTQ to allow it to be squashed
813 // in the correct order. Then squash all histories right away.
814 std::swap(ft->bpuHistory, hist);
815 squashBpuHistories(tid);
816
817 // Lock the FTQ. The complex instruction needs to
818 // be completed before unlocking. Unlocking is performed by resetting
819 // the BAC stage.
820 ftq->lock(tid);
821 }
822
823 // 2. For complex instructions comprising multiple branches the history is
824 // already used. We only predict the first branch in a complex instruction
825 // (see createFetchTarget() function).
826 // In that case we squash the FTQ and lock it until the full instruction
827 // has completed. Afterwards the fetch stage will reset the BAC stage
828 // with a bacResteer() call. Hence, the operation for complex instructions
829 // is: detect multi-branch inst. -> lock FTQ until inst. done -> reset BAC.
830 //
831 // Note we might end up here multiple times until the full instruction
832 // is completed.
833 if (inst->isMicroop() && !inst->isLastMicroop() && (hist == nullptr)) {
834
835 DPRINTF(Branch, "No history for complex instruction found. \n");
836 stats.multiBranchInst++;
837
838 // First squash all histories that are already in the FTQ
839 // to have a clean state.
840 squashBpuHistories(tid);
841
842 // Then lock the FTQ. The complex instruction needs to
843 // be completed before unlocking. Unlocking is performed by
844 // resetting the BAC stage with a bacResteer() call from the
845 // fetch stage.
846 ftq->lock(tid);
847
848 // Finally we can make a fresh prediction.
849 bpu->predict(inst, ft->ftNum(), pc, tid, hist);
850 target_set = true;
851 }
852
853 // Normal case --------------------------------------------------
854 // Check if we have a valid history. If not we need to create one.
855 if (hist == nullptr) {
856 DPRINTF(BAC, "[tid:%i, sn:%llu] No branch history for PC:%#x\n", tid,
857 seqNum, pc.instAddr());
858 stats.noHistByType[brType]++;
859
860 // The branch was not detected by the BAC stage in the first place
861 // because the BTB did not have an entry for this PC. It can happen
862 // if this is the first time the branch is encountered, the branch
863 // was never taken before, or the entry got evicted.
864 //
865 // Create a "dummy" history object by assuming the branch is not
866 // taken. This will allow the BPU to fix its histories and internal
867 // state in case the assumption was wrong. It works because for
868 // FDP we use a "taken" history, where not-taken branches don't modify
869 // the global history.
870
871 hist =
872 new BPredUnit::PredictorHistory(tid, seqNum, pc.instAddr(), inst);
873 bpu->branchPlaceholder(tid, pc.instAddr(), inst->isUncondCtrl(),
874 hist->bpHistory);
875
876 set(hist->target, std::unique_ptr<PCStateBase>(pc.clone()));
877 inst->advancePC(*hist->target);
878 }
879
880 assert(hist != nullptr);
881 assert(hist->type == brType);
882
883 // Assign the branch instruction instance its sequence number
884 // and push the history to the main history buffer.
885 hist->seqNum = seqNum;
886 bpu->insertPredictorHistory(tid, hist);
887
888 // Finally update the current fetch PC if not already done.
889 // For taken branches the target is stored in the FTQ. For not taken
890 // branches we need to advance the PC.
891 if (!target_set) {
892 if (hist->predTaken) {
893 set(pc, ft->readPredTarg());
894 } else {
895 inst->advancePC(pc);
896 }
897 }
898
899 DPRINTF(BAC, "%s done. next PC:%s\n", __func__, pc);
900 return hist->predTaken;
901}
902
903bool
904BAC::updatePC(const DynInstPtr &inst, PCStateBase &fetch_pc,
905 FetchTargetPtr &ft)
906{
907 // This function will update the fetch PC to the next instruction.
908 // If the current instruction is a branch it will make
909 // the branch prediction.
910 bool predict_taken;
911 ThreadID tid = inst->threadNumber;
912
913 if (inst->isControl()) {
914 // The instruction is a control instruction.
915
916 if (decoupledFrontEnd) {
917 // With a decoupled front-end the branch prediction was done
918 // while creating the fetch target. Now update the prediction
919 // with the information from the predecoding.
920 predict_taken = updatePreDecode(tid, inst->seqNum,
921 inst->staticInst, fetch_pc, ft);
922 } else {
923 // With a coupled front-end we need to make the branch prediction
924 // here.
925 predict_taken =
926 bpu->predict(inst->staticInst, inst->seqNum, fetch_pc, tid);
927 }
928
929 DPRINTF(BAC,
930 "[tid:%i] [sn:%llu] Branch at PC %#x "
931 "predicted %s to go to %s\n",
932 tid, inst->seqNum, inst->pcState().instAddr(),
933 predict_taken ? "taken" : "not taken", fetch_pc);
934 inst->setPredTarg(fetch_pc);
935 inst->setPredTaken(predict_taken);
936
937 ++stats.branches;
938
939 if (predict_taken) {
940 ++stats.predTakenBranches;
941 }
942
943 } else {
944
945 // For non-branch instructions simply advance the PC.
946 inst->staticInst->advancePC(fetch_pc);
947 inst->setPredTarg(fetch_pc);
948 inst->setPredTaken(false);
949 predict_taken = false;
950 }
951
952 if (decoupledFrontEnd) {
953
954 // For the decoupled front-end we need to check if this instruction
955 // is the exit instruction of the fetch target. It does not need
956 // to be a branch.
957 // If the instruction is microcoded, check if it's the last uOp.
958 // Also remove the fetch target if the FTQ became invalid.
959 if ((ft->isExitInst(inst->pcState().instAddr()) &&
960 (!inst->isMicroop() || inst->isLastMicroop())) ||
961 !ftq->isReady(tid)) {
962
963 DPRINTF(BAC, "[tid:%i][ft:%llu] Reached end of Fetch Target\n",
964 tid, ft->ftNum());
965
966 ft = nullptr;
967 }
968 }
969
970 return predict_taken;
971}
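// Caller-side sketch (hypothetical, for illustration only; the real
// fetch-stage code lives in fetch.cc and may differ): fetch is expected
// to call updatePC() for every instruction it builds and to pop the
// next fetch target from the FTQ once the current one is consumed:
//
//     bool taken = bac->updatePC(inst, this_pc, curFetchTarget);
//     if (!curFetchTarget) {
//         // The fetch target was fully consumed (or the FTQ became
//         // invalid); obtain the next one before fetching further.
//     }
//
// Names such as bac, this_pc and curFetchTarget are assumptions, not
// identifiers taken from this file.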
972
973BAC::BACStats::BACStats(CPU *cpu, BAC *bac)
974 : statistics::Group(cpu, "bac"),
975 ADD_STAT(status, statistics::units::Cycle::get(),
976 "Number of cycles BAC in state"),
977 ADD_STAT(fetchTargets, statistics::units::Count::get(),
978 "Number of fetch targets created "),
979 ADD_STAT(branches, statistics::units::Count::get(),
980 "Number of branches that BAC encountered"),
981 ADD_STAT(predTakenBranches, statistics::units::Count::get(),
982 "Number of branches that BAC predicted taken."),
983 ADD_STAT(branchesNotLastuOp, statistics::units::Count::get(),
984 "Number of branches that fetch encountered which are not the "
985 "last uOp within a macrooperation. Jump to itself."),
986 ADD_STAT(branchMisspredict, statistics::units::Count::get(),
987 "Number of mispredicted branches"),
988 ADD_STAT(noBranchMisspredict, statistics::units::Count::get(),
989 "Number of non-branch instructions mispredicted"),
990 ADD_STAT(squashBranchDecode, statistics::units::Count::get(),
991 "Number of branches squashed from decode"),
992 ADD_STAT(squashBranchCommit, statistics::units::Count::get(),
993 "Number of branches squashed from commit"),
994 ADD_STAT(preDecUpdate, statistics::units::Count::get(),
995 "Number of branches extracted from the predecoder"),
996 ADD_STAT(noHistByType, statistics::units::Count::get(),
997 "Number and type of branches that were undetected by the BPU."),
998 ADD_STAT(typeMissmatch, statistics::units::Count::get(),
999 "Number of branches where the branch types mismatch"),
1000 ADD_STAT(multiBranchInst, statistics::units::Count::get(),
1001 "Number of branches within multi-branch (complex) instructions."),
1002 ADD_STAT(ftSizeDist, statistics::units::Count::get(),
1003 "Number of bytes per fetch target"),
1004 ADD_STAT(ftNumber, statistics::units::Count::get(),
1005 "Number of fetch targets inserted into the FTQ per cycle")
1006{
1007 using namespace statistics;
1008 status.init(ThreadStatusMax).flags(nozero);
1009 for (int i = 0; i < ThreadStatusMax; ++i) {
1010 status.subname(i, statusStrings[i]);
1011 status.subdesc(i, "Number of cycles BAC is " + statusStrings[i]);
1012 }
1013
1014 ftSizeDist
1015 .init(/* base value */ 0,
1016 /* last value */ bac->fetchTargetWidth,
1017 /* bucket size */ 4)
1018 .flags(statistics::pdf);
1019
1020 preDecUpdate.init(enums::Num_BranchType).flags(total | pdf);
1021 noHistByType.init(enums::Num_BranchType).flags(total | pdf);
1022 ftNumber.init(0, bac->maxFTPerCycle, 1);
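// ftNumber thus gets (maxFTPerCycle + 1) buckets of width one,
// recording how often 0, 1, ..., maxFTPerCycle fetch targets were
// created in a cycle (sampled at the end of generateFetchTargets()).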
1023
1024 for (int i = 0; i < enums::Num_BranchType; i++) {
1025 preDecUpdate.subname(i, enums::BranchTypeStrings[i]);
1026 noHistByType.subname(i, enums::BranchTypeStrings[i]);
1027 }
1028}
1029
1030} // namespace o3
1031} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
virtual void set(Addr val)
Definition pcstate.hh:131
Addr instAddr() const
Returns the memory address of the instruction this PC points to.
Definition pcstate.hh:108
virtual PCStateBase * clone() const =0
Base class for branch operations.
Definition branch.hh:49
bool isUncondCtrl() const
virtual void advancePC(PCStateBase &pc_state) const =0
size_t size() const
bool isLastMicroop() const
bool isMicroop() const
const Cycles decodeToFetchDelay
Decode to fetch delay.
Definition bac.hh:386
bool isDrained() const
Has the stage drained?
Definition bac.cc:196
CPU * cpu
Pointer to the main CPU.
Definition bac.hh:337
void drainResume()
Resume after a drain.
Definition bac.cc:174
TimeBuffer< TimeStruct >::wire fromFetch
Wire to get fetch's information from backwards time buffer.
Definition bac.hh:349
std::list< ThreadID > * activeThreads
List of Active FTQ Threads.
Definition bac.hh:408
Stalls stalls[MaxThreads]
Tracks which stages are telling the ftq to stall.
Definition bac.hh:377
bool wroteToTimeBuffer
Variable that tracks if BAC has written to the time buffer this cycle.
Definition bac.hh:366
bool checkStall(ThreadID tid) const
Checks if a thread is stalled.
Definition bac.cc:243
const unsigned fetchTargetWidth
The maximum width of a fetch target.
Definition bac.hh:399
const unsigned minInstSize
The minimum size an instruction can have in the current architecture.
Definition bac.hh:405
void startupStage()
Initialize stage.
Definition bac.cc:139
void generateFetchTargets(ThreadID tid, bool &status_change)
Main function that feeds the FTQ with new fetch targets.
Definition bac.cc:585
@ ThreadStatusMax
Definition bac.hh:114
const Cycles fetchToBacDelay
Fetch to BAC delay.
Definition bac.hh:383
const unsigned int cacheBlkSize
Cache block size.
Definition bac.hh:395
void setFetchTargetQueue(FTQ *_ptr)
Connect the FTQ.
Definition bac.cc:132
const Cycles bacToFetchDelay
BAC to fetch delay.
Definition bac.hh:392
void updateBACStatus()
Updates overall BAC stage status; to be called at the end of each cycle.
Definition bac.cc:261
const unsigned maxTakenPredPerCycle
Definition bac.hh:417
ThreadStatus bacStatus[MaxThreads]
Per-thread status.
Definition bac.hh:122
const unsigned maxFTPerCycle
Definition bac.hh:414
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets pointer to list of active threads.
Definition bac.cc:126
const ThreadID numThreads
Number of threads.
Definition bac.hh:411
BACStatus _status
BAC stage status.
Definition bac.hh:119
bool checkSignalsAndUpdate(ThreadID tid)
Checks all input signals and updates the status as necessary.
Definition bac.cc:375
bool predict(ThreadID tid, const StaticInstPtr &inst, const FetchTargetPtr &ft, PCStateBase &pc)
The prediction function for the BAC stage.
Definition bac.cc:565
bool updatePreDecode(ThreadID tid, const InstSeqNum seqNum, const StaticInstPtr &inst, PCStateBase &pc, const FetchTargetPtr &ft)
Pre-decode update: After predecoding an instruction in the fetch ...
Definition bac.cc:764
BAC(CPU *_cpu, const BaseO3CPUParams &params)
BAC constructor.
Definition bac.cc:77
void drainStall(ThreadID tid)
Stall the fetch stage after reaching a safe drain point.
Definition bac.cc:214
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition bac.cc:183
void resetStage()
Reset this pipeline stage.
Definition bac.cc:162
bool updatePC(const DynInstPtr &inst, PCStateBase &fetch_pc, FetchTargetPtr &ft)
Calculate the next PC address depending on the instruction type and the branch prediction.
Definition bac.cc:904
const Cycles commitToFetchDelay
Commit to fetch delay.
Definition bac.hh:389
const bool decoupledFrontEnd
Enables the decoupled front-end.
Definition bac.hh:380
FetchTargetPtr newFetchTarget(ThreadID tid, const PCStateBase &start_pc)
Create a new fetch target.
Definition bac.cc:554
gem5::o3::BAC::BACStats stats
void switchToInactive()
Changes the status of this stage to inactive, and indicates this to the CPU.
Definition bac.cc:233
bool checkAndUpdateBPUSignals(ThreadID tid)
Check the backward signals that update the BPU.
Definition bac.cc:290
branch_prediction::BranchType BranchType
Definition bac.hh:93
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the main backwards communication time buffer pointer.
Definition bac.cc:115
void clearStates(ThreadID tid)
Clear all thread-specific states.
Definition bac.cc:148
std::string name() const
Returns the name of the stage.
Definition bac.cc:109
void switchToActive()
Changes the status of this stage to active, and indicates this to the CPU.
Definition bac.cc:223
void squashBpuHistories(ThreadID tid)
Squashes the BPU histories in the FTQ.
Definition bac.cc:464
void tick()
Process all input signals and create the next fetch target.
Definition bac.cc:503
std::unique_ptr< PCStateBase > bacPC[MaxThreads]
The decoupled PC which runs ahead of fetch.
Definition bac.hh:361
TimeBuffer< TimeStruct >::wire fromDecode
Wire to get decode's information from backwards time buffer.
Definition bac.hh:352
void squash(const PCStateBase &new_pc, ThreadID tid)
Squashes BAC for a specific thread and resets the PC.
Definition bac.cc:484
branch_prediction::BPredUnit * bpu
BPredUnit.
Definition bac.hh:340
FTQ * ftq
Fetch target Queue.
Definition bac.hh:343
TimeBuffer< TimeStruct > * timeBuffer
Time buffer interface.
Definition bac.hh:346
TimeBuffer< TimeStruct >::wire fromCommit
Wire to get commit's information from backwards time buffer.
Definition bac.hh:355
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:97
FTQ class.
Definition ftq.hh:227
Fetch class handles both single threaded and SMT fetch.
Definition fetch.hh:83
STL list class.
Definition stl.hh:51
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:268
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 12, 11 > set
Bitfield< 4 > pc
std::string toString(BranchType type)
BranchType getBranchType(StaticInstPtr inst)
std::shared_ptr< FetchTarget > FetchTargetPtr
Definition bac.hh:62
static constexpr int MaxThreads
Definition limits.hh:38
RefCountingPtr< DynInst > DynInstPtr
Units for Stats.
Definition units.hh:113
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition info.hh:61
const FlagsType nozero
Don't print if this is zero.
Definition info.hh:67
const FlagsType total
Print the total.
Definition info.hh:59
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
RefCountingPtr< StaticInst > StaticInstPtr
uint64_t InstSeqNum
Definition inst_seq.hh:40
Branch Predictor Unit (BPU) history object PredictorHistory This class holds all information needed t...
InstSeqNum seqNum
The sequence number for the predictor history entry.
bool predTaken
Whether or not it was predicted taken.
void * bpHistory
Pointer to the history objects passed back from the branch predictor subcomponents.
const Addr pc
The PC associated with the sequence number.
const BranchType type
The type of the branch.
std::unique_ptr< PCStateBase > target
The predicted target.
statistics::Vector preDecUpdate
Number of post updates.
Definition bac.hh:454
BACStats(CPU *cpu, BAC *bac)
Definition bac.cc:973
statistics::Scalar branches
Total number of branches detected.
Definition bac.hh:439
statistics::Scalar predTakenBranches
Total number of branches predicted taken.
Definition bac.hh:441
statistics::Scalar branchesNotLastuOp
Total number of fetched branches.
Definition bac.hh:443
statistics::Scalar multiBranchInst
Definition bac.hh:460
statistics::Scalar typeMissmatch
Stat for the two corner cases.
Definition bac.hh:459
statistics::Scalar squashBranchCommit
Definition bac.hh:451
statistics::Vector noHistByType
Number of branches undetected by the BPU.
Definition bac.hh:456
statistics::Scalar squashBranchDecode
Stat for the number of squashes from decode and commit.
Definition bac.hh:450
statistics::Scalar noBranchMisspredict
Definition bac.hh:447
statistics::Scalar fetchTargets
Stat for total number fetch targets created.
Definition bac.hh:437
static std::string statusStrings[ThreadStatusMax]
Definition bac.hh:429
statistics::Distribution ftSizeDist
Distribution of number of bytes per fetch target.
Definition bac.hh:463
statistics::Distribution ftNumber
Definition bac.hh:464
statistics::Scalar branchMisspredict
Stat for the total number of mispredicted instructions.
Definition bac.hh:446
statistics::Vector status
Stat for total number of cycles spent in each BAC state.
Definition bac.hh:434

Generated on Mon Oct 27 2025 04:13:00 for gem5 by doxygen 1.14.0