48#include "debug/Activity.hh"
49#include "debug/BAC.hh"
50#include "debug/Branch.hh"
51#include "debug/Drain.hh"
52#include "debug/FTQ.hh"
53#include "debug/Fetch.hh"
54#include "debug/O3PipeView.hh"
55#include "params/BaseO3CPU.hh"
79 bpu(params.branchPred),
96 "Fetch target width should be larger than fetch buffer size!");
98 "More than 1 thread has not been tested with the decoupled "
100 fatal_if(
bpu ==
nullptr,
"Branch predictor not configured");
103 bacPC[
i].reset(params.isa[0]->newPCState());
104 stalls[
i] = {
false,
false,
false};
111 return cpu->name() +
".bac";
153 stalls[tid].fetch =
false;
154 stalls[tid].drain =
false;
157 assert(
ftq !=
nullptr);
158 ftq->resetState(tid);
176 DPRINTF(Drain,
"Resume from draining.\n");
189 assert(
ftq->isEmpty(
i));
192 bpu->drainSanityCheck();
201 if (!
ftq->isEmpty(
i)) {
216 assert(
cpu->isDraining());
217 assert(!
stalls[tid].drain);
218 DPRINTF(Drain,
"%i: Thread drained.\n", tid);
226 DPRINTF(Activity,
"Activating stage.\n");
236 DPRINTF(Activity,
"Deactivating stage.\n");
245 bool ret_val =
false;
248 DPRINTF(
BAC,
"[tid:%i] Fetch stall detected.\n", tid);
253 DPRINTF(
BAC,
"[tid:%i] BPU stall detected.\n", tid);
269 DPRINTF(Activity,
"[tid:%i] Activating stage.\n", tid);
281 DPRINTF(Activity,
"Deactivating stage.\n");
295 DPRINTF(
BAC,
"[tid:%i] Squashing from commit. PC = %s\n", tid,
306 if (
fromCommit->commitInfo[tid].mispredictInst &&
307 fromCommit->commitInfo[tid].mispredictInst->isControl()) {
311 fromCommit->commitInfo[tid].branchTaken, tid,
true);
312 stats.branchMisspredict++;
313 stats.squashBranchCommit++;
316 if (
fromCommit->commitInfo[tid].mispredictInst) {
318 "[tid:%i] Squashing due to mispredict of "
319 "non-control instruction: %s\n",
322 .mispredictInst->staticInst->disassemble(
324 .mispredictInst->pcState()
327 stats.noBranchMisspredict++;
331 }
else if (
fromCommit->commitInfo[tid].doneSeqNum) {
339 DPRINTF(
Fetch,
"[tid:%i] Squashing from decode. PC = %s\n", tid,
347 if (
fromDecode->decodeInfo[tid].branchMispredict) {
351 fromDecode->decodeInfo[tid].branchTaken, tid,
false);
352 stats.branchMisspredict++;
353 stats.squashBranchDecode++;
356 stats.noBranchMisspredict++;
363 DPRINTF(
BAC,
"Squashing from fetch with PC = %s\n",
385 assert(
cpu->isDraining());
386 DPRINTF(
BAC,
"[tid:%i] Drain stall detected.\n", tid);
403 DPRINTF(
BAC,
"[tid:%i] FTQ is invalid. Wait for resteer.\n", tid);
412 DPRINTF(
BAC,
"[tid:%i] FTQ is locked\n", tid);
418 DPRINTF(
BAC,
"[tid:%i] FTQ not locked anymore -> Running\n", tid);
426 DPRINTF(
BAC,
"[tid:%i] FTQ not full anymore -> Running\n", tid);
434 DPRINTF(
BAC,
"[tid:%i] Done squashing, switching to running.\n", tid);
439 if (
ftq->isFull(tid)) {
442 DPRINTF(
BAC,
"[tid:%i] FTQ is full. Blocking BAC.\n", tid);
450 if (
ftq->isReady(tid) &&
453 DPRINTF(
BAC,
"[tid:%i] Attempt to run\n", tid);
470 DPRINTF(
BAC,
"%s(tid:%i): FTQ sz: %i\n", __func__, tid,
ftq->size(tid));
475 if (ft->bpuHistory) {
476 bpu->squashHistory(tid, ft->bpuHistory);
477 assert(ft->bpuHistory ==
nullptr);
478 ft->bpuHistory =
nullptr;
490 DPRINTF(
BAC,
"[tid:%i] Squashing FTQ.\n", tid);
505 bool activity =
false;
506 bool status_change =
false;
537 status_change =
true;
547 DPRINTF(Activity,
"Activity this cycle.\n");
549 cpu->activityThisCycle();
556 auto ft = std::make_shared<FetchTarget>(tid, start_pc,
557 cpu->getAndIncrementFTSeq());
559 DPRINTF(
BAC,
"Create new fetch target ftn:%llu\n", ft->ftNum());
560 stats.fetchTargets++;
577 assert(ft->bpuHistory ==
nullptr);
578 bool taken =
bpu->predict(inst, ft->ftNum(),
pc, tid, ft->bpuHistory);
580 DPRINTF(
Branch,
"[tid:%i, ftn:%llu] History added.\n", tid, ft->ftNum());
625 Addr start_addr = search_addr;
631 bool branch_found =
false;
632 bool predict_taken =
false;
644 branch_found =
bpu->BTBValid(tid, search_addr);
663 cur_pc.
set(search_addr);
666 std::unique_ptr<PCStateBase> next_pc(cur_pc.
clone());
673 staticInst =
bpu->BTBGetInst(tid, cur_pc.
instAddr());
678 predict_taken =
predict(tid, staticInst, curFT, *next_pc);
681 "[tid:%i, ftn:%llu] Branch found at PC %#x "
682 "taken?:%i, target:%#x\n",
683 tid, curFT->ftNum(), cur_pc.
instAddr(), predict_taken,
684 next_pc->instAddr());
688 stats.predTakenBranches++;
693 if (!predict_taken) {
714 if (staticInst && staticInst->
isMicroop() &&
716 stats.branchesNotLastuOp++;
720 "Branch detected which is not the last uOp %s. "
721 "Continue with next address.\n",
730 curFT->finalize(cur_pc, branch_found, predict_taken, *next_pc);
732 ftq->insert(tid, curFT);
736 "[tid:%i] [fn:%llu] %i addresses searched. "
737 "Branch found:%i. Continue with PC:%s in next cycle\n",
738 tid, curFT->ftNum(), (search_addr - start_addr), branch_found,
741 stats.ftSizeDist.sample(search_addr - start_addr);
744 set(cur_pc, *next_pc);
751 if (
ftq->isFull(tid)) {
754 status_change =
true;
758 stats.ftNumber.sample(num_ft);
768 assert(ft !=
nullptr);
770 assert(ft->inRange(
pc.instAddr()));
772 assert(ft->ftNum() ==
ftq->readHead(tid)->ftNum());
774 stats.preDecUpdate[brType]++;
777 "%s(tid:%i, sn:%lu, inst: %s, PC:%#x, FT[%llu, taken=%i, "
780 pc.instAddr(), ft->ftNum(), ft->predTaken(), ft->endAddress());
782 bool target_set =
false;
788 if (ft->isExitBranch(
pc.instAddr()) && ft->bpuHistory !=
nullptr) {
792 std::swap(hist, ft->bpuHistory);
795 "Pop history from FT:%llu => sn:%llu, PC:%#x, taken:%i, "
798 hist->
target->instAddr());
808 if (hist && (hist->
type != brType)) {
809 DPRINTF(
Branch,
"Branch types dont match. Delete history\n", tid);
810 stats.typeMissmatch++;
814 std::swap(ft->bpuHistory, hist);
835 DPRINTF(
Branch,
"No history for complex instruction found. \n");
836 stats.multiBranchInst++;
849 bpu->predict(inst, ft->ftNum(),
pc, tid, hist);
855 if (hist ==
nullptr) {
856 DPRINTF(
BAC,
"[tid:%i, sn:%llu] No branch history for PC:%#x\n", tid,
857 seqNum,
pc.instAddr());
858 stats.noHistByType[brType]++;
876 set(hist->
target, std::unique_ptr<PCStateBase>(
pc.clone()));
880 assert(hist !=
nullptr);
881 assert(hist->
type == brType);
886 bpu->insertPredictorHistory(tid, hist);
893 set(
pc, ft->readPredTarg());
913 if (inst->isControl()) {
921 inst->staticInst, fetch_pc, ft);
926 bpu->predict(inst->staticInst, inst->seqNum, fetch_pc, tid);
930 "[tid:%i] [sn:%llu] Branch at PC %#x "
931 "predicted %s to go to %s\n",
932 tid, inst->seqNum, inst->pcState().instAddr(),
933 predict_taken ?
"taken" :
"not taken", fetch_pc);
934 inst->setPredTarg(fetch_pc);
935 inst->setPredTaken(predict_taken);
940 ++
stats.predTakenBranches;
946 inst->staticInst->advancePC(fetch_pc);
947 inst->setPredTarg(fetch_pc);
948 inst->setPredTaken(
false);
949 predict_taken =
false;
959 if ((ft->isExitInst(inst->pcState().instAddr()) &&
960 (!inst->isMicroop() || inst->isLastMicroop())) ||
961 !
ftq->isReady(tid)) {
963 DPRINTF(
BAC,
"[tid:%i][ft:%llu] Reached end of Fetch Target\n",
970 return predict_taken;
976 "Number of cycles BAC in state"),
978 "Number of fetch targets created "),
980 "Number of branches that BAC encountered"),
982 "Number of branches that BAC predicted taken."),
984 "Number of branches that fetch encountered which are not the "
985 "last uOp within a macrooperation. Jump to itself."),
987 "Number of mispredicted branches"),
989 "Number of non-branch instructions mispredicted"),
991 "Number of branches squashed from decode"),
993 "Number of branches squashed from commit"),
995 "Number of branches extracted from the predecoder"),
997 "Number and type of branches that were undetected by the BPU."),
999 "Number branches where the branch type miss match"),
1001 "Number branches because its not the last branch."),
1003 "Number of bytes per fetch target"),
1005 "Number of fetch target inserted to the FTQ per cycle")
1024 for (
int i = 0;
i < enums::Num_BranchType;
i++) {
virtual void set(Addr val)
Addr instAddr() const
Returns the memory address of the instruction this PC points to.
virtual PCStateBase * clone() const =0
Base class for branch operations.
bool isUncondCtrl() const
virtual void advancePC(PCStateBase &pc_state) const =0
bool isLastMicroop() const
const Cycles decodeToFetchDelay
Decode to fetch delay.
bool isDrained() const
Has the stage drained?
CPU * cpu
Pointer to the main CPU.
void drainResume()
Resume after a drain.
TimeBuffer< TimeStruct >::wire fromFetch
Wire to get fetch's information from backwards time buffer.
std::list< ThreadID > * activeThreads
List of Active FTQ Threads.
Stalls stalls[MaxThreads]
Tracks which stages are telling the ftq to stall.
bool wroteToTimeBuffer
Variable that tracks if BAC has written to the time buffer this cycle.
bool checkStall(ThreadID tid) const
Checks if a thread is stalled.
const unsigned fetchTargetWidth
The maximum width of a fetch target.
const unsigned minInstSize
The minimum size an instruction can have in the current architecture.
void startupStage()
Initialize stage.
void generateFetchTargets(ThreadID tid, bool &status_change)
Main function that feeds the FTQ with new fetch targets.
const Cycles fetchToBacDelay
Fetch to BAC delay.
const unsigned int cacheBlkSize
Cache block size.
void setFetchTargetQueue(FTQ *_ptr)
Connect the FTQ.
const Cycles bacToFetchDelay
BAC to fetch delay.
void updateBACStatus()
Updates overall BAC stage status; to be called at the end of each cycle.
const unsigned maxTakenPredPerCycle
ThreadStatus bacStatus[MaxThreads]
Per-thread status.
const unsigned maxFTPerCycle
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets pointer to list of active threads.
const ThreadID numThreads
Number of threads.
BACStatus _status
Overall BAC stage status.
bool checkSignalsAndUpdate(ThreadID tid)
Checks all input signals and updates the status as necessary.
bool predict(ThreadID tid, const StaticInstPtr &inst, const FetchTargetPtr &ft, PCStateBase &pc)
The prediction function for the BAC stage.
bool updatePreDecode(ThreadID tid, const InstSeqNum seqNum, const StaticInstPtr &inst, PCStateBase &pc, const FetchTargetPtr &ft)
Pre-decode update. After predecoding instruction in the fetch ...
BAC(CPU *_cpu, const BaseO3CPUParams &params)
BAC constructor.
void drainStall(ThreadID tid)
Stall the fetch stage after reaching a safe drain point.
void drainSanityCheck() const
Perform sanity checks after a drain.
void resetStage()
Reset this pipeline stage.
bool updatePC(const DynInstPtr &inst, PCStateBase &fetch_pc, FetchTargetPtr &ft)
Calculate the next PC address depending on the instruction type and the branch prediction.
const Cycles commitToFetchDelay
Commit to fetch delay.
const bool decoupledFrontEnd
Enables the decoupled front-end.
FetchTargetPtr newFetchTarget(ThreadID tid, const PCStateBase &start_pc)
Create a new fetch target.
gem5::o3::BAC::BACStats stats
void switchToInactive()
Changes the status of this stage to inactive, and indicates this to the CPU.
bool checkAndUpdateBPUSignals(ThreadID tid)
Check the backward signals that update the BPU.
branch_prediction::BranchType BranchType
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the main backwards communication time buffer pointer.
void clearStates(ThreadID tid)
Clear all thread-specific states.
std::string name() const
Returns the name of the stage.
void switchToActive()
Changes the status of this stage to active, and indicates this to the CPU.
void squashBpuHistories(ThreadID tid)
Squashes the BPU histories in the FTQ.
void tick()
Process all input signals and create the next fetch target.
std::unique_ptr< PCStateBase > bacPC[MaxThreads]
The decoupled PC which runs ahead of fetch.
TimeBuffer< TimeStruct >::wire fromDecode
Wire to get decode's information from backwards time buffer.
void squash(const PCStateBase &new_pc, ThreadID tid)
Squashes BAC for a specific thread and resets the PC.
branch_prediction::BPredUnit * bpu
BPredUnit.
FTQ * ftq
Fetch target Queue.
TimeBuffer< TimeStruct > * timeBuffer
Time buffer interface.
TimeBuffer< TimeStruct >::wire fromCommit
Wire to get commit's information from backwards time buffer.
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Fetch class handles both single threaded and SMT fetch.
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
std::string toString(BranchType type)
BranchType getBranchType(StaticInstPtr inst)
std::shared_ptr< FetchTarget > FetchTargetPtr
static constexpr int MaxThreads
RefCountingPtr< DynInst > DynInstPtr
const FlagsType pdf
Print the percent of the total that this entry represents.
const FlagsType nozero
Don't print if this is zero.
const FlagsType total
Print the total.
Copyright (c) 2024 Arm Limited All rights reserved.
int16_t ThreadID
Thread index/ID type.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
RefCountingPtr< StaticInst > StaticInstPtr
Branch Predictor Unit (BPU) history object PredictorHistory This class holds all information needed t...
InstSeqNum seqNum
The sequence number for the predictor history entry.
bool predTaken
Whether or not it was predicted taken.
void * bpHistory
Pointer to the history objects passed back from the branch predictor subcomponents.
const Addr pc
The PC associated with the sequence number.
const BranchType type
The type of the branch.
std::unique_ptr< PCStateBase > target
The predicted target.
statistics::Vector preDecUpdate
Number of post updates.
BACStats(CPU *cpu, BAC *bac)
statistics::Scalar branches
Total number of branches detected.
statistics::Scalar predTakenBranches
Total number of branches predicted taken.
statistics::Scalar branchesNotLastuOp
Total number of fetched branches.
statistics::Scalar multiBranchInst
statistics::Scalar typeMissmatch
Stat for the two corner cases.
statistics::Scalar squashBranchCommit
statistics::Vector noHistByType
Number of branches undetected by the BPU.
statistics::Scalar squashBranchDecode
Stat for the number of squashes from decode and commit.
statistics::Scalar noBranchMisspredict
statistics::Scalar fetchTargets
Stat for total number fetch targets created.
static std::string statusStrings[ThreadStatusMax]
statistics::Distribution ftSizeDist
Distribution of number of bytes per fetch target.
statistics::Distribution ftNumber
statistics::Scalar branchMisspredict
Stat for total number of mispredicted instructions.
statistics::Vector status
Stat for total number of cycles spent in each BAC state.