42 #include "debug/Branch.hh" 46 {1,3,4,5,7,8,9,11,12,14,15,17,19,21,23,25,27,29,32,34,37,41,45,49,53,58,63,
50 {0,4,5,7,9,11,12,14,16,17,19,22,28,33,39,45,};
53 int n_local_histories,
int local_history_length,
int assoc,
55 int ghist_length,
int block_size,
62 : filterTable(num_filters), acyclic_histories(acyclic_bits.size()),
63 acyclic2_histories(acyclic_bits.size()),
64 blurrypath_histories(blurrypath_bits.size()),
65 ghist_words(ghist_length/block_size+1, 0),
66 path_history(path_length, 0), imli_counter(4,0),
67 localHistories(n_local_histories, local_history_length),
68 recency_stack(assoc), last_ghist_bit(false), occupancy(0)
79 int max_modhist_idx = -1;
80 for (
auto &elem : modhist_indices) {
81 max_modhist_idx = (max_modhist_idx < elem) ? elem : max_modhist_idx;
83 if (max_modhist_idx >= 0) {
86 for (
int i = 0;
i < modhist_lengths.size();
i+= 1) {
90 int max_modpath_idx = -1;
91 for (
auto &elem : modpath_indices) {
92 max_modpath_idx = (max_modpath_idx < elem) ? elem : max_modpath_idx;
94 if (max_modpath_idx >= 0) {
97 for (
int i = 0;
i < modpath_lengths.size();
i+= 1) {
101 for (
int i = 0;
i < table_sizes.size();
i += 1) {
105 for (
int j = 0;
j < table_sizes[
i];
j += 1) {
114 const MultiperspectivePerceptronParams *
p) :
BPredUnit(p),
144 for (
auto &spec :
specs) {
150 for (
auto &spec : specs) {
151 spec->setBitRequirements();
153 const MultiperspectivePerceptronParams *
p =
154 static_cast<const MultiperspectivePerceptronParams *
>(
params());
156 computeBits(p->num_filter_entries, p->num_local_histories,
157 p->local_history_length, p->ignore_path_size);
161 p->num_local_histories,
162 p->local_history_length,
assoc,
173 int nlocal_histories,
int local_history_length,
bool ignore_path_size)
177 totalbits += imli_bits;
180 if (!ignore_path_size) {
187 if (!ignore_path_size) {
189 totalbits += 16 *
len;
192 totalbits +=
doing_local ? (nlocal_histories * local_history_length) : 0;
196 for (
auto &bve : bv) {
200 totalbits += num_filter_entries * 2;
203 for (
auto &abj : abi) {
204 for (
auto abk : abj) {
214 for (
int i = 0;
i <
specs.size();
i +=1) {
224 int table_size_bits = (remaining / (
specs.size()-num_sized));
225 for (
int i = 0;
i <
specs.size();
i += 1) {
228 int my_table_size = table_size_bits /
235 DPRINTF(Branch,
"%d bits of metadata so far, %d left out of " 236 "%d total budget\n", totalbits, remaining,
budgetbits);
237 DPRINTF(Branch,
"table size is %d bits, %d entries for 5 bit, %d entries " 238 "for 6 bit\n", table_size_bits,
241 DPRINTF(Branch,
"%d total bits (%0.2fKB)\n", totalbits,
257 return mpreds < bp.mpreds;
261 for (
int i = 0;
i < best_preds.size();
i += 1) {
265 std::sort(pairs.begin(), pairs.end());
266 for (
int i = 0;
i < (std::min(
nbest, (
int) best_preds.size()));
i += 1) {
267 best_preds[
i] = pairs[
i].index;
276 unsigned long long int h =
g;
314 int history_len =
threadData[tid]->localHistories.getLocalHistoryLength();
317 }
else if (lhist == ((1<<history_len)-1)) {
319 }
else if (lhist == (1<<(history_len-1))) {
321 }
else if (lhist == ((1<<(history_len-1))-1)) {
331 for (
int i = 0;
i <
specs.size();
i += 1) {
334 unsigned int hashed_idx =
getIndex(tid, bi, spec,
i);
336 int counter =
threadData[tid]->tables[
i][hashed_idx];
341 int weight = spec.
coeff * ((spec.
width == 5) ?
344 int val = sign ? -weight : weight;
350 j < std::min(
nbest, (
int) best_preds.size());
353 if (best_preds[j] ==
i) {
367 int max_weight)
const 380 if (counter < max_weight) {
388 if (counter < max_weight) {
410 bool correct = (bi.
yout >= 1) == taken;
412 int abs_yout = abs(bi.
yout);
418 for (
int i = 0;
i <
specs.size();
i += 1) {
421 unsigned int hashed_idx =
getIndex(tid, bi, spec,
i);
423 int counter = tables[
i][hashed_idx];
424 int weight = spec.
coeff * ((spec.
width == 5) ?
426 if (sign) weight = -weight;
427 bool pred = weight >= 1;
437 for (
int i = 0;
i <
specs.size();
i += 1) {
444 bool do_train = !correct || (abs_yout <=
theta);
445 if (!do_train)
return;
455 if (correct && abs_yout <
theta) {
466 for (
int i = 0;
i <
specs.size();
i += 1) {
469 unsigned int hashed_idx =
getIndex(tid, bi, spec,
i);
470 int counter = tables[
i][hashed_idx];
476 tables[
i][hashed_idx] = counter;
478 int weight = ((spec.
width == 5) ?
xlat4[counter] :
xlat[counter]);
490 if ((newyout >= 1) != taken) {
492 int round_counter = 0;
500 for (
int j = 0;
j <
specs.size();
j += 1) {
501 int i = (nrand +
j) %
specs.size();
503 unsigned int hashed_idx =
getIndex(tid, bi, spec, i);
504 int counter = tables[
i][hashed_idx];
507 int weight = ((spec.
width == 5) ?
509 int signed_weight = sign ? -weight : weight;
510 pout = newyout - signed_weight;
511 if ((pout >= 1) == taken) {
521 unsigned int hashed_idx =
getIndex(tid, bi, spec, i);
522 int counter = tables[
i][hashed_idx];
527 tables[
i][hashed_idx] = counter;
529 int weight = ((spec.
width == 5) ?
531 int signed_weight = sign ? -weight : weight;
532 int out = pout + signed_weight;
534 if ((out >= 1) != taken) {
550 bp_history = (
void *)bi;
551 unsigned short int pc2 = pc >> 2;
552 bool ab = !(pc & (1<<
pcbit));
554 bool ab_new = (ghist_words[
i] >> (
blockSize - 1)) & 1;
555 ghist_words[
i] <<= 1;
556 ghist_words[
i] |= ab;
571 bp_history = (
void *)bi;
573 bool use_static =
false;
576 unsigned int findex =
610 void *bp_history,
bool squashed,
627 bool do_train =
true;
636 bool transition =
false;
658 if (
decay && transition &&
672 train(tid, *bi, taken);
675 #define RECORD_FILTERED_IMLI 1 676 #define RECORD_FILTERED_GHIST 2 677 #define RECORD_FILTERED_PATH 4 678 #define RECORD_FILTERED_ACYCLIC 8 679 #define RECORD_FILTERED_MOD 16 680 #define RECORD_FILTERED_BLURRY 32 682 #define RECORD_FILTERED_LOCAL 64 683 #define RECORD_FILTERED_RECENCY 128 687 unsigned int target = corrTarget;
688 if (target < bi->getPC()) {
717 bool ab = hashed_taken;
718 assert(
threadData[tid]->ghist_words.size() > 0);
721 bool ab_new = (a >> (
blockSize - 1)) & 1;
732 assert(
threadData[tid]->path_history.size() > 0);
748 if (bi->
getHPC() % (i + 2) == 0) {
749 memmove(&
threadData[tid]->modpath_histories[i][1],
762 for (
int i = 0;
i < blurrypath_histories.size();
i += 1)
764 if (blurrypath_histories[
i].size() > 0) {
765 unsigned int z = bi->
getPC() >>
i;
766 if (blurrypath_histories[
i][0] != z) {
767 memmove(&blurrypath_histories[
i][1],
768 &blurrypath_histories[i][0],
769 sizeof(
unsigned int) *
770 (blurrypath_histories[i].size() - 1));
771 blurrypath_histories[
i][0] =
z;
781 if (bi->
getHPC() % (i + 2) == 0) {
786 threadData[tid]->mod_histories[
i][0] = hashed_taken;
#define RECORD_FILTERED_ACYCLIC
Base class to implement the predictor tables.
#define RECORD_FILTERED_IMLI
void uncondBranch(ThreadID tid, Addr pc, void *&bp_history) override
unsigned short int getPC2() const
ThreadData(int num_filter, int n_local_histories, int local_history_length, int assoc, const std::vector< std::vector< int >> &blurrypath_bits, int path_length, int ghist_length, int block_size, const std::vector< std::vector< std::vector< bool >>> &acyclic_bits, const std::vector< int > &modhist_indices, const std::vector< int > &modhist_lengths, const std::vector< int > &modpath_indices, const std::vector< int > &modpath_lengths, const std::vector< int > &table_sizes, int n_sign_bits)
const unsigned int record_mask
#define RECORD_FILTERED_BLURRY
bool neverSeen() const
Whether this branch has been observed before.
std::vector< std::vector< unsigned int > > blurrypath_histories
unsigned int getHashFilter(bool last_ghist_bit) const
const double coeff
Coefficient of the feature, models the accuracy of the feature.
const int blockSize
Predictor parameters.
MultiperspectivePerceptron(const MultiperspectivePerceptronParams *params)
static int xlat4[]
Transfer function for 5-width tables.
std::vector< std::vector< unsigned int > > acyclic2_histories
const bool speculative_update
std::vector< std::vector< bool > > acyclic_histories
unsigned int getPC() const
bool prediction
Result of the prediction (true is taken)
std::enable_if< std::is_integral< T >::value, T >::type random()
Use the SFINAE idiom to choose an implementation based on whether the type is integral or floating po...
std::vector< std::vector< short int > > tables
int yout
Score of the perceptron.
History data is kept for each thread.
void setExtraBits(int bits)
Sets the starting number of storage bits to compute the number of table entries.
bool alwaysTakenSoFar() const
Whether this branch has always been observed as taken.
#define RECORD_FILTERED_MOD
void btbUpdate(ThreadID tid, Addr branch_addr, void *&bp_history) override
If a branch is not taken, because the BTB address is invalid or missing, this function sets the appro...
std::vector< std::vector< std::array< bool, 2 > > > sign_bits
std::vector< int > modhist_indices
std::vector< int > imli_counter_bits
virtual void createSpecs()=0
Creates the tables of the predictor.
const Params * params() const
#define RECORD_FILTERED_LOCAL
static int xlat[]
Transfer function for 6-width tables.
bool seenUntaken
Has this branch been not taken at least once?
const unsigned long long int imli_mask1
std::vector< int > table_sizes
void update(ThreadID tid, Addr instPC, bool taken, void *bp_history, bool squashed, const StaticInstPtr &inst, Addr corrTarget=MaxAddr) override
Updates the BP with taken/not taken information.
std::vector< std::vector< bool > > mod_histories
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
bool isUnconditional() const
void squash(ThreadID tid, void *bp_history) override
std::vector< ThreadData * > threadData
const unsigned long long int recencypos_mask
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
unsigned short int getHPC() const
bool seenTaken
Has this branch been taken at least once?
void findBest(ThreadID tid, std::vector< int > &best_preds) const
Finds the best subset of features to use in case of a low-confidence branch, returns the result as an...
std::vector< int > modhist_lengths
bool filtered
Whether this branch has been filtered by the prefetcher.
Basically a wrapper class to hold both the branch predictor and the BTB.
int16_t ThreadID
Thread index/ID type.
unsigned int getIndex(ThreadID tid, const MPPBranchInfo &bi, const HistorySpec &spec, int index) const
Get the position index of a predictor table.
void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
const unsigned long long int imli_mask4
void computeBits(int num_filter_entries, int nlocal_histories, int local_history_length, bool ignore_path_size)
Computes the size in bits of the structures needed to keep track of the history and the predictor tab...
bool doing_local
runtime values and data used to count the size in bits
std::vector< HistorySpec * > specs
Predictor tables.
bool alwaysNotTakenSoFar() const
Whether this branch has always been observed as not taken.
std::vector< std::vector< unsigned short int > > modpath_histories
void satIncDec(bool taken, bool &sign, int &c, int max_weight) const
Auxiliary function to increase a table counter depending on the direction of the branch.
static unsigned int hash(const std::vector< unsigned int short > &recency_stack, const std::vector< int > &table_sizes, unsigned short int pc, int l, int t)
const int width
Width of the table in bits.
Entry of the branch filter.
#define RECORD_FILTERED_PATH
void train(ThreadID tid, MPPBranchInfo &bi, bool taken)
Trains the branch predictor with the given branch and direction.
const unsigned numThreads
Number of the threads for which the branch history is maintained.
int computeOutput(ThreadID tid, MPPBranchInfo &bi)
Computes the output of the predictor for a given branch and the resulting best value in case the pred...
#define RECORD_FILTERED_RECENCY
#define RECORD_FILTERED_GHIST
std::vector< int > modpath_lengths
virtual unsigned int getHash(ThreadID tid, Addr pc, Addr pc2, int t) const =0
Gets the hash to index the table, using the pc of the branch, and the index of the table...
std::vector< int > mpreds
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it...
bool operator<(const Time &l, const Time &r)
bool lookup(ThreadID tid, Addr instPC, void *&bp_history) override
Looks up a given PC in the BP to see if it is taken or not taken.
std::vector< std::vector< int > > blurrypath_bits
std::vector< std::vector< std::vector< bool > > > acyclic_bits
std::vector< int > modpath_indices