Go to the documentation of this file.
42 #include "debug/Branch.hh"
46 {1,3,4,5,7,8,9,11,12,14,15,17,19,21,23,25,27,29,32,34,37,41,45,49,53,58,63,
50 {0,4,5,7,9,11,12,14,16,17,19,22,28,33,39,45,};
53 int n_local_histories,
int local_history_length,
int assoc,
55 int ghist_length,
int block_size,
62 : filterTable(num_filters), acyclic_histories(acyclic_bits.size()),
63 acyclic2_histories(acyclic_bits.size()),
64 blurrypath_histories(blurrypath_bits.size()),
65 ghist_words(ghist_length/block_size+1, 0),
66 path_history(path_length, 0), imli_counter(4,0),
67 localHistories(n_local_histories, local_history_length),
68 recency_stack(assoc), last_ghist_bit(false), occupancy(0)
79 int max_modhist_idx = -1;
81 max_modhist_idx = (max_modhist_idx < elem) ? elem : max_modhist_idx;
83 if (max_modhist_idx >= 0) {
90 int max_modpath_idx = -1;
92 max_modpath_idx = (max_modpath_idx < elem) ? elem : max_modpath_idx;
94 if (max_modpath_idx >= 0) {
114 const MultiperspectivePerceptronParams *
p) :
BPredUnit(
p),
144 for (
auto &spec :
specs) {
150 for (
auto &spec :
specs) {
151 spec->setBitRequirements();
153 const MultiperspectivePerceptronParams *
p =
154 static_cast<const MultiperspectivePerceptronParams *
>(
params());
157 p->local_history_length,
p->ignore_path_size);
161 p->num_local_histories,
162 p->local_history_length,
assoc,
173 int nlocal_histories,
int local_history_length,
bool ignore_path_size)
177 totalbits += imli_bits;
180 if (!ignore_path_size) {
187 if (!ignore_path_size) {
189 totalbits += 16 *
len;
192 totalbits +=
doing_local ? (nlocal_histories * local_history_length) : 0;
196 for (
auto &bve : bv) {
200 totalbits += num_filter_entries * 2;
203 for (
auto &abj : abi) {
204 for (
auto abk : abj) {
214 for (
int i = 0;
i <
specs.size();
i +=1) {
224 int table_size_bits = (remaining / (
specs.size()-num_sized));
225 for (
int i = 0;
i <
specs.size();
i += 1) {
228 int my_table_size = table_size_bits /
235 DPRINTF(Branch,
"%d bits of metadata so far, %d left out of "
236 "%d total budget\n", totalbits, remaining,
budgetbits);
237 DPRINTF(Branch,
"table size is %d bits, %d entries for 5 bit, %d entries "
238 "for 6 bit\n", table_size_bits,
241 DPRINTF(Branch,
"%d total bits (%0.2fKB)\n", totalbits,
257 return mpreds < bp.mpreds;
261 for (
int i = 0;
i < best_preds.size();
i += 1) {
265 std::sort(pairs.begin(), pairs.end());
266 for (
int i = 0;
i < (std::min(
nbest, (
int) best_preds.size()));
i += 1) {
267 best_preds[
i] = pairs[
i].index;
276 unsigned long long int h =
g;
313 int lhist =
threadData[tid]->localHistories[
bi.getPC()];
314 int history_len =
threadData[tid]->localHistories.getLocalHistoryLength();
317 }
else if (lhist == ((1<<history_len)-1)) {
319 }
else if (lhist == (1<<(history_len-1))) {
321 }
else if (lhist == ((1<<(history_len-1))-1)) {
331 for (
int i = 0;
i <
specs.size();
i += 1) {
334 unsigned int hashed_idx =
getIndex(tid,
bi, spec,
i);
336 int counter =
threadData[tid]->tables[
i][hashed_idx];
341 int weight = spec.
coeff * ((spec.
width == 5) ?
344 int val = sign ? -weight : weight;
350 j < std::min(
nbest, (
int) best_preds.size());
353 if (best_preds[
j] ==
i) {
367 int max_weight)
const
380 if (counter < max_weight) {
388 if (counter < max_weight) {
410 bool correct = (
bi.yout >= 1) == taken;
412 int abs_yout = abs(
bi.yout);
418 for (
int i = 0;
i <
specs.size();
i += 1) {
421 unsigned int hashed_idx =
getIndex(tid,
bi, spec,
i);
423 int counter = tables[
i][hashed_idx];
424 int weight = spec.
coeff * ((spec.
width == 5) ?
426 if (sign) weight = -weight;
427 bool pred = weight >= 1;
437 for (
int i = 0;
i <
specs.size();
i += 1) {
444 bool do_train = !correct || (abs_yout <=
theta);
445 if (!do_train)
return;
455 if (correct && abs_yout <
theta) {
466 for (
int i = 0;
i <
specs.size();
i += 1) {
469 unsigned int hashed_idx =
getIndex(tid,
bi, spec,
i);
470 int counter = tables[
i][hashed_idx];
476 tables[
i][hashed_idx] = counter;
478 int weight = ((spec.
width == 5) ?
xlat4[counter] :
xlat[counter]);
490 if ((newyout >= 1) != taken) {
492 int round_counter = 0;
500 for (
int j = 0;
j <
specs.size();
j += 1) {
501 int i = (nrand +
j) %
specs.size();
503 unsigned int hashed_idx =
getIndex(tid,
bi, spec,
i);
504 int counter = tables[
i][hashed_idx];
507 int weight = ((spec.
width == 5) ?
509 int signed_weight = sign ? -weight : weight;
510 pout = newyout - signed_weight;
511 if ((pout >= 1) == taken) {
521 unsigned int hashed_idx =
getIndex(tid,
bi, spec,
i);
522 int counter = tables[
i][hashed_idx];
527 tables[
i][hashed_idx] = counter;
529 int weight = ((spec.
width == 5) ?
531 int signed_weight = sign ? -weight : weight;
532 int out = pout + signed_weight;
534 if ((out >= 1) != taken) {
550 bp_history = (
void *)
bi;
551 unsigned short int pc2 =
pc >> 2;
554 bool ab_new = (ghist_words[
i] >> (
blockSize - 1)) & 1;
555 ghist_words[
i] <<= 1;
556 ghist_words[
i] |= ab;
571 bp_history = (
void *)
bi;
573 bool use_static =
false;
576 unsigned int findex =
580 if (
f.alwaysNotTakenSoFar()) {
582 bi->prediction =
false;
584 }
else if (
f.alwaysTakenSoFar()) {
586 bi->prediction =
true;
596 bi->prediction =
false;
599 bi->prediction = (bestval >= 1);
601 bi->prediction = (
bi->yout >= 1);
605 return bi->prediction;
610 void *bp_history,
bool squashed,
621 if (
bi->isUnconditional()) {
626 bool do_train =
true;
629 int findex =
bi->getHashFilter(
threadData[tid]->last_ghist_bit) %
635 bool transition =
false;
636 if (
f.alwaysNotTakenSoFar() ||
f.alwaysTakenSoFar()) {
640 if (
f.alwaysNotTakenSoFar()) {
645 if (
f.alwaysTakenSoFar()) {
648 f.seenUntaken =
true;
657 if (
decay && transition &&
674 #define RECORD_FILTERED_IMLI 1
675 #define RECORD_FILTERED_GHIST 2
676 #define RECORD_FILTERED_PATH 4
677 #define RECORD_FILTERED_ACYCLIC 8
678 #define RECORD_FILTERED_MOD 16
679 #define RECORD_FILTERED_BLURRY 32
681 #define RECORD_FILTERED_LOCAL 64
682 #define RECORD_FILTERED_RECENCY 128
686 unsigned int target = corrTarget;
687 if (target < bi->getPC()) {
716 bool ab = hashed_taken;
717 assert(
threadData[tid]->ghist_words.size() > 0);
731 assert(
threadData[tid]->path_history.size() > 0);
740 threadData[tid]->updateAcyclic(hashed_taken,
bi->getHPC());
747 if (
bi->getHPC() % (
i + 2) == 0) {
761 for (
int i = 0;
i < blurrypath_histories.size();
i += 1)
763 if (blurrypath_histories[
i].size() > 0) {
764 unsigned int z =
bi->getPC() >>
i;
765 if (blurrypath_histories[
i][0] !=
z) {
766 memmove(&blurrypath_histories[
i][1],
767 &blurrypath_histories[
i][0],
768 sizeof(
unsigned int) *
769 (blurrypath_histories[
i].size() - 1));
770 blurrypath_histories[
i][0] =
z;
780 if (
bi->getHPC() % (
i + 2) == 0) {
785 threadData[tid]->mod_histories[
i][0] = hashed_taken;
799 threadData[tid]->localHistories.update(
bi->getPC(), hashed_taken);
const unsigned long long int imli_mask1
static int xlat[]
Transfer function for 6-width tables.
std::vector< std::vector< std::vector< bool > > > acyclic_bits
ThreadData(int num_filter, int n_local_histories, int local_history_length, int assoc, const std::vector< std::vector< int >> &blurrypath_bits, int path_length, int ghist_length, int block_size, const std::vector< std::vector< std::vector< bool >>> &acyclic_bits, const std::vector< int > &modhist_indices, const std::vector< int > &modhist_lengths, const std::vector< int > &modpath_indices, const std::vector< int > &modpath_lengths, const std::vector< int > &table_sizes, int n_sign_bits)
void update(ThreadID tid, Addr instPC, bool taken, void *bp_history, bool squashed, const StaticInstPtr &inst, Addr corrTarget) override
Updates the BP with taken/not taken information.
const double coeff
Coefficient of the feature, models the accuracy of the feature.
std::vector< std::vector< unsigned int > > blurrypath_histories
int16_t ThreadID
Thread index/ID type.
#define RECORD_FILTERED_LOCAL
std::vector< std::vector< unsigned int > > acyclic2_histories
bool lookup(ThreadID tid, Addr instPC, void *&bp_history) override
Looks up a given PC in the BP to see if it is taken or not taken.
bool doing_local
runtime values and data used to count the size in bits
std::vector< int > modpath_indices
void computeBits(int num_filter_entries, int nlocal_histories, int local_history_length, bool ignore_path_size)
Computes the size in bits of the structures needed to keep track of the history and the predictor tab...
bool operator<(const Time &l, const Time &r)
std::vector< int > imli_counter_bits
std::vector< HistorySpec * > specs
Predictor tables.
#define RECORD_FILTERED_ACYCLIC
std::vector< std::vector< bool > > acyclic_histories
const bool speculative_update
std::vector< int > modhist_lengths
void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
#define RECORD_FILTERED_PATH
#define RECORD_FILTERED_IMLI
const unsigned numThreads
Number of the threads for which the branch history is maintained.
const int blockSize
Predictor parameters.
void train(ThreadID tid, MPPBranchInfo &bi, bool taken)
Trains the branch predictor with the given branch and direction.
void setExtraBits(int bits)
Sets the starting number of storage bits to compute the number of table entries.
std::vector< std::vector< short int > > tables
virtual void createSpecs()=0
Creates the tables of the predictor.
std::vector< std::vector< bool > > mod_histories
bool seenTaken
Has this branch been taken at least once?
Basically a wrapper class to hold both the branch predictor and the BTB.
std::vector< int > modhist_indices
void squash(ThreadID tid, void *bp_history) override
std::vector< std::vector< std::array< bool, 2 > > > sign_bits
std::vector< ThreadData * > threadData
int computeOutput(ThreadID tid, MPPBranchInfo &bi)
Computes the output of the predictor for a given branch and the resulting best value in case the pred...
std::vector< int > modpath_lengths
Base class to implement the predictor tables.
bool seenUntaken
Has this branch been not taken at least once?
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
#define RECORD_FILTERED_RECENCY
const Params * params() const
virtual unsigned int getHash(ThreadID tid, Addr pc, Addr pc2, int t) const =0
Gets the hash to index the table, using the pc of the branch, and the index of the table.
#define RECORD_FILTERED_MOD
void satIncDec(bool taken, bool &sign, int &c, int max_weight) const
Auxiliary function to increase a table counter depending on the direction of the branch.
unsigned int getIndex(ThreadID tid, const MPPBranchInfo &bi, const HistorySpec &spec, int index) const
Get the position index of a predictor table.
static unsigned int hash(const std::vector< unsigned int short > &recency_stack, const std::vector< int > &table_sizes, unsigned short int pc, int l, int t)
const unsigned long long int recencypos_mask
const int width
Width of the table in bits
const unsigned long long int imli_mask4
static int xlat4[]
Transfer function for 5-width tables.
void uncondBranch(ThreadID tid, Addr pc, void *&bp_history) override
#define RECORD_FILTERED_BLURRY
const unsigned int record_mask
#define RECORD_FILTERED_GHIST
std::enable_if< std::is_integral< T >::value, T >::type random()
Use the SFINAE idiom to choose an implementation based on whether the type is integral or floating po...
std::vector< int > table_sizes
std::vector< std::vector< unsigned short int > > modpath_histories
Entry of the branch filter.
std::vector< int > mpreds
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
History data is kept for each thread.
std::vector< std::vector< int > > blurrypath_bits
void btbUpdate(ThreadID tid, Addr branch_addr, void *&bp_history) override
If a branch is not taken, because the BTB address is invalid or missing, this function sets the appro...
void findBest(ThreadID tid, std::vector< int > &best_preds) const
Finds the best subset of features to use in case of a low-confidence branch, returns the result as an...
MultiperspectivePerceptron(const MultiperspectivePerceptronParams *params)
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Generated on Wed Sep 30 2020 14:02:09 for gem5 by doxygen 1.8.17