gem5  v19.0.0.0
multiperspective_perceptron.cc
Go to the documentation of this file.
1 /*
2  * Copyright 2019 Texas A&M University
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright notice,
8  * this list of conditions and the following disclaimer.
9  *
10  * 2. Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  *
14  * 3. Neither the name of the copyright holder nor the names of its
15  * contributors may be used to endorse or promote products derived from this
16  * software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * Author: Daniel A. Jiménez
31  * Adapted to gem5 by: Javier Bueno Hedo
32  *
33  */
34 
35  /*
36  * Multiperspective Perceptron Predictor (by Daniel A. Jiménez)
37  */
38 
40 
41 #include "base/random.hh"
42 #include "debug/Branch.hh"
43 
44 int
46  {1,3,4,5,7,8,9,11,12,14,15,17,19,21,23,25,27,29,32,34,37,41,45,49,53,58,63,
47  69,76,85,94,106,};
48 int
50  {0,4,5,7,9,11,12,14,16,17,19,22,28,33,39,45,};
51 
53  int n_local_histories, int local_history_length, int assoc,
54  const std::vector<std::vector<int>> &blurrypath_bits, int path_length,
55  int ghist_length, int block_size,
56  const std::vector<std::vector<std::vector<bool>>> &acyclic_bits,
57  const std::vector<int> &modhist_indices,
58  const std::vector<int> &modhist_lengths,
59  const std::vector<int> &modpath_indices,
60  const std::vector<int> &modpath_lengths,
61  const std::vector<int> &table_sizes, int n_sign_bits)
62  : filterTable(num_filters), acyclic_histories(acyclic_bits.size()),
63  acyclic2_histories(acyclic_bits.size()),
64  blurrypath_histories(blurrypath_bits.size()),
65  ghist_words(ghist_length/block_size+1, 0),
66  path_history(path_length, 0), imli_counter(4,0),
67  localHistories(n_local_histories, local_history_length),
68  recency_stack(assoc), last_ghist_bit(false), occupancy(0)
69 {
70  for (int i = 0; i < blurrypath_bits.size(); i+= 1) {
71  blurrypath_histories[i].resize(blurrypath_bits[i].size());
72  }
73 
74  for (int i = 0; i < acyclic_bits.size(); i += 1) {
75  acyclic_histories[i].resize(acyclic_bits[i].size());
76  acyclic2_histories[i].resize(acyclic_bits[i].size());
77  }
78 
79  int max_modhist_idx = -1;
80  for (auto &elem : modhist_indices) {
81  max_modhist_idx = (max_modhist_idx < elem) ? elem : max_modhist_idx;
82  }
83  if (max_modhist_idx >= 0) {
84  mod_histories.resize(max_modhist_idx + 1);
85  }
86  for (int i = 0; i < modhist_lengths.size(); i+= 1) {
87  mod_histories[modhist_indices[i]].resize(modhist_lengths[i]);
88  }
89 
90  int max_modpath_idx = -1;
91  for (auto &elem : modpath_indices) {
92  max_modpath_idx = (max_modpath_idx < elem) ? elem : max_modpath_idx;
93  }
94  if (max_modpath_idx >= 0) {
95  modpath_histories.resize(max_modpath_idx + 1);
96  }
97  for (int i = 0; i < modpath_lengths.size(); i+= 1) {
98  modpath_histories[modpath_indices[i]].resize(modpath_lengths[i]);
99  }
100 
101  for (int i = 0; i < table_sizes.size(); i += 1) {
102  mpreds.push_back(0);
103  tables.push_back(std::vector<short int>(table_sizes[i]));
104  sign_bits.push_back(std::vector<std::array<bool, 2>>(table_sizes[i]));
105  for (int j = 0; j < table_sizes[i]; j += 1) {
106  for (int k = 0; k < n_sign_bits; k += 1) {
107  sign_bits[i][j][k] = (i & 1) | (k & 1);
108  }
109  }
110  }
111 }
112 
114  const MultiperspectivePerceptronParams *p) : BPredUnit(p),
115  blockSize(p->block_size), pcshift(p->pcshift), threshold(p->threshold),
124  threadData(p->numThreads, nullptr), doing_local(false),
125  doing_recency(false), assoc(0), ghist_length(p->initial_ghist_length),
127  theta(p->initial_theta), extrabits(0), imli_counter_bits(4),
129 {
130  fatal_if(speculative_update, "Speculative update not implemented");
131 }
132 
133 void
135 {
136  extrabits = bits;
137 }
138 
139 void
141 {
142  createSpecs();
143 
144  for (auto &spec : specs) {
145  // initial assignation of values
146  table_sizes.push_back(spec->size);
147  }
148 
149  // Update bit requirements and runtime values
150  for (auto &spec : specs) {
151  spec->setBitRequirements();
152  }
153  const MultiperspectivePerceptronParams *p =
154  static_cast<const MultiperspectivePerceptronParams *>(params());
155 
156  computeBits(p->num_filter_entries, p->num_local_histories,
157  p->local_history_length, p->ignore_path_size);
158 
159  for (int i = 0; i < threadData.size(); i += 1) {
160  threadData[i] = new ThreadData(p->num_filter_entries,
161  p->num_local_histories,
162  p->local_history_length, assoc,
168  }
169 }
170 
171 void
173  int nlocal_histories, int local_history_length, bool ignore_path_size)
174 {
175  int totalbits = extrabits;
176  for (auto &imli_bits : imli_counter_bits) {
177  totalbits += imli_bits;
178  }
179  totalbits += ghist_length;
180  if (!ignore_path_size) {
181  totalbits += path_length * 16;
182  }
183  totalbits += (threshold >= 0) ? (tunebits * specs.size()) : 0;
184  for (auto &len : modhist_lengths) {
185  totalbits += len;
186  }
187  if (!ignore_path_size) {
188  for (auto &len : modpath_lengths) {
189  totalbits += 16 * len;
190  }
191  }
192  totalbits += doing_local ? (nlocal_histories * local_history_length) : 0;
193  totalbits += doing_recency ? (assoc * 16) : 0;
194 
195  for (auto &bv : blurrypath_bits) {
196  for (auto &bve : bv) {
197  totalbits += bve;
198  }
199  }
200  totalbits += num_filter_entries * 2;
201 
202  for (auto &abi : acyclic_bits) {
203  for (auto &abj : abi) {
204  for (auto abk : abj) {
205  totalbits += abk;
206  }
207  }
208  }
209 
210  int remaining = budgetbits - totalbits;
211 
212  // count the tables that have already been assigned sizes
213  int num_sized = 0;
214  for (int i = 0; i < specs.size(); i +=1) {
215  if (table_sizes[i] != 0) {
216  int sz = table_sizes[i] * (specs[i]->width + (n_sign_bits - 1));
217  totalbits += sz;
218  remaining -= sz;
219  num_sized += 1;
220  }
221  }
222 
223  // whatever is left, we divide among the rest of the tables
224  int table_size_bits = (remaining / (specs.size()-num_sized));
225  for (int i = 0; i < specs.size(); i += 1) {
226  // if a table doesn't have a size yet, give it one and count those bits
227  if (!table_sizes[i]) {
228  int my_table_size = table_size_bits /
229  (specs[i]->width + (n_sign_bits - 1)); // extra sign bits
230  table_sizes[i] = my_table_size;
231  totalbits += my_table_size * (specs[i]->width + (n_sign_bits - 1));
232  }
233  }
234 
235  DPRINTF(Branch, "%d bits of metadata so far, %d left out of "
236  "%d total budget\n", totalbits, remaining, budgetbits);
237  DPRINTF(Branch, "table size is %d bits, %d entries for 5 bit, %d entries "
238  "for 6 bit\n", table_size_bits,
239  table_size_bits / (5 + (n_sign_bits - 1)),
240  table_size_bits / (6 + (n_sign_bits - 1)));
241  DPRINTF(Branch, "%d total bits (%0.2fKB)\n", totalbits,
242  totalbits / 8192.0);
243 }
244 
245 void
247  std::vector<int> &best_preds) const
248 {
249  if (threshold < 0) {
250  return;
251  }
252  struct BestPair {
253  int index;
254  int mpreds;
255  bool operator<(BestPair const &bp) const
256  {
257  return mpreds < bp.mpreds;
258  }
259  };
260  std::vector<BestPair> pairs(best_preds.size());
261  for (int i = 0; i < best_preds.size(); i += 1) {
262  pairs[i].index = i;
263  pairs[i].mpreds = threadData[tid]->mpreds[i];
264  }
265  std::sort(pairs.begin(), pairs.end());
266  for (int i = 0; i < (std::min(nbest, (int) best_preds.size())); i += 1) {
267  best_preds[i] = pairs[i].index;
268  }
269 }
270 
271 unsigned int
273  const HistorySpec &spec, int index) const
274 {
275  unsigned int g = spec.getHash(tid, bi.getPC(), bi.getPC2(), index);
276  unsigned long long int h = g;
277  // shift the hash from the feature to xor with the hashed PC
278  if (hshift < 0) {
279  h <<= -hshift;
280  h ^= bi.getPC2();
281  } else {
282  h <<= hshift;
283  h ^= bi.getHPC();
284  }
285  // xor in the imli counter(s) and/or recency position based on the masks
286  if ((1ull<<index) & imli_mask1) {
287  h ^= threadData[tid]->imli_counter[0];
288  }
289  if ((1ull<<index) & imli_mask4) {
290  h ^= threadData[tid]->imli_counter[3];
291  }
292  if (doing_recency) {
293  if ((1ull<<index) & recencypos_mask) {
294  h ^= RECENCYPOS::hash(threadData[tid]->recency_stack, table_sizes,
295  bi.getPC2(), 31, index);
296  }
297  }
298  h %= table_sizes[index];
299  return h;
300 }
301 
302 int
304 {
305  // list of best predictors
306  std::vector<int> best_preds(specs.size(), -1);
307 
308  // initialize sum
309  bi.yout = 0;
310 
311  // bias the prediction by whether the local history is
312  // one of four distinctive patterns
313  int lhist = threadData[tid]->localHistories[bi.getPC()];
314  int history_len = threadData[tid]->localHistories.getLocalHistoryLength();
315  if (lhist == 0) {
316  bi.yout = bias0;
317  } else if (lhist == ((1<<history_len)-1)) {
318  bi.yout = bias1;
319  } else if (lhist == (1<<(history_len-1))) {
320  bi.yout = biasmostly0;
321  } else if (lhist == ((1<<(history_len-1))-1)) {
322  bi.yout = biasmostly1;
323  }
324  // find the best subset of features to use in case of a low-confidence
325  // branch
326  findBest(tid, best_preds);
327 
328  // begin computation of the sum for low-confidence branch
329  int bestval = 0;
330 
331  for (int i = 0; i < specs.size(); i += 1) {
332  HistorySpec const &spec = *specs[i];
333  // get the hash to index the table
334  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
335  // add the weight; first get the weight's magnitude
336  int counter = threadData[tid]->tables[i][hashed_idx];
337  // get the sign
338  bool sign =
339  threadData[tid]->sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
340  // apply the transfer function and multiply by a coefficient
341  int weight = spec.coeff * ((spec.width == 5) ?
342  xlat4[counter] : xlat[counter]);
343  // apply the sign
344  int val = sign ? -weight : weight;
345  // add the value
346  bi.yout += val;
347  // if this is one of those good features, add the value to bestval
348  if (threshold >= 0) {
349  for (int j = 0;
350  j < std::min(nbest, (int) best_preds.size());
351  j += 1)
352  {
353  if (best_preds[j] == i) {
354  bestval += val;
355  break;
356  }
357  }
358  }
359  }
360  // apply a fudge factor to affect when training is triggered
361  bi.yout *= fudge;
362  return bestval;
363 }
364 
365 void
366 MultiperspectivePerceptron::satIncDec(bool taken, bool &sign, int &counter,
367  int max_weight) const
368 {
369  if (taken) {
370  // increment sign/magnitude
371  if (sign) {
372  // go toward 0 away from negative max weight
373  if (counter == 0) {
374  sign = false; // moved to positive 0
375  } else {
376  counter -= 1;
377  }
378  } else {
379  // go toward max weight away from 0
380  if (counter < max_weight) {
381  counter += 1;
382  }
383  }
384  } else {
385  // decrement sign/magnitude
386  if (sign) {
387  // go toward negative max weight down from 0
388  if (counter < max_weight) {
389  counter += 1;
390  }
391  } else {
392  // go toward 0 away from max weight
393  if (counter == 0) {
394  sign = true; // negative 0
395  } else {
396  counter -= 1;
397  }
398  }
399  }
400 }
401 
402 void
404 {
405  std::vector<std::vector<short int>> &tables = threadData[tid]->tables;
407  threadData[tid]->sign_bits;
408  std::vector<int> &mpreds = threadData[tid]->mpreds;
409  // was the prediction correct?
410  bool correct = (bi.yout >= 1) == taken;
411  // what is the magnitude of yout?
412  int abs_yout = abs(bi.yout);
413  // keep track of mispredictions per table
414  if (threshold >= 0) if (!tuneonly || (abs_yout <= threshold)) {
415  bool halve = false;
416 
417  // for each table, figure out if there was a misprediction
418  for (int i = 0; i < specs.size(); i += 1) {
419  HistorySpec const &spec = *specs[i];
420  // get the hash to index the table
421  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
422  bool sign = sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
423  int counter = tables[i][hashed_idx];
424  int weight = spec.coeff * ((spec.width == 5) ?
425  xlat4[counter] : xlat[counter]);
426  if (sign) weight = -weight;
427  bool pred = weight >= 1;
428  if (pred != taken) {
429  mpreds[i] += 1;
430  if (mpreds[i] == (1 << tunebits) - 1) {
431  halve = true;
432  }
433  }
434  }
435  // if we reach the maximum counter value, halve all the counters
436  if (halve) {
437  for (int i = 0; i < specs.size(); i += 1) {
438  mpreds[i] /= 2;
439  }
440  }
441  }
442  // if the branch was predicted incorrectly or the correct
443  // prediction was weak, update the weights
444  bool do_train = !correct || (abs_yout <= theta);
445  if (!do_train) return;
446 
447  // adaptive theta training, adapted from O-GEHL
448  if (!correct) {
449  thresholdCounter += 1;
450  if (thresholdCounter >= speed) {
451  theta += 1;
452  thresholdCounter = 0;
453  }
454  }
455  if (correct && abs_yout < theta) {
456  thresholdCounter -= 1;
457  if (thresholdCounter <= -speed) {
458  theta -= 1;
459  thresholdCounter = 0;
460  }
461  }
462 
463  // train the weights, computing what the value of yout
464  // would have been if these updates had been applied before
465  int newyout = 0;
466  for (int i = 0; i < specs.size(); i += 1) {
467  HistorySpec const &spec = *specs[i];
468  // get the magnitude
469  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
470  int counter = tables[i][hashed_idx];
471  // get the sign
472  bool sign = sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
473  // increment/decrement if taken/not taken
474  satIncDec(taken, sign, counter, (1 << (spec.width - 1)) - 1);
475  // update the magnitude and sign
476  tables[i][hashed_idx] = counter;
477  sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits] = sign;
478  int weight = ((spec.width == 5) ? xlat4[counter] : xlat[counter]);
479  // update the new version of yout
480  if (sign) {
481  newyout -= weight;
482  } else {
483  newyout += weight;
484  }
485  }
486 
487  // if the prediction still would have been incorrect even
488  // with the updated weights, update some more weights to
489  // try to fix the problem
490  if ((newyout >= 1) != taken) {
491  if (extra_rounds != -1) {
492  int round_counter = 0;
493  bool found;
494  do {
495  // udpate a random weight
496  int besti = -1;
497  int nrand = random_mt.random<int>() % specs.size();
498  int pout;
499  found = false;
500  for (int j = 0; j < specs.size(); j += 1) {
501  int i = (nrand + j) % specs.size();
502  HistorySpec const &spec = *specs[i];
503  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
504  int counter = tables[i][hashed_idx];
505  bool sign =
506  sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
507  int weight = ((spec.width == 5) ?
508  xlat4[counter] : xlat[counter]);
509  int signed_weight = sign ? -weight : weight;
510  pout = newyout - signed_weight;
511  if ((pout >= 1) == taken) {
512  // we have found a weight that if we blow
513  // it away will help!
514  besti = i;
515  break;
516  }
517  }
518  if (besti != -1) {
519  int i = besti;
520  HistorySpec const &spec = *specs[i];
521  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
522  int counter = tables[i][hashed_idx];
523  bool sign =
524  sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
525  if (counter > 1) {
526  counter--;
527  tables[i][hashed_idx] = counter;
528  }
529  int weight = ((spec.width == 5) ?
530  xlat4[counter] : xlat[counter]);
531  int signed_weight = sign ? -weight : weight;
532  int out = pout + signed_weight;
533  round_counter += 1;
534  if ((out >= 1) != taken) {
535  found = true;
536  }
537  }
538  } while (found && round_counter < extra_rounds);
539  }
540  }
541 }
542 
543 void
545  void * &bp_history)
546 {
547  MPPBranchInfo *bi = new MPPBranchInfo(pc, pcshift, false);
548  std::vector<unsigned int> &ghist_words = threadData[tid]->ghist_words;
549 
550  bp_history = (void *)bi;
551  unsigned short int pc2 = pc >> 2;
552  bool ab = !(pc & (1<<pcbit));
553  for (int i = 0; i < ghist_length / blockSize + 1; i += 1) {
554  bool ab_new = (ghist_words[i] >> (blockSize - 1)) & 1;
555  ghist_words[i] <<= 1;
556  ghist_words[i] |= ab;
557  ghist_words[i] &= (1 << blockSize) - 1;
558  ab = ab_new;
559  }
560  memmove(&threadData[tid]->path_history[1],
561  &threadData[tid]->path_history[0],
562  sizeof(unsigned short int) * (path_length - 1));
563  threadData[tid]->path_history[0] = pc2;
564 }
565 
566 bool
568  void * &bp_history)
569 {
570  MPPBranchInfo *bi = new MPPBranchInfo(instPC, pcshift, true);
571  bp_history = (void *)bi;
572 
573  bool use_static = false;
574 
575  if (!threadData[tid]->filterTable.empty()) {
576  unsigned int findex =
577  bi->getHashFilter(threadData[tid]->last_ghist_bit) %
578  threadData[tid]->filterTable.size();
579  FilterEntry &f = threadData[tid]->filterTable[findex];
580  if (f.alwaysNotTakenSoFar()) {
581  bi->filtered = true;
582  bi->prediction = false;
583  return false;
584  } else if (f.alwaysTakenSoFar()) {
585  bi->filtered = true;
586  bi->prediction = true;
587  return true;
588  }
589  if (f.neverSeen()) {
590  use_static = true;
591  }
592  }
593 
594  int bestval = computeOutput(tid, *bi);
595  if (use_static) {
596  bi->prediction = false;
597  } else {
598  if (abs(bi->yout) <= threshold) {
599  bi->prediction = (bestval >= 1);
600  } else {
601  bi->prediction = (bi->yout >= 1);
602  }
603  }
604 
605  return bi->prediction;
606 }
607 
608 void
610  void *bp_history, bool squashed,
611  const StaticInstPtr & inst,
612  Addr corrTarget)
613 {
614  assert(bp_history);
615  MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
616  assert(corrTarget != MaxAddr);
617  if (squashed) {
618  //delete bi;
619  return;
620  }
621 
622  if (bi->isUnconditional()) {
623  delete bi;
624  return;
625  }
626 
627  bool do_train = true;
628 
629  if (!threadData[tid]->filterTable.empty()) {
630  int findex = bi->getHashFilter(threadData[tid]->last_ghist_bit) %
631  threadData[tid]->filterTable.size();
632  FilterEntry &f = threadData[tid]->filterTable[findex];
633 
634  // compute this first, so we don't not train on the
635  // first time a branch is seen.
636  bool transition = false;
637  if (f.alwaysNotTakenSoFar() || f.alwaysTakenSoFar()) {
638  do_train = false;
639  }
640  if (taken) {
641  if (f.alwaysNotTakenSoFar()) {
642  transition = true;
643  }
644  f.seenTaken = true;
645  } else {
646  if (f.alwaysTakenSoFar()) {
647  transition = true;
648  }
649  f.seenUntaken = true;
650  }
651  // is this the first time time the branch has gone both ways?
652  if (transition) {
653  threadData[tid]->occupancy += 1;
654  }
655  // for every new dynamic branch, when there ar
656  // more than 'decay' number of branches in the
657  // filter, blow a random filter entry away
658  if (decay && transition &&
659  ((threadData[tid]->occupancy > decay) || (decay == 1))) {
660  int rnd = random_mt.random<int>() %
661  threadData[tid]->filterTable.size();
662  FilterEntry &frand = threadData[tid]->filterTable[rnd];
663  if (frand.seenTaken && frand.seenUntaken) {
664  threadData[tid]->occupancy -= 1;
665  }
666  frand.seenTaken = false;
667  frand.seenUntaken = false;
668  }
669  }
670 
671  if (do_train) {
672  train(tid, *bi, taken);
673  }
674 
675 #define RECORD_FILTERED_IMLI 1
676 #define RECORD_FILTERED_GHIST 2
677 #define RECORD_FILTERED_PATH 4
678 #define RECORD_FILTERED_ACYCLIC 8
679 #define RECORD_FILTERED_MOD 16
680 #define RECORD_FILTERED_BLURRY 32
681 // should never record a filtered local branch - duh!
682 #define RECORD_FILTERED_LOCAL 64
683 #define RECORD_FILTERED_RECENCY 128
684 
685  // four different styles of IMLI
686  if (!bi->filtered || (record_mask & RECORD_FILTERED_IMLI)) {
687  unsigned int target = corrTarget;
688  if (target < bi->getPC()) {
689  if (taken) {
690  threadData[tid]->imli_counter[0] += 1;
691  } else {
692  threadData[tid]->imli_counter[0] = 0;
693  }
694  if (!taken) {
695  threadData[tid]->imli_counter[1] += 1;
696  } else {
697  threadData[tid]->imli_counter[1] = 0;
698  }
699  } else {
700  if (taken) {
701  threadData[tid]->imli_counter[2] += 1;
702  } else {
703  threadData[tid]->imli_counter[2] = 0;
704  }
705  if (!taken) {
706  threadData[tid]->imli_counter[3] += 1;
707  } else {
708  threadData[tid]->imli_counter[3] = 0;
709  }
710  }
711  }
712 
713  bool hashed_taken = hash_taken ? (taken ^ !!(bi->getPC() & (1<<pcbit)))
714  : taken;
715  // record into ghist
716  if (!bi->filtered || (record_mask & RECORD_FILTERED_GHIST)) {
717  bool ab = hashed_taken;
718  assert(threadData[tid]->ghist_words.size() > 0);
719  for (int i = 0; i < ghist_length / blockSize + 1; i += 1) {
720  unsigned int a = threadData[tid]->ghist_words[i];
721  bool ab_new = (a >> (blockSize - 1)) & 1;
722  a <<= 1;
723  a |= ab;
724  ab = ab_new;
725  a &= (1 << blockSize) - 1;
726  threadData[tid]->ghist_words[i] = a;
727  }
728  }
729 
730  // record into path history
731  if (!bi->filtered || (record_mask & RECORD_FILTERED_PATH)) {
732  assert(threadData[tid]->path_history.size() > 0);
733  memmove(&threadData[tid]->path_history[1],
734  &threadData[tid]->path_history[0],
735  sizeof(unsigned short int) * (path_length - 1));
736  threadData[tid]->path_history[0] = bi->getPC2();
737  }
738 
739  // record into acyclic history
740  if (!bi->filtered || (record_mask & RECORD_FILTERED_ACYCLIC)) {
741  threadData[tid]->updateAcyclic(hashed_taken, bi->getHPC());
742  }
743 
744  // record into modulo path history
745  if (!bi->filtered || (record_mask & RECORD_FILTERED_MOD)) {
746  for (int ii = 0; ii < modpath_indices.size(); ii += 1) {
747  int i = modpath_indices[ii];
748  if (bi->getHPC() % (i + 2) == 0) {
749  memmove(&threadData[tid]->modpath_histories[i][1],
750  &threadData[tid]->modpath_histories[i][0],
751  sizeof(unsigned short int) * (modpath_lengths[ii]-1));
752  threadData[tid]->modpath_histories[i][0] = bi->getPC2();
753  }
754  }
755  }
756 
757  // update blurry history
758  if (!bi->filtered || (record_mask & RECORD_FILTERED_BLURRY)) {
759  std::vector<std::vector<unsigned int>> &blurrypath_histories =
760  threadData[tid]->blurrypath_histories;
761 
762  for (int i = 0; i < blurrypath_histories.size(); i += 1)
763  {
764  if (blurrypath_histories[i].size() > 0) {
765  unsigned int z = bi->getPC() >> i;
766  if (blurrypath_histories[i][0] != z) {
767  memmove(&blurrypath_histories[i][1],
768  &blurrypath_histories[i][0],
769  sizeof(unsigned int) *
770  (blurrypath_histories[i].size() - 1));
771  blurrypath_histories[i][0] = z;
772  }
773  }
774  }
775  }
776 
777  // record into modulo pattern history
778  if (!bi->filtered || (record_mask & RECORD_FILTERED_MOD)) {
779  for (int ii = 0; ii < modhist_indices.size(); ii += 1) {
780  int i = modhist_indices[ii];
781  if (bi->getHPC() % (i + 2) == 0) {
782  for (int j = modhist_lengths[ii] - 1; j > 0; j -= 1) {
783  threadData[tid]->mod_histories[i][j] =
784  threadData[tid]->mod_histories[i][j-1];
785  }
786  threadData[tid]->mod_histories[i][0] = hashed_taken;
787  }
788  }
789  }
790 
791  // insert this PC into the recency stack
792  if (doing_recency) {
793  if (!bi->filtered || (record_mask & RECORD_FILTERED_RECENCY)) {
794  threadData[tid]->insertRecency(bi->getPC2(), assoc);
795  }
796  }
797 
798  // record into a local history
799  if (!bi->filtered || (record_mask & RECORD_FILTERED_LOCAL)) {
800  threadData[tid]->localHistories.update(bi->getPC(), hashed_taken);
801  }
802 
803  // update last ghist bit, used to index filter
804  threadData[tid]->last_ghist_bit = taken;
805 
806  delete bi;
807 }
808 
809 void
811  void* &bp_history)
812 {
813 }
814 
815 void
817 {
818  assert(bp_history);
819  MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
820  delete bi;
821 }
#define RECORD_FILTERED_ACYCLIC
#define DPRINTF(x,...)
Definition: trace.hh:229
Base class to implement the predictor tables.
Bitfield< 30, 0 > index
#define RECORD_FILTERED_IMLI
void uncondBranch(ThreadID tid, Addr pc, void *&bp_history) override
ThreadData(int num_filter, int n_local_histories, int local_history_length, int assoc, const std::vector< std::vector< int >> &blurrypath_bits, int path_length, int ghist_length, int block_size, const std::vector< std::vector< std::vector< bool >>> &acyclic_bits, const std::vector< int > &modhist_indices, const std::vector< int > &modhist_lengths, const std::vector< int > &modpath_indices, const std::vector< int > &modpath_lengths, const std::vector< int > &table_sizes, int n_sign_bits)
const Addr MaxAddr
Definition: types.hh:166
Bitfield< 7 > i
#define RECORD_FILTERED_BLURRY
bool neverSeen() const
Whether this branch has been observed before.
std::vector< std::vector< unsigned int > > blurrypath_histories
unsigned int getHashFilter(bool last_ghist_bit) const
const double coeff
Coefficient of the feature, models the accuracy of the feature.
Bitfield< 11 > z
Bitfield< 8 > a
const int blockSize
Predictor parameters.
MultiperspectivePerceptron(const MultiperspectivePerceptronParams *params)
static int xlat4[]
Transfer function for 5-width tables.
std::vector< std::vector< unsigned int > > acyclic2_histories
std::vector< std::vector< bool > > acyclic_histories
Bitfield< 20, 16 > bi
Definition: types.hh:65
bool prediction
Result of the prediction (true is taken)
STL vector class.
Definition: stl.hh:40
std::enable_if< std::is_integral< T >::value, T >::type random()
Use the SFINAE idiom to choose an implementation based on whether the type is integral or floating po...
Definition: random.hh:83
Bitfield< 63 > val
Definition: misc.hh:771
std::vector< std::vector< short int > > tables
Bitfield< 6 > f
History data is kept for each thread.
void setExtraBits(int bits)
Sets the starting number of storage bits to compute the number of table entries.
bool alwaysTakenSoFar() const
Whether this branch has always been observed as taken.
#define RECORD_FILTERED_MOD
void btbUpdate(ThreadID tid, Addr branch_addr, void *&bp_history) override
If a branch is not taken, because the BTB address is invalid or missing, this function sets the appro...
Bitfield< 4 > pc
Bitfield< 4 > g
Definition: dt_constants.hh:85
std::vector< std::vector< std::array< bool, 2 > > > sign_bits
virtual void createSpecs()=0
Creates the tables of the predictor.
const Params * params() const
Definition: sim_object.hh:114
#define RECORD_FILTERED_LOCAL
static int xlat[]
Transfer function for 6-width tables.
bool seenUntaken
Has this branch been not taken at least once?
Bitfield< 23 > k
Definition: dt_constants.hh:80
const unsigned long long int imli_mask1
Bitfield< 18, 16 > len
void update(ThreadID tid, Addr instPC, bool taken, void *bp_history, bool squashed, const StaticInstPtr &inst, Addr corrTarget=MaxAddr) override
Updates the BP with taken/not taken information.
std::vector< std::vector< bool > > mod_histories
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:203
void squash(ThreadID tid, void *bp_history) override
std::vector< ThreadData * > threadData
const unsigned long long int recencypos_mask
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
bool seenTaken
Has this branch been taken at least once?
void findBest(ThreadID tid, std::vector< int > &best_preds) const
Finds the best subset of features to use in case of a low-confidence branch, returns the result as an...
Bitfield< 24 > j
bool filtered
Whether this branch has been filtered by the prefetcher.
Basically a wrapper class to hold both the branch predictor and the BTB.
Definition: bpred_unit.hh:67
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:227
unsigned int getIndex(ThreadID tid, const MPPBranchInfo &bi, const HistorySpec &spec, int index) const
Get the position index of a predictor table.
void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
const unsigned long long int imli_mask4
void computeBits(int num_filter_entries, int nlocal_histories, int local_history_length, bool ignore_path_size)
Computes the size in bits of the structures needed to keep track of the history and the predictor tab...
bool doing_local
runtime values and data used to count the size in bits
std::vector< HistorySpec * > specs
Predictor tables.
bool alwaysNotTakenSoFar() const
Whether this branch has always been observed as not taken.
std::vector< std::vector< unsigned short int > > modpath_histories
Random random_mt
Definition: random.cc:100
void satIncDec(bool taken, bool &sign, int &c, int max_weight) const
Auxiliary function to increase a table counter depending on the direction of the branch.
static unsigned int hash(const std::vector< unsigned int short > &recency_stack, const std::vector< int > &table_sizes, unsigned short int pc, int l, int t)
const int width
Width of the table in bits.
#define RECORD_FILTERED_PATH
void train(ThreadID tid, MPPBranchInfo &bi, bool taken)
Trains the branch predictor with the given branch and direction.
const unsigned numThreads
Number of the threads for which the branch history is maintained.
Definition: bpred_unit.hh:272
int computeOutput(ThreadID tid, MPPBranchInfo &bi)
Computes the output of the predictor for a given branch and the resulting best value in case the pred...
#define RECORD_FILTERED_RECENCY
#define RECORD_FILTERED_GHIST
virtual unsigned int getHash(ThreadID tid, Addr pc, Addr pc2, int t) const =0
Gets the hash to index the table, using the pc of the branch, and the index of the table...
T bits(T val, int first, int last)
Extract the bitfield from position &#39;first&#39; to &#39;last&#39; (inclusive) from &#39;val&#39; and right justify it...
Definition: bitfield.hh:72
bool operator<(const Time &l, const Time &r)
Definition: time.hh:219
Bitfield< 0 > p
bool lookup(ThreadID tid, Addr instPC, void *&bp_history) override
Looks up a given PC in the BP to see if it is taken or not taken.
std::vector< std::vector< int > > blurrypath_bits
std::vector< std::vector< std::vector< bool > > > acyclic_bits

Generated on Fri Feb 28 2020 16:26:59 for gem5 by doxygen 1.8.13