gem5  v22.1.0.0
multiperspective_perceptron.cc
Go to the documentation of this file.
1 /*
2  * Copyright 2019 Texas A&M University
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright notice,
8  * this list of conditions and the following disclaimer.
9  *
10  * 2. Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  *
14  * 3. Neither the name of the copyright holder nor the names of its
15  * contributors may be used to endorse or promote products derived from this
16  * software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * Author: Daniel A. Jiménez
31  * Adapted to gem5 by: Javier Bueno Hedo
32  *
33  */
34 
35  /*
36  * Multiperspective Perceptron Predictor (by Daniel A. Jiménez)
37  */
38 
40 
41 #include "base/random.hh"
42 #include "debug/Branch.hh"
43 
44 namespace gem5
45 {
46 
47 namespace branch_prediction
48 {
49 
50 int
52  {1,3,4,5,7,8,9,11,12,14,15,17,19,21,23,25,27,29,32,34,37,41,45,49,53,58,63,
53  69,76,85,94,106,};
54 int
56  {0,4,5,7,9,11,12,14,16,17,19,22,28,33,39,45,};
57 
59  int n_local_histories, int local_history_length, int assoc,
60  const std::vector<std::vector<int>> &blurrypath_bits, int path_length,
61  int ghist_length, int block_size,
62  const std::vector<std::vector<std::vector<bool>>> &acyclic_bits,
63  const std::vector<int> &modhist_indices,
64  const std::vector<int> &modhist_lengths,
65  const std::vector<int> &modpath_indices,
66  const std::vector<int> &modpath_lengths,
67  const std::vector<int> &table_sizes, int n_sign_bits)
68  : filterTable(num_filters), acyclic_histories(acyclic_bits.size()),
69  acyclic2_histories(acyclic_bits.size()),
70  blurrypath_histories(blurrypath_bits.size()),
71  ghist_words(ghist_length/block_size+1, 0),
72  path_history(path_length, 0), imli_counter(4,0),
73  localHistories(n_local_histories, local_history_length),
74  recency_stack(assoc), last_ghist_bit(false), occupancy(0)
75 {
76  for (int i = 0; i < blurrypath_bits.size(); i+= 1) {
77  blurrypath_histories[i].resize(blurrypath_bits[i].size());
78  }
79 
80  for (int i = 0; i < acyclic_bits.size(); i += 1) {
81  acyclic_histories[i].resize(acyclic_bits[i].size());
82  acyclic2_histories[i].resize(acyclic_bits[i].size());
83  }
84 
85  int max_modhist_idx = -1;
86  for (auto &elem : modhist_indices) {
87  max_modhist_idx = (max_modhist_idx < elem) ? elem : max_modhist_idx;
88  }
89  if (max_modhist_idx >= 0) {
90  mod_histories.resize(max_modhist_idx + 1);
91  }
92  for (int i = 0; i < modhist_lengths.size(); i+= 1) {
94  }
95 
96  int max_modpath_idx = -1;
97  for (auto &elem : modpath_indices) {
98  max_modpath_idx = (max_modpath_idx < elem) ? elem : max_modpath_idx;
99  }
100  if (max_modpath_idx >= 0) {
101  modpath_histories.resize(max_modpath_idx + 1);
102  }
103  for (int i = 0; i < modpath_lengths.size(); i+= 1) {
105  }
106 
107  for (int i = 0; i < table_sizes.size(); i += 1) {
108  mpreds.push_back(0);
110  sign_bits.push_back(std::vector<std::array<bool, 2>>(table_sizes[i]));
111  for (int j = 0; j < table_sizes[i]; j += 1) {
112  for (int k = 0; k < n_sign_bits; k += 1) {
113  sign_bits[i][j][k] = (i & 1) | (k & 1);
114  }
115  }
116  }
117 }
118 
120  const MultiperspectivePerceptronParams &p) : BPredUnit(p),
121  blockSize(p.block_size), pcshift(p.pcshift), threshold(p.threshold),
130  threadData(p.numThreads, nullptr), doing_local(false),
131  doing_recency(false), assoc(0), ghist_length(p.initial_ghist_length),
133  theta(p.initial_theta), extrabits(0), imli_counter_bits(4),
135 {
136  fatal_if(speculative_update, "Speculative update not implemented");
137 }
138 
139 void
141 {
142  extrabits = bits;
143 }
144 
145 void
147 {
148  createSpecs();
149 
150  for (auto &spec : specs) {
151  // initial assignation of values
152  table_sizes.push_back(spec->size);
153  }
154 
155  // Update bit requirements and runtime values
156  for (auto &spec : specs) {
157  spec->setBitRequirements();
158  }
159  const MultiperspectivePerceptronParams &p =
160  static_cast<const MultiperspectivePerceptronParams &>(params());
161 
162  computeBits(p.num_filter_entries, p.num_local_histories,
163  p.local_history_length, p.ignore_path_size);
164 
165  for (int i = 0; i < threadData.size(); i += 1) {
166  threadData[i] = new ThreadData(p.num_filter_entries,
167  p.num_local_histories,
168  p.local_history_length, assoc,
174  }
175 }
176 
177 void
179  int nlocal_histories, int local_history_length, bool ignore_path_size)
180 {
181  int totalbits = extrabits;
182  for (auto &imli_bits : imli_counter_bits) {
183  totalbits += imli_bits;
184  }
185  totalbits += ghist_length;
186  if (!ignore_path_size) {
187  totalbits += path_length * 16;
188  }
189  totalbits += (threshold >= 0) ? (tunebits * specs.size()) : 0;
190  for (auto &len : modhist_lengths) {
191  totalbits += len;
192  }
193  if (!ignore_path_size) {
194  for (auto &len : modpath_lengths) {
195  totalbits += 16 * len;
196  }
197  }
198  totalbits += doing_local ? (nlocal_histories * local_history_length) : 0;
199  totalbits += doing_recency ? (assoc * 16) : 0;
200 
201  for (auto &bv : blurrypath_bits) {
202  for (auto &bve : bv) {
203  totalbits += bve;
204  }
205  }
206  totalbits += num_filter_entries * 2;
207 
208  for (auto &abi : acyclic_bits) {
209  for (auto &abj : abi) {
210  for (auto abk : abj) {
211  totalbits += abk;
212  }
213  }
214  }
215 
216  int remaining = budgetbits - totalbits;
217 
218  // count the tables that have already been assigned sizes
219  int num_sized = 0;
220  for (int i = 0; i < specs.size(); i +=1) {
221  if (table_sizes[i] != 0) {
222  int sz = table_sizes[i] * (specs[i]->width + (n_sign_bits - 1));
223  totalbits += sz;
224  remaining -= sz;
225  num_sized += 1;
226  }
227  }
228 
229  // whatever is left, we divide among the rest of the tables
230  int table_size_bits = (remaining / (specs.size()-num_sized));
231  for (int i = 0; i < specs.size(); i += 1) {
232  // if a table doesn't have a size yet, give it one and count those bits
233  if (!table_sizes[i]) {
234  int my_table_size = table_size_bits /
235  (specs[i]->width + (n_sign_bits - 1)); // extra sign bits
236  table_sizes[i] = my_table_size;
237  totalbits += my_table_size * (specs[i]->width + (n_sign_bits - 1));
238  }
239  }
240 
241  DPRINTF(Branch, "%d bits of metadata so far, %d left out of "
242  "%d total budget\n", totalbits, remaining, budgetbits);
243  DPRINTF(Branch, "table size is %d bits, %d entries for 5 bit, %d entries "
244  "for 6 bit\n", table_size_bits,
245  table_size_bits / (5 + (n_sign_bits - 1)),
246  table_size_bits / (6 + (n_sign_bits - 1)));
247  DPRINTF(Branch, "%d total bits (%0.2fKB)\n", totalbits,
248  totalbits / 8192.0);
249 }
250 
251 void
253  std::vector<int> &best_preds) const
254 {
255  if (threshold < 0) {
256  return;
257  }
258  struct BestPair
259  {
260  int index;
261  int mpreds;
262  bool operator<(BestPair const &bp) const
263  {
264  return mpreds < bp.mpreds;
265  }
266  };
267  std::vector<BestPair> pairs(best_preds.size());
268  for (int i = 0; i < best_preds.size(); i += 1) {
269  pairs[i].index = i;
270  pairs[i].mpreds = threadData[tid]->mpreds[i];
271  }
272  std::sort(pairs.begin(), pairs.end());
273  for (int i = 0; i < (std::min(nbest, (int) best_preds.size())); i += 1) {
274  best_preds[i] = pairs[i].index;
275  }
276 }
277 
278 unsigned int
280  const HistorySpec &spec, int index) const
281 {
282  unsigned int g = spec.getHash(tid, bi.getPC(), bi.getPC2(), index);
283  unsigned long long int h = g;
284  // shift the hash from the feature to xor with the hashed PC
285  if (hshift < 0) {
286  h <<= -hshift;
287  h ^= bi.getPC2();
288  } else {
289  h <<= hshift;
290  h ^= bi.getHPC();
291  }
292  // xor in the imli counter(s) and/or recency position based on the masks
293  if ((1ull<<index) & imli_mask1) {
294  h ^= threadData[tid]->imli_counter[0];
295  }
296  if ((1ull<<index) & imli_mask4) {
297  h ^= threadData[tid]->imli_counter[3];
298  }
299  if (doing_recency) {
300  if ((1ull<<index) & recencypos_mask) {
301  h ^= RECENCYPOS::hash(threadData[tid]->recency_stack, table_sizes,
302  bi.getPC2(), 31, index);
303  }
304  }
305  h %= table_sizes[index];
306  return h;
307 }
308 
309 int
311 {
312  // list of best predictors
313  std::vector<int> best_preds(specs.size(), -1);
314 
315  // initialize sum
316  bi.yout = 0;
317 
318  // bias the prediction by whether the local history is
319  // one of four distinctive patterns
320  int lhist = threadData[tid]->localHistories[bi.getPC()];
321  int history_len = threadData[tid]->localHistories.getLocalHistoryLength();
322  if (lhist == 0) {
323  bi.yout = bias0;
324  } else if (lhist == ((1<<history_len)-1)) {
325  bi.yout = bias1;
326  } else if (lhist == (1<<(history_len-1))) {
327  bi.yout = biasmostly0;
328  } else if (lhist == ((1<<(history_len-1))-1)) {
329  bi.yout = biasmostly1;
330  }
331  // find the best subset of features to use in case of a low-confidence
332  // branch
333  findBest(tid, best_preds);
334 
335  // begin computation of the sum for low-confidence branch
336  int bestval = 0;
337 
338  for (int i = 0; i < specs.size(); i += 1) {
339  HistorySpec const &spec = *specs[i];
340  // get the hash to index the table
341  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
342  // add the weight; first get the weight's magnitude
343  int counter = threadData[tid]->tables[i][hashed_idx];
344  // get the sign
345  bool sign =
346  threadData[tid]->sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
347  // apply the transfer function and multiply by a coefficient
348  int weight = spec.coeff * ((spec.width == 5) ?
349  xlat4[counter] : xlat[counter]);
350  // apply the sign
351  int val = sign ? -weight : weight;
352  // add the value
353  bi.yout += val;
354  // if this is one of those good features, add the value to bestval
355  if (threshold >= 0) {
356  for (int j = 0;
357  j < std::min(nbest, (int) best_preds.size());
358  j += 1)
359  {
360  if (best_preds[j] == i) {
361  bestval += val;
362  break;
363  }
364  }
365  }
366  }
367  // apply a fudge factor to affect when training is triggered
368  bi.yout *= fudge;
369  return bestval;
370 }
371 
372 void
373 MultiperspectivePerceptron::satIncDec(bool taken, bool &sign, int &counter,
374  int max_weight) const
375 {
376  if (taken) {
377  // increment sign/magnitude
378  if (sign) {
379  // go toward 0 away from negative max weight
380  if (counter == 0) {
381  sign = false; // moved to positive 0
382  } else {
383  counter -= 1;
384  }
385  } else {
386  // go toward max weight away from 0
387  if (counter < max_weight) {
388  counter += 1;
389  }
390  }
391  } else {
392  // decrement sign/magnitude
393  if (sign) {
394  // go toward negative max weight down from 0
395  if (counter < max_weight) {
396  counter += 1;
397  }
398  } else {
399  // go toward 0 away from max weight
400  if (counter == 0) {
401  sign = true; // negative 0
402  } else {
403  counter -= 1;
404  }
405  }
406  }
407 }
408 
409 void
411 {
412  std::vector<std::vector<short int>> &tables = threadData[tid]->tables;
414  threadData[tid]->sign_bits;
415  std::vector<int> &mpreds = threadData[tid]->mpreds;
416  // was the prediction correct?
417  bool correct = (bi.yout >= 1) == taken;
418  // what is the magnitude of yout?
419  int abs_yout = abs(bi.yout);
420  // keep track of mispredictions per table
421  if (threshold >= 0) if (!tuneonly || (abs_yout <= threshold)) {
422  bool halve = false;
423 
424  // for each table, figure out if there was a misprediction
425  for (int i = 0; i < specs.size(); i += 1) {
426  HistorySpec const &spec = *specs[i];
427  // get the hash to index the table
428  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
429  bool sign = sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
430  int counter = tables[i][hashed_idx];
431  int weight = spec.coeff * ((spec.width == 5) ?
432  xlat4[counter] : xlat[counter]);
433  if (sign) weight = -weight;
434  bool pred = weight >= 1;
435  if (pred != taken) {
436  mpreds[i] += 1;
437  if (mpreds[i] == (1 << tunebits) - 1) {
438  halve = true;
439  }
440  }
441  }
442  // if we reach the maximum counter value, halve all the counters
443  if (halve) {
444  for (int i = 0; i < specs.size(); i += 1) {
445  mpreds[i] /= 2;
446  }
447  }
448  }
449  // if the branch was predicted incorrectly or the correct
450  // prediction was weak, update the weights
451  bool do_train = !correct || (abs_yout <= theta);
452  if (!do_train) return;
453 
454  // adaptive theta training, adapted from O-GEHL
455  if (!correct) {
456  thresholdCounter += 1;
457  if (thresholdCounter >= speed) {
458  theta += 1;
459  thresholdCounter = 0;
460  }
461  }
462  if (correct && abs_yout < theta) {
463  thresholdCounter -= 1;
464  if (thresholdCounter <= -speed) {
465  theta -= 1;
466  thresholdCounter = 0;
467  }
468  }
469 
470  // train the weights, computing what the value of yout
471  // would have been if these updates had been applied before
472  int newyout = 0;
473  for (int i = 0; i < specs.size(); i += 1) {
474  HistorySpec const &spec = *specs[i];
475  // get the magnitude
476  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
477  int counter = tables[i][hashed_idx];
478  // get the sign
479  bool sign = sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
480  // increment/decrement if taken/not taken
481  satIncDec(taken, sign, counter, (1 << (spec.width - 1)) - 1);
482  // update the magnitude and sign
483  tables[i][hashed_idx] = counter;
484  sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits] = sign;
485  int weight = ((spec.width == 5) ? xlat4[counter] : xlat[counter]);
486  // update the new version of yout
487  if (sign) {
488  newyout -= weight;
489  } else {
490  newyout += weight;
491  }
492  }
493 
494  // if the prediction still would have been incorrect even
495  // with the updated weights, update some more weights to
496  // try to fix the problem
497  if ((newyout >= 1) != taken) {
498  if (extra_rounds != -1) {
499  int round_counter = 0;
500  bool found;
501  do {
 502  // update a random weight
503  int besti = -1;
504  int nrand = random_mt.random<int>() % specs.size();
505  int pout;
506  found = false;
507  for (int j = 0; j < specs.size(); j += 1) {
508  int i = (nrand + j) % specs.size();
509  HistorySpec const &spec = *specs[i];
510  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
511  int counter = tables[i][hashed_idx];
512  bool sign =
513  sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
514  int weight = ((spec.width == 5) ?
515  xlat4[counter] : xlat[counter]);
516  int signed_weight = sign ? -weight : weight;
517  pout = newyout - signed_weight;
518  if ((pout >= 1) == taken) {
519  // we have found a weight that if we blow
520  // it away will help!
521  besti = i;
522  break;
523  }
524  }
525  if (besti != -1) {
526  int i = besti;
527  HistorySpec const &spec = *specs[i];
528  unsigned int hashed_idx = getIndex(tid, bi, spec, i);
529  int counter = tables[i][hashed_idx];
530  bool sign =
531  sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
532  if (counter > 1) {
533  counter--;
534  tables[i][hashed_idx] = counter;
535  }
536  int weight = ((spec.width == 5) ?
537  xlat4[counter] : xlat[counter]);
538  int signed_weight = sign ? -weight : weight;
539  int out = pout + signed_weight;
540  round_counter += 1;
541  if ((out >= 1) != taken) {
542  found = true;
543  }
544  }
545  } while (found && round_counter < extra_rounds);
546  }
547  }
548 }
549 
550 void
552  void * &bp_history)
553 {
554  MPPBranchInfo *bi = new MPPBranchInfo(pc, pcshift, false);
555  std::vector<unsigned int> &ghist_words = threadData[tid]->ghist_words;
556 
557  bp_history = (void *)bi;
558  unsigned short int pc2 = pc >> 2;
559  bool ab = !(pc & (1<<pcbit));
560  for (int i = 0; i < ghist_length / blockSize + 1; i += 1) {
561  bool ab_new = (ghist_words[i] >> (blockSize - 1)) & 1;
562  ghist_words[i] <<= 1;
563  ghist_words[i] |= ab;
564  ghist_words[i] &= (1 << blockSize) - 1;
565  ab = ab_new;
566  }
567  memmove(&threadData[tid]->path_history[1],
568  &threadData[tid]->path_history[0],
569  sizeof(unsigned short int) * (path_length - 1));
570  threadData[tid]->path_history[0] = pc2;
571 }
572 
573 bool
575  void * &bp_history)
576 {
577  MPPBranchInfo *bi = new MPPBranchInfo(instPC, pcshift, true);
578  bp_history = (void *)bi;
579 
580  bool use_static = false;
581 
582  if (!threadData[tid]->filterTable.empty()) {
583  unsigned int findex =
584  bi->getHashFilter(threadData[tid]->last_ghist_bit) %
585  threadData[tid]->filterTable.size();
586  FilterEntry &f = threadData[tid]->filterTable[findex];
587  if (f.alwaysNotTakenSoFar()) {
588  bi->filtered = true;
589  bi->prediction = false;
590  return false;
591  } else if (f.alwaysTakenSoFar()) {
592  bi->filtered = true;
593  bi->prediction = true;
594  return true;
595  }
596  if (f.neverSeen()) {
597  use_static = true;
598  }
599  }
600 
601  int bestval = computeOutput(tid, *bi);
602  if (use_static) {
603  bi->prediction = false;
604  } else {
605  if (abs(bi->yout) <= threshold) {
606  bi->prediction = (bestval >= 1);
607  } else {
608  bi->prediction = (bi->yout >= 1);
609  }
610  }
611 
612  return bi->prediction;
613 }
614 
615 void
617  void *bp_history, bool squashed,
618  const StaticInstPtr & inst,
619  Addr corrTarget)
620 {
621  assert(bp_history);
622  MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
623  if (squashed) {
624  //delete bi;
625  return;
626  }
627 
628  if (bi->isUnconditional()) {
629  delete bi;
630  return;
631  }
632 
633  bool do_train = true;
634 
635  if (!threadData[tid]->filterTable.empty()) {
636  int findex = bi->getHashFilter(threadData[tid]->last_ghist_bit) %
637  threadData[tid]->filterTable.size();
638  FilterEntry &f = threadData[tid]->filterTable[findex];
639 
640  // compute this first, so we don't not train on the
641  // first time a branch is seen.
642  bool transition = false;
643  if (f.alwaysNotTakenSoFar() || f.alwaysTakenSoFar()) {
644  do_train = false;
645  }
646  if (taken) {
647  if (f.alwaysNotTakenSoFar()) {
648  transition = true;
649  }
650  f.seenTaken = true;
651  } else {
652  if (f.alwaysTakenSoFar()) {
653  transition = true;
654  }
655  f.seenUntaken = true;
656  }
 657  // is this the first time the branch has gone both ways?
658  if (transition) {
659  threadData[tid]->occupancy += 1;
660  }
 661  // for every new dynamic branch, when there are
662  // more than 'decay' number of branches in the
663  // filter, blow a random filter entry away
664  if (decay && transition &&
665  ((threadData[tid]->occupancy > decay) || (decay == 1))) {
666  int rnd = random_mt.random<int>() %
667  threadData[tid]->filterTable.size();
668  FilterEntry &frand = threadData[tid]->filterTable[rnd];
669  if (frand.seenTaken && frand.seenUntaken) {
670  threadData[tid]->occupancy -= 1;
671  }
672  frand.seenTaken = false;
673  frand.seenUntaken = false;
674  }
675  }
676 
677  if (do_train) {
678  train(tid, *bi, taken);
679  }
680 
681  enum RecordFiltered
682  {
683  Imli = 1,
684  GHist = 2,
685  Path = 4,
686  Acyclic = 8,
687  Mod = 16,
688  Blurry = 32,
689  // Should never record a filtered local branch - duh!
690  Local = 64,
691  Recency = 128
692  };
693 
694  // four different styles of IMLI
695  if (!bi->filtered || (record_mask & Imli)) {
696  unsigned int target = corrTarget;
697  if (target < bi->getPC()) {
698  if (taken) {
699  threadData[tid]->imli_counter[0] += 1;
700  } else {
701  threadData[tid]->imli_counter[0] = 0;
702  }
703  if (!taken) {
704  threadData[tid]->imli_counter[1] += 1;
705  } else {
706  threadData[tid]->imli_counter[1] = 0;
707  }
708  } else {
709  if (taken) {
710  threadData[tid]->imli_counter[2] += 1;
711  } else {
712  threadData[tid]->imli_counter[2] = 0;
713  }
714  if (!taken) {
715  threadData[tid]->imli_counter[3] += 1;
716  } else {
717  threadData[tid]->imli_counter[3] = 0;
718  }
719  }
720  }
721 
722  bool hashed_taken = hash_taken ? (taken ^ !!(bi->getPC() & (1<<pcbit)))
723  : taken;
724  // record into ghist
725  if (!bi->filtered || (record_mask & GHist)) {
726  bool ab = hashed_taken;
727  assert(threadData[tid]->ghist_words.size() > 0);
728  for (int i = 0; i < ghist_length / blockSize + 1; i += 1) {
729  unsigned int a = threadData[tid]->ghist_words[i];
730  bool ab_new = (a >> (blockSize - 1)) & 1;
731  a <<= 1;
732  a |= ab;
733  ab = ab_new;
734  a &= (1 << blockSize) - 1;
735  threadData[tid]->ghist_words[i] = a;
736  }
737  }
738 
739  // record into path history
740  if (!bi->filtered || (record_mask & Path)) {
741  assert(threadData[tid]->path_history.size() > 0);
742  memmove(&threadData[tid]->path_history[1],
743  &threadData[tid]->path_history[0],
744  sizeof(unsigned short int) * (path_length - 1));
745  threadData[tid]->path_history[0] = bi->getPC2();
746  }
747 
748  // record into acyclic history
749  if (!bi->filtered || (record_mask & Acyclic)) {
750  threadData[tid]->updateAcyclic(hashed_taken, bi->getHPC());
751  }
752 
753  // record into modulo path history
754  if (!bi->filtered || (record_mask & Mod)) {
755  for (int ii = 0; ii < modpath_indices.size(); ii += 1) {
756  int i = modpath_indices[ii];
757  if (bi->getHPC() % (i + 2) == 0) {
758  memmove(&threadData[tid]->modpath_histories[i][1],
759  &threadData[tid]->modpath_histories[i][0],
760  sizeof(unsigned short int) * (modpath_lengths[ii]-1));
761  threadData[tid]->modpath_histories[i][0] = bi->getPC2();
762  }
763  }
764  }
765 
766  // update blurry history
767  if (!bi->filtered || (record_mask & Blurry)) {
768  std::vector<std::vector<unsigned int>> &blurrypath_histories =
769  threadData[tid]->blurrypath_histories;
770 
771  for (int i = 0; i < blurrypath_histories.size(); i += 1)
772  {
773  if (blurrypath_histories[i].size() > 0) {
774  unsigned int z = bi->getPC() >> i;
775  if (blurrypath_histories[i][0] != z) {
776  memmove(&blurrypath_histories[i][1],
777  &blurrypath_histories[i][0],
778  sizeof(unsigned int) *
779  (blurrypath_histories[i].size() - 1));
780  blurrypath_histories[i][0] = z;
781  }
782  }
783  }
784  }
785 
786  // record into modulo pattern history
787  if (!bi->filtered || (record_mask & Mod)) {
788  for (int ii = 0; ii < modhist_indices.size(); ii += 1) {
789  int i = modhist_indices[ii];
790  if (bi->getHPC() % (i + 2) == 0) {
791  for (int j = modhist_lengths[ii] - 1; j > 0; j -= 1) {
792  threadData[tid]->mod_histories[i][j] =
793  threadData[tid]->mod_histories[i][j-1];
794  }
795  threadData[tid]->mod_histories[i][0] = hashed_taken;
796  }
797  }
798  }
799 
800  // insert this PC into the recency stack
801  if (doing_recency) {
802  if (!bi->filtered || (record_mask & Recency)) {
803  threadData[tid]->insertRecency(bi->getPC2(), assoc);
804  }
805  }
806 
807  // record into a local history
808  if (!bi->filtered || (record_mask & Local)) {
809  threadData[tid]->localHistories.update(bi->getPC(), hashed_taken);
810  }
811 
812  // update last ghist bit, used to index filter
813  threadData[tid]->last_ghist_bit = taken;
814 
815  delete bi;
816 }
817 
818 void
820  void* &bp_history)
821 {
822 }
823 
824 void
826 {
827  assert(bp_history);
828  MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
829  delete bi;
830 }
831 
832 } // namespace branch_prediction
833 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
Base class for branch operations.
Definition: branch.hh:49
Basically a wrapper class to hold both the branch predictor and the BTB.
Definition: bpred_unit.hh:69
const unsigned numThreads
Number of the threads for which the branch history is maintained.
Definition: bpred_unit.hh:287
static unsigned int hash(const std::vector< unsigned int short > &recency_stack, const std::vector< int > &table_sizes, unsigned short int pc, int l, int t)
std::vector< std::vector< std::vector< bool > > > acyclic_bits
int computeOutput(ThreadID tid, MPPBranchInfo &bi)
Computes the output of the predictor for a given branch and the resulting best value in case the pred...
bool lookup(ThreadID tid, Addr instPC, void *&bp_history) override
Looks up a given PC in the BP to see if it is taken or not taken.
void findBest(ThreadID tid, std::vector< int > &best_preds) const
Finds the best subset of features to use in case of a low-confidence branch, returns the result as an...
void btbUpdate(ThreadID tid, Addr branch_addr, void *&bp_history) override
If a branch is not taken, because the BTB address is invalid or missing, this function sets the appro...
static int xlat[]
Transfer function for 6-width tables.
bool doing_local
runtime values and data used to count the size in bits
MultiperspectivePerceptron(const MultiperspectivePerceptronParams &params)
unsigned int getIndex(ThreadID tid, const MPPBranchInfo &bi, const HistorySpec &spec, int index) const
Get the position index of a predictor table.
virtual void createSpecs()=0
Creates the tables of the predictor.
void satIncDec(bool taken, bool &sign, int &c, int max_weight) const
Auxiliary function to increase a table counter depending on the direction of the branch.
void computeBits(int num_filter_entries, int nlocal_histories, int local_history_length, bool ignore_path_size)
Computes the size in bits of the structures needed to keep track of the history and the predictor tab...
static int xlat4[]
Transfer function for 5-width tables.
void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
void train(ThreadID tid, MPPBranchInfo &bi, bool taken)
Trains the branch predictor with the given branch and direction.
void setExtraBits(int bits)
Sets the starting number of storage bits to compute the number of table entries.
void uncondBranch(ThreadID tid, Addr pc, void *&bp_history) override
std::vector< HistorySpec * > specs
Predictor tables.
void update(ThreadID tid, Addr instPC, bool taken, void *bp_history, bool squashed, const StaticInstPtr &inst, Addr corrTarget) override
Updates the BP with taken/not taken information.
void squash(ThreadID tid, void *bp_history) override
STL vector class.
Definition: stl.hh:37
Random random_mt
Definition: random.cc:99
std::enable_if_t< std::is_integral_v< T >, T > random()
Use the SFINAE idiom to choose an implementation based on whether the type is integral or floating po...
Definition: random.hh:90
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
const Params & params() const
Definition: sim_object.hh:176
uint16_t len
Definition: helpers.cc:62
Bitfield< 7 > i
Definition: misc_types.hh:67
Bitfield< 8 > a
Definition: misc_types.hh:66
Bitfield< 24 > j
Definition: misc_types.hh:57
Bitfield< 4 > pc
Bitfield< 30, 0 > index
Bitfield< 4 > g
Definition: dt_constants.hh:86
Bitfield< 23 > k
Definition: dt_constants.hh:81
Bitfield< 20, 16 > bi
Definition: types.hh:80
Bitfield< 3 > z
Definition: pagetable.hh:62
Bitfield< 56 > f
Definition: pagetable.hh:53
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 63 > val
Definition: misc.hh:776
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:235
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
bool operator<(const Time &l, const Time &r)
Definition: time.hh:219
virtual unsigned int getHash(ThreadID tid, Addr pc, Addr pc2, int t) const =0
Gets the hash to index the table, using the pc of the branch, and the index of the table.
const double coeff
Coefficient of the feature, models the accuracy of the feature.
std::vector< std::vector< std::array< bool, 2 > > > sign_bits
std::vector< std::vector< unsigned short int > > modpath_histories
ThreadData(int num_filter, int n_local_histories, int local_history_length, int assoc, const std::vector< std::vector< int >> &blurrypath_bits, int path_length, int ghist_length, int block_size, const std::vector< std::vector< std::vector< bool >>> &acyclic_bits, const std::vector< int > &modhist_indices, const std::vector< int > &modhist_lengths, const std::vector< int > &modpath_indices, const std::vector< int > &modpath_lengths, const std::vector< int > &table_sizes, int n_sign_bits)

Generated on Wed Dec 21 2022 10:22:31 for gem5 by doxygen 1.9.1