multiperspective_perceptron.cc (gem5 v23.0.0.1)
/*
 * Copyright 2019 Texas A&M University
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Daniel A. Jiménez
 * Adapted to gem5 by: Javier Bueno Hedo
 *
 */

/*
 * Multiperspective Perceptron Predictor (by Daniel A. Jiménez)
 */

#include "cpu/pred/multiperspective_perceptron.hh"

#include "base/random.hh"
#include "debug/Branch.hh"

namespace gem5
{

namespace branch_prediction
{

int
MultiperspectivePerceptron::xlat[] =
    {1,3,4,5,7,8,9,11,12,14,15,17,19,21,23,25,27,29,32,34,37,41,45,49,53,58,63,
     69,76,85,94,106,};
int
MultiperspectivePerceptron::xlat4[] =
    {0,4,5,7,9,11,12,14,16,17,19,22,28,33,39,45,};
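
// Note added for clarity (not part of the original gem5 source): xlat and
// xlat4 are the transfer functions that turn a stored sign/magnitude counter
// into a weight; xlat serves the 6-bit-wide tables (32 magnitudes) and xlat4
// the 5-bit-wide tables (16 magnitudes).  For example, a 5-bit entry with
// magnitude 3 contributes xlat4[3] = 7 (negated if its sign bit is set), so
// weights grow super-linearly with the counter value.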

MultiperspectivePerceptron::ThreadData::ThreadData(int num_filters,
        int n_local_histories, int local_history_length, int assoc,
        const std::vector<std::vector<int>> &blurrypath_bits, int path_length,
        int ghist_length, int block_size,
        const std::vector<std::vector<std::vector<bool>>> &acyclic_bits,
        const std::vector<int> &modhist_indices,
        const std::vector<int> &modhist_lengths,
        const std::vector<int> &modpath_indices,
        const std::vector<int> &modpath_lengths,
        const std::vector<int> &table_sizes, int n_sign_bits)
      : filterTable(num_filters), acyclic_histories(acyclic_bits.size()),
        acyclic2_histories(acyclic_bits.size()),
        blurrypath_histories(blurrypath_bits.size()),
        ghist_words(ghist_length/block_size+1, 0),
        path_history(path_length, 0), imli_counter(4,0),
        localHistories(n_local_histories, local_history_length),
        recency_stack(assoc), last_ghist_bit(false), occupancy(0)
{
    for (int i = 0; i < blurrypath_bits.size(); i += 1) {
        blurrypath_histories[i].resize(blurrypath_bits[i].size());
    }

    for (int i = 0; i < acyclic_bits.size(); i += 1) {
        acyclic_histories[i].resize(acyclic_bits[i].size());
        acyclic2_histories[i].resize(acyclic_bits[i].size());
    }

    int max_modhist_idx = -1;
    for (auto &elem : modhist_indices) {
        max_modhist_idx = (max_modhist_idx < elem) ? elem : max_modhist_idx;
    }
    if (max_modhist_idx >= 0) {
        mod_histories.resize(max_modhist_idx + 1);
    }
    for (int i = 0; i < modhist_lengths.size(); i += 1) {
        mod_histories[modhist_indices[i]].resize(modhist_lengths[i]);
    }

    int max_modpath_idx = -1;
    for (auto &elem : modpath_indices) {
        max_modpath_idx = (max_modpath_idx < elem) ? elem : max_modpath_idx;
    }
    if (max_modpath_idx >= 0) {
        modpath_histories.resize(max_modpath_idx + 1);
    }
    for (int i = 0; i < modpath_lengths.size(); i += 1) {
        modpath_histories[modpath_indices[i]].resize(modpath_lengths[i]);
    }

    for (int i = 0; i < table_sizes.size(); i += 1) {
        mpreds.push_back(0);
        tables.push_back(std::vector<short int>(table_sizes[i]));
        sign_bits.push_back(std::vector<std::array<bool, 2>>(table_sizes[i]));
        for (int j = 0; j < table_sizes[i]; j += 1) {
            for (int k = 0; k < n_sign_bits; k += 1) {
                sign_bits[i][j][k] = (i & 1) | (k & 1);
            }
        }
    }
}
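
// Note added for clarity: the (i & 1) | (k & 1) expression above seeds the
// sign bits with an alternating pattern, e.g. table 0 starts its two sign
// copies as {false, true} while table 1 starts as {true, true}, so the
// tables are de-correlated before any training has taken place.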

MultiperspectivePerceptron::MultiperspectivePerceptron(
    const MultiperspectivePerceptronParams &p) : BPredUnit(p),
    blockSize(p.block_size), pcshift(p.pcshift), threshold(p.threshold),
    bias0(p.bias0), bias1(p.bias1), biasmostly0(p.biasmostly0),
    biasmostly1(p.biasmostly1), nbest(p.nbest), tunebits(p.tunebits),
    hshift(p.hshift), imli_mask1(p.imli_mask1), imli_mask4(p.imli_mask4),
    recencypos_mask(p.recencypos_mask), fudge(p.fudge),
    n_sign_bits(p.n_sign_bits), pcbit(p.pcbit), decay(p.decay),
    record_mask(p.record_mask), hash_taken(p.hash_taken),
    tuneonly(p.tuneonly), extra_rounds(p.extra_rounds), speed(p.speed),
    budgetbits(p.budgetbits), speculative_update(p.speculative_update),
    threadData(p.numThreads, nullptr), doing_local(false),
    doing_recency(false), assoc(0), ghist_length(p.initial_ghist_length),
    modghist_length(1), path_length(1), thresholdCounter(0),
    theta(p.initial_theta), extrabits(0), imli_counter_bits(4),
    modhist_indices(), modpath_indices(), modpath_lengths(),
    modhist_lengths()
{
    fatal_if(speculative_update, "Speculative update not implemented");
}

void
MultiperspectivePerceptron::setExtraBits(int bits)
{
    extrabits = bits;
}

void
MultiperspectivePerceptron::init()
{
    createSpecs();

    for (auto &spec : specs) {
        // initial assignment of values
        table_sizes.push_back(spec->size);
    }

    // Update bit requirements and runtime values
    for (auto &spec : specs) {
        spec->setBitRequirements();
    }
    const MultiperspectivePerceptronParams &p =
        static_cast<const MultiperspectivePerceptronParams &>(params());

    computeBits(p.num_filter_entries, p.num_local_histories,
                p.local_history_length, p.ignore_path_size);

    for (int i = 0; i < threadData.size(); i += 1) {
        threadData[i] = new ThreadData(p.num_filter_entries,
                                       p.num_local_histories,
                                       p.local_history_length, assoc,
                                       blurrypath_bits, path_length,
                                       ghist_length, blockSize,
                                       acyclic_bits, modhist_indices,
                                       modhist_lengths, modpath_indices,
                                       modpath_lengths, table_sizes,
                                       n_sign_bits);
    }
}

void
MultiperspectivePerceptron::computeBits(int num_filter_entries,
        int nlocal_histories, int local_history_length, bool ignore_path_size)
{
    int totalbits = extrabits;
    for (auto &imli_bits : imli_counter_bits) {
        totalbits += imli_bits;
    }
    totalbits += ghist_length;
    if (!ignore_path_size) {
        totalbits += path_length * 16;
    }
    totalbits += (threshold >= 0) ? (tunebits * specs.size()) : 0;
    for (auto &len : modhist_lengths) {
        totalbits += len;
    }
    if (!ignore_path_size) {
        for (auto &len : modpath_lengths) {
            totalbits += 16 * len;
        }
    }
    totalbits += doing_local ? (nlocal_histories * local_history_length) : 0;
    totalbits += doing_recency ? (assoc * 16) : 0;

    for (auto &bv : blurrypath_bits) {
        for (auto &bve : bv) {
            totalbits += bve;
        }
    }
    totalbits += num_filter_entries * 2;

    for (auto &abi : acyclic_bits) {
        for (auto &abj : abi) {
            for (auto abk : abj) {
                totalbits += abk;
            }
        }
    }

    int remaining = budgetbits - totalbits;

    // count the tables that have already been assigned sizes
    int num_sized = 0;
    for (int i = 0; i < specs.size(); i += 1) {
        if (table_sizes[i] != 0) {
            int sz = table_sizes[i] * (specs[i]->width + (n_sign_bits - 1));
            totalbits += sz;
            remaining -= sz;
            num_sized += 1;
        }
    }

    // whatever is left, we divide among the rest of the tables
    int table_size_bits = (remaining / (specs.size() - num_sized));
    for (int i = 0; i < specs.size(); i += 1) {
        // if a table doesn't have a size yet, give it one and count those bits
        if (!table_sizes[i]) {
            int my_table_size = table_size_bits /
                (specs[i]->width + (n_sign_bits - 1)); // extra sign bits
            table_sizes[i] = my_table_size;
            totalbits += my_table_size * (specs[i]->width + (n_sign_bits - 1));
        }
    }

    DPRINTF(Branch, "%d bits of metadata so far, %d left out of "
            "%d total budget\n", totalbits, remaining, budgetbits);
    DPRINTF(Branch, "table size is %d bits, %d entries for 5 bit, %d entries "
            "for 6 bit\n", table_size_bits,
            table_size_bits / (5 + (n_sign_bits - 1)),
            table_size_bits / (6 + (n_sign_bits - 1)));
    DPRINTF(Branch, "%d total bits (%0.2fKB)\n", totalbits,
            totalbits / 8192.0);
}
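
// Worked example added for clarity (numbers are made up): with a budget of
// budgetbits = 65536 and totalbits = 30000 consumed by the fixed history
// structures, remaining = 35536 bits split across 8 unsized tables gives
// table_size_bits = 4442.  With n_sign_bits = 2, a 5-bit-wide table then
// receives 4442 / (5 + 1) = 740 entries, which is exactly the "entries for
// 5 bit" figure printed by the DPRINTF above.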

void
MultiperspectivePerceptron::findBest(ThreadID tid,
                                     std::vector<int> &best_preds) const
{
    if (threshold < 0) {
        return;
    }
    struct BestPair
    {
        int index;
        int mpreds;
        bool operator<(BestPair const &bp) const
        {
            return mpreds < bp.mpreds;
        }
    };
    std::vector<BestPair> pairs(best_preds.size());
    for (int i = 0; i < best_preds.size(); i += 1) {
        pairs[i].index = i;
        pairs[i].mpreds = threadData[tid]->mpreds[i];
    }
    std::sort(pairs.begin(), pairs.end());
    for (int i = 0; i < (std::min(nbest, (int) best_preds.size())); i += 1) {
        best_preds[i] = pairs[i].index;
    }
}
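
// Note added for clarity: sorting the BestPair records by their per-table
// misprediction counters moves the most accurate feature tables to the
// front, so best_preds[0..nbest) names the subset whose partial sum
// (bestval) computeOutput() uses for low-confidence branches.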

unsigned int
MultiperspectivePerceptron::getIndex(ThreadID tid, const MPPBranchInfo &bi,
                                     const HistorySpec &spec, int index) const
{
    unsigned int g = spec.getHash(tid, bi.getPC(), bi.getPC2(), index);
    unsigned long long int h = g;
    // shift the hash from the feature to xor with the hashed PC
    if (hshift < 0) {
        h <<= -hshift;
        h ^= bi.getPC2();
    } else {
        h <<= hshift;
        h ^= bi.getHPC();
    }
    // xor in the imli counter(s) and/or recency position based on the masks
    if ((1ull << index) & imli_mask1) {
        h ^= threadData[tid]->imli_counter[0];
    }
    if ((1ull << index) & imli_mask4) {
        h ^= threadData[tid]->imli_counter[3];
    }
    if (doing_recency) {
        if ((1ull << index) & recencypos_mask) {
            h ^= RECENCYPOS::hash(threadData[tid]->recency_stack, table_sizes,
                    bi.getPC2(), 31, index);
        }
    }
    h %= table_sizes[index];
    return h;
}
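
// Sketch added for clarity, assuming a positive hshift: the folding above
// amounts to
//
//     h = (spec.getHash(tid, pc, pc2, index) << hshift) ^ bi.getHPC();
//     h ^= <optional IMLI counter / recency-position terms>;
//     h %= table_sizes[index];
//
// shifting the feature hash before the XOR keeps its low-order bits from
// cancelling against the hashed PC, and the final modulo maps the result
// into the feature's table.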

int
MultiperspectivePerceptron::computeOutput(ThreadID tid, MPPBranchInfo &bi)
{
    // list of best predictors
    std::vector<int> best_preds(specs.size(), -1);

    // initialize sum
    bi.yout = 0;

    // bias the prediction by whether the local history is
    // one of four distinctive patterns
    int lhist = threadData[tid]->localHistories[bi.getPC()];
    int history_len = threadData[tid]->localHistories.getLocalHistoryLength();
    if (lhist == 0) {
        bi.yout = bias0;
    } else if (lhist == ((1 << history_len) - 1)) {
        bi.yout = bias1;
    } else if (lhist == (1 << (history_len - 1))) {
        bi.yout = biasmostly0;
    } else if (lhist == ((1 << (history_len - 1)) - 1)) {
        bi.yout = biasmostly1;
    }
    // find the best subset of features to use in case of a low-confidence
    // branch
    findBest(tid, best_preds);

    // begin computation of the sum for low-confidence branch
    int bestval = 0;

    for (int i = 0; i < specs.size(); i += 1) {
        HistorySpec const &spec = *specs[i];
        // get the hash to index the table
        unsigned int hashed_idx = getIndex(tid, bi, spec, i);
        // add the weight; first get the weight's magnitude
        int counter = threadData[tid]->tables[i][hashed_idx];
        // get the sign
        bool sign =
            threadData[tid]->sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
        // apply the transfer function and multiply by a coefficient
        int weight = spec.coeff * ((spec.width == 5) ?
                xlat4[counter] : xlat[counter]);
        // apply the sign
        int val = sign ? -weight : weight;
        // add the value
        bi.yout += val;
        // if this is one of those good features, add the value to bestval
        if (threshold >= 0) {
            for (int j = 0;
                 j < std::min(nbest, (int) best_preds.size());
                 j += 1)
            {
                if (best_preds[j] == i) {
                    bestval += val;
                    break;
                }
            }
        }
    }
    // apply a fudge factor to affect when training is triggered
    bi.yout *= fudge;
    return bestval;
}
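
// Example added for clarity: with a local history length of 4, the four
// special patterns tested above are 0b0000 (always not taken -> bias0),
// 0b1111 (always taken -> bias1), 0b1000 (a single taken bit ->
// biasmostly0), and 0b0111 (a single not-taken bit -> biasmostly1).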

void
MultiperspectivePerceptron::satIncDec(bool taken, bool &sign, int &counter,
                                      int max_weight) const
{
    if (taken) {
        // increment sign/magnitude
        if (sign) {
            // go toward 0 away from negative max weight
            if (counter == 0) {
                sign = false; // moved to positive 0
            } else {
                counter -= 1;
            }
        } else {
            // go toward max weight away from 0
            if (counter < max_weight) {
                counter += 1;
            }
        }
    } else {
        // decrement sign/magnitude
        if (sign) {
            // go toward negative max weight down from 0
            if (counter < max_weight) {
                counter += 1;
            }
        } else {
            // go toward 0 away from max weight
            if (counter == 0) {
                sign = true; // negative 0
            } else {
                counter -= 1;
            }
        }
    }
}
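
// Worked example added for clarity: weights are kept in sign/magnitude
// form, so with max_weight = 15 a run of taken outcomes moves a weight
// through (sign=true, counter=1) -> (true, 0) -> (false, 0) -> (false, 1):
// the value walks -1, -0, +0, +1 and saturates at the maximum magnitude
// instead of wrapping around as a two's-complement counter would.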

void
MultiperspectivePerceptron::train(ThreadID tid, MPPBranchInfo &bi, bool taken)
{
    std::vector<std::vector<short int>> &tables = threadData[tid]->tables;
    std::vector<std::vector<std::array<bool, 2>>> &sign_bits =
        threadData[tid]->sign_bits;
    std::vector<int> &mpreds = threadData[tid]->mpreds;
    // was the prediction correct?
    bool correct = (bi.yout >= 1) == taken;
    // what is the magnitude of yout?
    int abs_yout = abs(bi.yout);
    // keep track of mispredictions per table
    if (threshold >= 0) if (!tuneonly || (abs_yout <= threshold)) {
        bool halve = false;

        // for each table, figure out if there was a misprediction
        for (int i = 0; i < specs.size(); i += 1) {
            HistorySpec const &spec = *specs[i];
            // get the hash to index the table
            unsigned int hashed_idx = getIndex(tid, bi, spec, i);
            bool sign = sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
            int counter = tables[i][hashed_idx];
            int weight = spec.coeff * ((spec.width == 5) ?
                    xlat4[counter] : xlat[counter]);
            if (sign) weight = -weight;
            bool pred = weight >= 1;
            if (pred != taken) {
                mpreds[i] += 1;
                if (mpreds[i] == (1 << tunebits) - 1) {
                    halve = true;
                }
            }
        }
        // if we reach the maximum counter value, halve all the counters
        if (halve) {
            for (int i = 0; i < specs.size(); i += 1) {
                mpreds[i] /= 2;
            }
        }
    }
    // if the branch was predicted incorrectly or the correct
    // prediction was weak, update the weights
    bool do_train = !correct || (abs_yout <= theta);
    if (!do_train) return;

    // adaptive theta training, adapted from O-GEHL
    if (!correct) {
        thresholdCounter += 1;
        if (thresholdCounter >= speed) {
            theta += 1;
            thresholdCounter = 0;
        }
    }
    if (correct && abs_yout < theta) {
        thresholdCounter -= 1;
        if (thresholdCounter <= -speed) {
            theta -= 1;
            thresholdCounter = 0;
        }
    }

    // train the weights, computing what the value of yout
    // would have been if these updates had been applied before
    int newyout = 0;
    for (int i = 0; i < specs.size(); i += 1) {
        HistorySpec const &spec = *specs[i];
        // get the magnitude
        unsigned int hashed_idx = getIndex(tid, bi, spec, i);
        int counter = tables[i][hashed_idx];
        // get the sign
        bool sign = sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
        // increment/decrement if taken/not taken
        satIncDec(taken, sign, counter, (1 << (spec.width - 1)) - 1);
        // update the magnitude and sign
        tables[i][hashed_idx] = counter;
        sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits] = sign;
        int weight = ((spec.width == 5) ? xlat4[counter] : xlat[counter]);
        // update the new version of yout
        if (sign) {
            newyout -= weight;
        } else {
            newyout += weight;
        }
    }

    // if the prediction still would have been incorrect even
    // with the updated weights, update some more weights to
    // try to fix the problem
    if ((newyout >= 1) != taken) {
        if (extra_rounds != -1) {
            int round_counter = 0;
            bool found;
            do {
                // update a random weight
                int besti = -1;
                int nrand = random_mt.random<int>() % specs.size();
                int pout;
                found = false;
                for (int j = 0; j < specs.size(); j += 1) {
                    int i = (nrand + j) % specs.size();
                    HistorySpec const &spec = *specs[i];
                    unsigned int hashed_idx = getIndex(tid, bi, spec, i);
                    int counter = tables[i][hashed_idx];
                    bool sign =
                        sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
                    int weight = ((spec.width == 5) ?
                            xlat4[counter] : xlat[counter]);
                    int signed_weight = sign ? -weight : weight;
                    pout = newyout - signed_weight;
                    if ((pout >= 1) == taken) {
                        // we have found a weight that, if we blow
                        // it away, will help!
                        besti = i;
                        break;
                    }
                }
                if (besti != -1) {
                    int i = besti;
                    HistorySpec const &spec = *specs[i];
                    unsigned int hashed_idx = getIndex(tid, bi, spec, i);
                    int counter = tables[i][hashed_idx];
                    bool sign =
                        sign_bits[i][hashed_idx][bi.getHPC() % n_sign_bits];
                    if (counter > 1) {
                        counter--;
                        tables[i][hashed_idx] = counter;
                    }
                    int weight = ((spec.width == 5) ?
                            xlat4[counter] : xlat[counter]);
                    int signed_weight = sign ? -weight : weight;
                    int out = pout + signed_weight;
                    round_counter += 1;
                    if ((out >= 1) != taken) {
                        found = true;
                    }
                }
            } while (found && round_counter < extra_rounds);
        }
    }
}
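
// Note added for clarity: the theta adaptation above follows the O-GEHL
// rule -- every time the shared counter accumulates 'speed' mispredictions
// theta grows by one, and every time it accumulates 'speed' correct-but-weak
// (|yout| < theta) outcomes theta shrinks by one, so the training threshold
// settles where the two event rates balance.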

void
MultiperspectivePerceptron::uncondBranch(ThreadID tid, Addr pc,
                                         void * &bp_history)
{
    MPPBranchInfo *bi = new MPPBranchInfo(pc, pcshift, false);
    std::vector<unsigned int> &ghist_words = threadData[tid]->ghist_words;

    bp_history = (void *)bi;
    unsigned short int pc2 = pc >> 2;
    bool ab = !(pc & (1 << pcbit));
    for (int i = 0; i < ghist_length / blockSize + 1; i += 1) {
        bool ab_new = (ghist_words[i] >> (blockSize - 1)) & 1;
        ghist_words[i] <<= 1;
        ghist_words[i] |= ab;
        ghist_words[i] &= (1 << blockSize) - 1;
        ab = ab_new;
    }
    memmove(&threadData[tid]->path_history[1],
            &threadData[tid]->path_history[0],
            sizeof(unsigned short int) * (path_length - 1));
    threadData[tid]->path_history[0] = pc2;
}

bool
MultiperspectivePerceptron::lookup(ThreadID tid, Addr instPC,
                                   void * &bp_history)
{
    MPPBranchInfo *bi = new MPPBranchInfo(instPC, pcshift, true);
    bp_history = (void *)bi;

    bool use_static = false;

    if (!threadData[tid]->filterTable.empty()) {
        unsigned int findex =
            bi->getHashFilter(threadData[tid]->last_ghist_bit) %
            threadData[tid]->filterTable.size();
        FilterEntry &f = threadData[tid]->filterTable[findex];
        if (f.alwaysNotTakenSoFar()) {
            bi->filtered = true;
            bi->prediction = false;
            return false;
        } else if (f.alwaysTakenSoFar()) {
            bi->filtered = true;
            bi->prediction = true;
            return true;
        }
        if (f.neverSeen()) {
            use_static = true;
        }
    }

    int bestval = computeOutput(tid, *bi);
    if (use_static) {
        bi->prediction = false;
    } else {
        if (abs(bi->yout) <= threshold) {
            bi->prediction = (bestval >= 1);
        } else {
            bi->prediction = (bi->yout >= 1);
        }
    }

    return bi->prediction;
}
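
// Example added for clarity: the filter table short-circuits branches that
// have only ever gone one direction -- a loop branch seen many times and
// always taken returns 'true' above without consulting the perceptron
// tables at all, while a never-seen branch falls back to a static
// not-taken prediction via use_static.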

void
MultiperspectivePerceptron::update(ThreadID tid, Addr instPC, bool taken,
                                   void *bp_history, bool squashed,
                                   const StaticInstPtr & inst,
                                   Addr corrTarget)
{
    assert(bp_history);
    MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
    if (squashed) {
        //delete bi;
        return;
    }

    if (bi->isUnconditional()) {
        delete bi;
        return;
    }

    bool do_train = true;

    if (!threadData[tid]->filterTable.empty()) {
        int findex = bi->getHashFilter(threadData[tid]->last_ghist_bit) %
            threadData[tid]->filterTable.size();
        FilterEntry &f = threadData[tid]->filterTable[findex];

        // compute this before updating the filter entry, so we
        // don't skip training the first time a branch is seen
        bool transition = false;
        if (f.alwaysNotTakenSoFar() || f.alwaysTakenSoFar()) {
            do_train = false;
        }
        if (taken) {
            if (f.alwaysNotTakenSoFar()) {
                transition = true;
            }
            f.seenTaken = true;
        } else {
            if (f.alwaysTakenSoFar()) {
                transition = true;
            }
            f.seenUntaken = true;
        }
        // is this the first time the branch has gone both ways?
        if (transition) {
            threadData[tid]->occupancy += 1;
        }
        // for every new dynamic branch, when there are
        // more than 'decay' number of branches in the
        // filter, blow a random filter entry away
        if (decay && transition &&
            ((threadData[tid]->occupancy > decay) || (decay == 1))) {
            int rnd = random_mt.random<int>() %
                threadData[tid]->filterTable.size();
            FilterEntry &frand = threadData[tid]->filterTable[rnd];
            if (frand.seenTaken && frand.seenUntaken) {
                threadData[tid]->occupancy -= 1;
            }
            frand.seenTaken = false;
            frand.seenUntaken = false;
        }
    }

    if (do_train) {
        train(tid, *bi, taken);
    }

    enum RecordFiltered
    {
        Imli = 1,
        GHist = 2,
        Path = 4,
        Acyclic = 8,
        Mod = 16,
        Blurry = 32,
        // Should never record a filtered local branch - duh!
        Local = 64,
        Recency = 128
    };

    // four different styles of IMLI
    if (!bi->filtered || (record_mask & Imli)) {
        unsigned int target = corrTarget;
        if (target < bi->getPC()) {
            if (taken) {
                threadData[tid]->imli_counter[0] += 1;
            } else {
                threadData[tid]->imli_counter[0] = 0;
            }
            if (!taken) {
                threadData[tid]->imli_counter[1] += 1;
            } else {
                threadData[tid]->imli_counter[1] = 0;
            }
        } else {
            if (taken) {
                threadData[tid]->imli_counter[2] += 1;
            } else {
                threadData[tid]->imli_counter[2] = 0;
            }
            if (!taken) {
                threadData[tid]->imli_counter[3] += 1;
            } else {
                threadData[tid]->imli_counter[3] = 0;
            }
        }
    }

    bool hashed_taken = hash_taken ? (taken ^ !!(bi->getPC() & (1 << pcbit)))
                                   : taken;
    // record into ghist
    if (!bi->filtered || (record_mask & GHist)) {
        bool ab = hashed_taken;
        assert(threadData[tid]->ghist_words.size() > 0);
        for (int i = 0; i < ghist_length / blockSize + 1; i += 1) {
            unsigned int a = threadData[tid]->ghist_words[i];
            bool ab_new = (a >> (blockSize - 1)) & 1;
            a <<= 1;
            a |= ab;
            ab = ab_new;
            a &= (1 << blockSize) - 1;
            threadData[tid]->ghist_words[i] = a;
        }
    }

    // record into path history
    if (!bi->filtered || (record_mask & Path)) {
        assert(threadData[tid]->path_history.size() > 0);
        memmove(&threadData[tid]->path_history[1],
                &threadData[tid]->path_history[0],
                sizeof(unsigned short int) * (path_length - 1));
        threadData[tid]->path_history[0] = bi->getPC2();
    }

    // record into acyclic history
    if (!bi->filtered || (record_mask & Acyclic)) {
        threadData[tid]->updateAcyclic(hashed_taken, bi->getHPC());
    }

    // record into modulo path history
    if (!bi->filtered || (record_mask & Mod)) {
        for (int ii = 0; ii < modpath_indices.size(); ii += 1) {
            int i = modpath_indices[ii];
            if (bi->getHPC() % (i + 2) == 0) {
                memmove(&threadData[tid]->modpath_histories[i][1],
                        &threadData[tid]->modpath_histories[i][0],
                        sizeof(unsigned short int) * (modpath_lengths[ii]-1));
                threadData[tid]->modpath_histories[i][0] = bi->getPC2();
            }
        }
    }

    // update blurry history
    if (!bi->filtered || (record_mask & Blurry)) {
        std::vector<std::vector<unsigned int>> &blurrypath_histories =
            threadData[tid]->blurrypath_histories;

        for (int i = 0; i < blurrypath_histories.size(); i += 1)
        {
            if (blurrypath_histories[i].size() > 0) {
                unsigned int z = bi->getPC() >> i;
                if (blurrypath_histories[i][0] != z) {
                    memmove(&blurrypath_histories[i][1],
                            &blurrypath_histories[i][0],
                            sizeof(unsigned int) *
                            (blurrypath_histories[i].size() - 1));
                    blurrypath_histories[i][0] = z;
                }
            }
        }
    }

    // record into modulo pattern history
    if (!bi->filtered || (record_mask & Mod)) {
        for (int ii = 0; ii < modhist_indices.size(); ii += 1) {
            int i = modhist_indices[ii];
            if (bi->getHPC() % (i + 2) == 0) {
                for (int j = modhist_lengths[ii] - 1; j > 0; j -= 1) {
                    threadData[tid]->mod_histories[i][j] =
                        threadData[tid]->mod_histories[i][j-1];
                }
                threadData[tid]->mod_histories[i][0] = hashed_taken;
            }
        }
    }

    // insert this PC into the recency stack
    if (doing_recency) {
        if (!bi->filtered || (record_mask & Recency)) {
            threadData[tid]->insertRecency(bi->getPC2(), assoc);
        }
    }

    // record into a local history
    if (!bi->filtered || (record_mask & Local)) {
        threadData[tid]->localHistories.update(bi->getPC(), hashed_taken);
    }

    // update last ghist bit, used to index filter
    threadData[tid]->last_ghist_bit = taken;

    delete bi;
}
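
// Note added for clarity on the IMLI section above: a backward branch
// (corrTarget < PC) is treated as a loop back edge, so imli_counter[0]
// counts consecutive taken iterations of the innermost loop and resets to
// zero at the not-taken exit; counters [1] to [3] track the remaining
// combinations of branch direction and target polarity in the same way.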

void
MultiperspectivePerceptron::btbUpdate(ThreadID tid, Addr branch_addr,
                                      void* &bp_history)
{
}

void
MultiperspectivePerceptron::squash(ThreadID tid, void *bp_history)
{
    assert(bp_history);
    MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
    delete bi;
}

} // namespace branch_prediction
} // namespace gem5