gem5  v22.1.0.0
bop.cc
Go to the documentation of this file.
1 
30 
31 #include "debug/HWPrefetch.hh"
32 #include "params/BOPPrefetcher.hh"
33 
34 namespace gem5
35 {
36 
38 namespace prefetch
39 {
40 
41 BOP::BOP(const BOPPrefetcherParams &p)
42  : Queued(p),
43  scoreMax(p.score_max), roundMax(p.round_max),
44  badScore(p.bad_score), rrEntries(p.rr_size),
45  tagMask((1 << p.tag_bits) - 1),
46  delayQueueEnabled(p.delay_queue_enable),
47  delayQueueSize(p.delay_queue_size),
48  delayTicks(cyclesToTicks(p.delay_queue_cycles)),
49  delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
50  issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
51  bestScore(0), round(0)
52 {
53  if (!isPowerOf2(rrEntries)) {
54  fatal("%s: number of RR entries is not power of 2\n", name());
55  }
56  if (!isPowerOf2(blkSize)) {
57  fatal("%s: cache line size is not power of 2\n", name());
58  }
59  if (!(p.negative_offsets_enable && (p.offset_list_size % 2 == 0))) {
60  fatal("%s: negative offsets enabled with odd offset list size\n",
61  name());
62  }
63 
64  rrLeft.resize(rrEntries);
65  rrRight.resize(rrEntries);
66 
67  // Following the paper implementation, a list with the specified number
68  // of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
69  const int factors[] = { 2, 3, 5 };
70  unsigned int i = 0;
71  int64_t offset_i = 1;
72 
73  while (i < p.offset_list_size)
74  {
75  int64_t offset = offset_i;
76 
77  for (int n : factors) {
78  while ((offset % n) == 0) {
79  offset /= n;
80  }
81  }
82 
83  if (offset == 1) {
84  offsetsList.push_back(OffsetListEntry(offset_i, 0));
85  i++;
86  // If we want to use negative offsets, add also the negative value
87  // of the offset just calculated
88  if (p.negative_offsets_enable) {
89  offsetsList.push_back(OffsetListEntry(-offset_i, 0));
90  i++;
91  }
92  }
93 
94  offset_i++;
95  }
96 
97  offsetsListIterator = offsetsList.begin();
98 }
99 
100 void
102 {
103  while (!delayQueue.empty() &&
104  delayQueue.front().processTick <= curTick())
105  {
106  Addr addr_x = delayQueue.front().baseAddr;
107  insertIntoRR(addr_x, RRWay::Left);
108  delayQueue.pop_front();
109  }
110 
111  // Schedule an event for the next element if there is one
112  if (!delayQueue.empty()) {
113  schedule(delayQueueEvent, delayQueue.front().processTick);
114  }
115 }
116 
117 unsigned int
118 BOP::hash(Addr addr, unsigned int way) const
119 {
120  Addr hash1 = addr >> way;
121  Addr hash2 = hash1 >> floorLog2(rrEntries);
122  return (hash1 ^ hash2) & (Addr)(rrEntries - 1);
123 }
124 
125 void
126 BOP::insertIntoRR(Addr addr, unsigned int way)
127 {
128  switch (way) {
129  case RRWay::Left:
130  rrLeft[hash(addr, RRWay::Left)] = addr;
131  break;
132  case RRWay::Right:
133  rrRight[hash(addr, RRWay::Right)] = addr;
134  break;
135  }
136 }
137 
138 void
140 {
141  if (delayQueue.size() == delayQueueSize) {
142  return;
143  }
144 
145  // Add the address to the delay queue and schedule an event to process
146  // it after the specified delay cycles
147  Tick process_tick = curTick() + delayTicks;
148 
149  delayQueue.push_back(DelayQueueEntry(x, process_tick));
150 
151  if (!delayQueueEvent.scheduled()) {
152  schedule(delayQueueEvent, process_tick);
153  }
154 }
155 
156 void
158 {
159  for (auto& it : offsetsList) {
160  it.second = 0;
161  }
162 }
163 
164 inline Addr
166 {
167  return (addr >> blkSize) & tagMask;
168 }
169 
170 bool
172 {
173  for (auto& it : rrLeft) {
174  if (it == addr) {
175  return true;
176  }
177  }
178 
179  for (auto& it : rrRight) {
180  if (it == addr) {
181  return true;
182  }
183  }
184 
185  return false;
186 }
187 
188 void
190 {
191  Addr offset_addr = (*offsetsListIterator).first;
192  Addr lookup_addr = x - offset_addr;
193 
194  // There was a hit in the RR table, increment the score for this offset
195  if (testRR(lookup_addr)) {
196  DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
197  (*offsetsListIterator).second++;
198  if ((*offsetsListIterator).second > bestScore) {
199  bestScore = (*offsetsListIterator).second;
200  phaseBestOffset = (*offsetsListIterator).first;
201  DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
202  }
203  }
204 
206 
207  // All the offsets in the list were visited meaning that a learning
208  // phase finished. Check if
209  if (offsetsListIterator == offsetsList.end()) {
211  round++;
212 
213  // Check if the best offset must be updated if:
214  // (1) One of the scores equals SCORE_MAX
215  // (2) The number of rounds equals ROUND_MAX
216  if ((bestScore >= scoreMax) || (round == roundMax)) {
218  round = 0;
219  bestScore = 0;
220  phaseBestOffset = 0;
221  resetScores();
222  issuePrefetchRequests = true;
223  } else if (bestScore <= badScore) {
224  issuePrefetchRequests = false;
225  }
226  }
227 }
228 
229 void
231  std::vector<AddrPriority> &addresses)
232 {
233  Addr addr = pfi.getAddr();
234  Addr tag_x = tag(addr);
235 
236  if (delayQueueEnabled) {
237  insertIntoDelayQueue(tag_x);
238  } else {
239  insertIntoRR(tag_x, RRWay::Left);
240  }
241 
242  // Go through the nth offset and update the score, the best score and the
243  // current best offset if a better one is found
244  bestOffsetLearning(tag_x);
245 
246  // This prefetcher is a degree 1 prefetch, so it will only generate one
247  // prefetch at most per access
248  if (issuePrefetchRequests) {
249  Addr prefetch_addr = addr + (bestOffset << lBlkSize);
250  addresses.push_back(AddrPriority(prefetch_addr, 0));
251  DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
252  }
253 }
254 
255 void
257 {
258  // Only insert into the RR right way if it's the pkt is a HWP
259  if (!pkt->cmd.isHWPrefetch()) return;
260 
261  Addr tag_y = tag(pkt->getAddr());
262 
263  if (issuePrefetchRequests) {
264  insertIntoRR(tag_y - bestOffset, RRWay::Right);
265  }
266 }
267 
268 } // namespace prefetch
269 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
bool isHWPrefetch() const
Definition: packet.hh:253
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
Addr getAddr() const
Definition: packet.hh:805
MemCmd cmd
The command field of the packet.
Definition: packet.hh:371
std::deque< DelayQueueEntry > delayQueue
Definition: bop.hh:97
void calculatePrefetch(const PrefetchInfo &pfi, std::vector< AddrPriority > &addresses) override
Definition: bop.cc:230
unsigned int round
Current round.
Definition: bop.hh:114
std::vector< Addr > rrLeft
Definition: bop.hh:75
unsigned int bestScore
Max score found so far.
Definition: bop.hh:112
const unsigned int delayQueueSize
Definition: bop.hh:72
void insertIntoDelayQueue(Addr addr)
Insert the specified address into the delay queue.
Definition: bop.cc:139
BOP(const BOPPrefetcherParams &p)
Definition: bop.cc:41
unsigned int hash(Addr addr, unsigned int way) const
Generate a hash for the specified address to index the RR table.
Definition: bop.cc:118
const unsigned int rrEntries
Recent requests table parameteres.
Definition: bop.hh:68
const unsigned int tagMask
Definition: bop.hh:69
std::vector< OffsetListEntry >::iterator offsetsListIterator
Current test offset index.
Definition: bop.hh:110
const unsigned int badScore
Definition: bop.hh:66
const bool delayQueueEnabled
Delay queue parameters.
Definition: bop.hh:71
const unsigned int scoreMax
Learning phase parameters.
Definition: bop.hh:64
std::vector< Addr > rrRight
Definition: bop.hh:76
bool issuePrefetchRequests
Hardware prefetcher enabled.
Definition: bop.hh:104
Addr tag(Addr addr) const
Generate the tag for the specified address based on the tag bits and the block size.
Definition: bop.cc:165
void delayQueueEventWrapper()
Event to handle the delay queue processing.
Definition: bop.cc:101
void bestOffsetLearning(Addr)
Learning phase of the BOP.
Definition: bop.cc:189
const unsigned int roundMax
Definition: bop.hh:65
void insertIntoRR(Addr addr, unsigned int way)
Insert the specified address into the RR table.
Definition: bop.cc:126
Addr phaseBestOffset
Current best offset found in the learning phase.
Definition: bop.hh:108
EventFunctionWrapper delayQueueEvent
Definition: bop.hh:101
void notifyFill(const PacketPtr &pkt) override
Update the RR right table after a prefetch fill.
Definition: bop.cc:256
std::vector< OffsetListEntry > offsetsList
Definition: bop.hh:80
bool testRR(Addr) const
Test if @X-O is hitting in the RR table to update the offset score.
Definition: bop.cc:171
void resetScores()
Reset all the scores from the offset list.
Definition: bop.cc:157
Addr bestOffset
Current best offset to issue prefetches.
Definition: bop.hh:106
const unsigned int delayTicks
Definition: bop.hh:73
Class containing the information needed by the prefetch to train and generate new prefetch requests.
Definition: base.hh:98
Addr getAddr() const
Obtains the address value of this Prefetcher address.
Definition: base.hh:125
unsigned blkSize
The block size of the parent cache.
Definition: base.hh:270
unsigned lBlkSize
log_2(block size of the parent cache).
Definition: base.hh:273
std::pair< Addr, int32_t > AddrPriority
Definition: queued.hh:191
STL vector class.
Definition: stl.hh:37
static constexpr std::enable_if_t< std::is_integral_v< T >, int > floorLog2(T x)
Definition: intmath.hh:59
static constexpr bool isPowerOf2(const T &n)
Definition: intmath.hh:98
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
Bitfield< 31 > n
Definition: misc_types.hh:462
Bitfield< 7 > i
Definition: misc_types.hh:67
Bitfield< 23, 0 > offset
Definition: types.hh:144
Bitfield< 4 > x
Definition: pagetable.hh:61
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 3 > addr
Definition: types.hh:84
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
uint64_t Tick
Tick count type.
Definition: types.hh:58
GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi)
In a first implementation of the BO prefetcher, both banks of the RR were written simultaneously when...
Definition: bop.hh:89
const std::string & name()
Definition: trace.cc:49

Generated on Wed Dec 21 2022 10:22:36 for gem5 by doxygen 1.9.1