gem5  v20.1.0.0
bop.cc
Go to the documentation of this file.
1 
30 
31 #include "debug/HWPrefetch.hh"
32 #include "params/BOPPrefetcher.hh"
33 
34 namespace Prefetcher {
35 
36 BOP::BOP(const BOPPrefetcherParams *p)
37  : Queued(p),
38  scoreMax(p->score_max), roundMax(p->round_max),
39  badScore(p->bad_score), rrEntries(p->rr_size),
40  tagMask((1 << p->tag_bits) - 1),
41  delayQueueEnabled(p->delay_queue_enable),
42  delayQueueSize(p->delay_queue_size),
43  delayTicks(cyclesToTicks(p->delay_queue_cycles)),
44  delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
45  issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
46  bestScore(0), round(0)
47 {
48  if (!isPowerOf2(rrEntries)) {
49  fatal("%s: number of RR entries is not power of 2\n", name());
50  }
51  if (!isPowerOf2(blkSize)) {
52  fatal("%s: cache line size is not power of 2\n", name());
53  }
54  if (!(p->negative_offsets_enable && (p->offset_list_size % 2 == 0))) {
55  fatal("%s: negative offsets enabled with odd offset list size\n",
56  name());
57  }
58 
59  rrLeft.resize(rrEntries);
60  rrRight.resize(rrEntries);
61 
62  // Following the paper implementation, a list with the specified number
63  // of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
64  const int factors[] = { 2, 3, 5 };
65  unsigned int i = 0;
66  int64_t offset_i = 1;
67 
68  while (i < p->offset_list_size)
69  {
70  int64_t offset = offset_i;
71 
72  for (int n : factors) {
73  while ((offset % n) == 0) {
74  offset /= n;
75  }
76  }
77 
78  if (offset == 1) {
79  offsetsList.push_back(OffsetListEntry(offset_i, 0));
80  i++;
81  // If we want to use negative offsets, add also the negative value
82  // of the offset just calculated
83  if (p->negative_offsets_enable) {
84  offsetsList.push_back(OffsetListEntry(-offset_i, 0));
85  i++;
86  }
87  }
88 
89  offset_i++;
90  }
91 
92  offsetsListIterator = offsetsList.begin();
93 }
94 
95 void
97 {
98  while (!delayQueue.empty() &&
99  delayQueue.front().processTick <= curTick())
100  {
101  Addr addr_x = delayQueue.front().baseAddr;
102  insertIntoRR(addr_x, RRWay::Left);
103  delayQueue.pop_front();
104  }
105 
106  // Schedule an event for the next element if there is one
107  if (!delayQueue.empty()) {
108  schedule(delayQueueEvent, delayQueue.front().processTick);
109  }
110 }
111 
112 unsigned int
113 BOP::hash(Addr addr, unsigned int way) const
114 {
115  Addr hash1 = addr >> way;
116  Addr hash2 = hash1 >> floorLog2(rrEntries);
117  return (hash1 ^ hash2) & (Addr)(rrEntries - 1);
118 }
119 
120 void
121 BOP::insertIntoRR(Addr addr, unsigned int way)
122 {
123  switch (way) {
124  case RRWay::Left:
125  rrLeft[hash(addr, RRWay::Left)] = addr;
126  break;
127  case RRWay::Right:
128  rrRight[hash(addr, RRWay::Right)] = addr;
129  break;
130  }
131 }
132 
133 void
135 {
136  if (delayQueue.size() == delayQueueSize) {
137  return;
138  }
139 
140  // Add the address to the delay queue and schedule an event to process
141  // it after the specified delay cycles
142  Tick process_tick = curTick() + delayTicks;
143 
144  delayQueue.push_back(DelayQueueEntry(x, process_tick));
145 
146  if (!delayQueueEvent.scheduled()) {
147  schedule(delayQueueEvent, process_tick);
148  }
149 }
150 
151 void
153 {
154  for (auto& it : offsetsList) {
155  it.second = 0;
156  }
157 }
158 
159 inline Addr
161 {
162  return (addr >> blkSize) & tagMask;
163 }
164 
165 bool
167 {
168  for (auto& it : rrLeft) {
169  if (it == addr) {
170  return true;
171  }
172  }
173 
174  for (auto& it : rrRight) {
175  if (it == addr) {
176  return true;
177  }
178  }
179 
180  return false;
181 }
182 
183 void
185 {
186  Addr offset_addr = (*offsetsListIterator).first;
187  Addr lookup_addr = x - offset_addr;
188 
189  // There was a hit in the RR table, increment the score for this offset
190  if (testRR(lookup_addr)) {
191  DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
192  (*offsetsListIterator).second++;
193  if ((*offsetsListIterator).second > bestScore) {
194  bestScore = (*offsetsListIterator).second;
195  phaseBestOffset = (*offsetsListIterator).first;
196  DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
197  }
198  }
199 
201 
202  // All the offsets in the list were visited meaning that a learning
203  // phase finished. Check if
204  if (offsetsListIterator == offsetsList.end()) {
206  round++;
207 
208  // Check if the best offset must be updated if:
209  // (1) One of the scores equals SCORE_MAX
210  // (2) The number of rounds equals ROUND_MAX
211  if ((bestScore >= scoreMax) || (round == roundMax)) {
213  round = 0;
214  bestScore = 0;
215  phaseBestOffset = 0;
216  resetScores();
217  issuePrefetchRequests = true;
218  } else if (phaseBestOffset <= badScore) {
219  issuePrefetchRequests = false;
220  }
221  }
222 }
223 
224 void
226  std::vector<AddrPriority> &addresses)
227 {
228  Addr addr = pfi.getAddr();
229  Addr tag_x = tag(addr);
230 
231  if (delayQueueEnabled) {
232  insertIntoDelayQueue(tag_x);
233  } else {
234  insertIntoRR(tag_x, RRWay::Left);
235  }
236 
237  // Go through the nth offset and update the score, the best score and the
238  // current best offset if a better one is found
239  bestOffsetLearning(tag_x);
240 
241  // This prefetcher is a degree 1 prefetch, so it will only generate one
242  // prefetch at most per access
243  if (issuePrefetchRequests) {
244  Addr prefetch_addr = addr + (bestOffset << lBlkSize);
245  addresses.push_back(AddrPriority(prefetch_addr, 0));
246  DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
247  }
248 }
249 
250 void
252 {
253  // Only insert into the RR right way if it's the pkt is a HWP
254  if (!pkt->cmd.isHWPrefetch()) return;
255 
256  Addr tag_y = tag(pkt->getAddr());
257 
258  if (issuePrefetchRequests) {
259  insertIntoRR(tag_y - bestOffset, RRWay::Right);
260  }
261 }
262 
263 } // namespace Prefetcher
264 
266 BOPPrefetcherParams::create()
267 {
268  return new Prefetcher::BOP(this);
269 }
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:183
Prefetcher::Queued::AddrPriority
std::pair< Addr, int32_t > AddrPriority
Definition: queued.hh:182
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:460
Prefetcher::BOP::delayQueueEventWrapper
void delayQueueEventWrapper()
Event to handle the delay queue processing.
Definition: bop.cc:96
Prefetcher::BOP::tag
Addr tag(Addr addr) const
Generate the tag for the specified address based on the tag bits and the block size.
Definition: bop.cc:160
Prefetcher::BOP::offsetsList
std::vector< OffsetListEntry > offsetsList
Definition: bop.hh:74
Packet::getAddr
Addr getAddr() const
Definition: packet.hh:754
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
Prefetcher::Base::blkSize
unsigned blkSize
The block size of the parent cache.
Definition: base.hh:262
Prefetcher::BOP::bestOffset
Addr bestOffset
Current best offset to issue prefetches.
Definition: bop.hh:100
Prefetcher::BOP::roundMax
const unsigned int roundMax
Definition: bop.hh:59
Prefetcher::BOP::delayQueueEvent
EventFunctionWrapper delayQueueEvent
Definition: bop.hh:95
Prefetcher::BOP::delayQueueSize
const unsigned int delayQueueSize
Definition: bop.hh:66
Prefetcher::BOP::calculatePrefetch
void calculatePrefetch(const PrefetchInfo &pfi, std::vector< AddrPriority > &addresses) override
Definition: bop.cc:225
Prefetcher::BOP::hash
unsigned int hash(Addr addr, unsigned int way) const
Generate a hash for the specified address to index the RR table.
Definition: bop.cc:113
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
Prefetcher::BOP::delayTicks
const unsigned int delayTicks
Definition: bop.hh:67
Prefetcher::BOP::issuePrefetchRequests
bool issuePrefetchRequests
Hardware prefetcher enabled.
Definition: bop.hh:98
std::vector
STL vector class.
Definition: stl.hh:37
floorLog2
std::enable_if< std::is_integral< T >::value, int >::type floorLog2(T x)
Definition: intmath.hh:63
Prefetcher::BOP::insertIntoRR
void insertIntoRR(Addr addr, unsigned int way)
Insert the specified address into the RR table.
Definition: bop.cc:121
ArmISA::n
Bitfield< 31 > n
Definition: miscregs_types.hh:450
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1005
bop.hh
Prefetcher::BOP::tagMask
const unsigned int tagMask
Definition: bop.hh:63
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
Prefetcher::Base::lBlkSize
unsigned lBlkSize
log_2(block size of the parent cache).
Definition: base.hh:265
Prefetcher::BOP::testRR
bool testRR(Addr) const
Test if @X-O is hitting in the RR table to update the offset score.
Definition: bop.cc:166
Prefetcher::BOP::rrRight
std::vector< Addr > rrRight
Definition: bop.hh:70
Prefetcher
Copyright (c) 2018 Metempsy Technology Consulting All rights reserved.
Definition: base.hh:78
Prefetcher::BOP::round
unsigned int round
Current round.
Definition: bop.hh:108
Prefetcher::BOP::bestOffsetLearning
void bestOffsetLearning(Addr)
Learning phase of the BOP.
Definition: bop.cc:184
Prefetcher::BOP::scoreMax
const unsigned int scoreMax
Learning phase parameters.
Definition: bop.hh:58
RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:69
Prefetcher::BOP
Definition: bop.hh:48
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
Prefetcher::BOP::DelayQueueEntry
In a first implementation of the BO prefetcher, both banks of the RR were written simultaneously when...
Definition: bop.hh:82
Prefetcher::Queued
Definition: queued.hh:54
name
const std::string & name()
Definition: trace.cc:50
Prefetcher::BOP::badScore
const unsigned int badScore
Definition: bop.hh:60
Packet::cmd
MemCmd cmd
The command field of the packet.
Definition: packet.hh:335
Prefetcher::BOP::notifyFill
void notifyFill(const PacketPtr &pkt) override
Update the RR right table after a prefetch fill.
Definition: bop.cc:251
Prefetcher::BOP::offsetsListIterator
std::vector< OffsetListEntry >::iterator offsetsListIterator
Current test offset index.
Definition: bop.hh:104
Prefetcher::BOP::bestScore
unsigned int bestScore
Max score found so far.
Definition: bop.hh:106
Prefetcher::BOP::rrEntries
const unsigned int rrEntries
Recent requests table parameteres.
Definition: bop.hh:62
Prefetcher::BOP::phaseBestOffset
Addr phaseBestOffset
Current best offset found in the learning phase.
Definition: bop.hh:102
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:257
addr
ip6_addr_t addr
Definition: inet.hh:423
Prefetcher::Base::PrefetchInfo
Class containing the information needed by the prefetch to train and generate new prefetch requests.
Definition: base.hh:90
Prefetcher::BOP::delayQueue
std::deque< DelayQueueEntry > delayQueue
Definition: bop.hh:91
MemCmd::isHWPrefetch
bool isHWPrefetch() const
Definition: packet.hh:225
Prefetcher::BOP::delayQueueEnabled
const bool delayQueueEnabled
Delay queue parameters.
Definition: bop.hh:65
Prefetcher::BOP::resetScores
void resetScores()
Reset all the scores from the offset list.
Definition: bop.cc:152
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
Prefetcher::BOP::BOP
BOP(const BOPPrefetcherParams *p)
Definition: bop.cc:36
Prefetcher::Base::PrefetchInfo::getAddr
Addr getAddr() const
Obtains the address value of this Prefetcher address.
Definition: base.hh:117
isPowerOf2
bool isPowerOf2(const T &n)
Definition: intmath.hh:102
Prefetcher::BOP::rrLeft
std::vector< Addr > rrLeft
Definition: bop.hh:69
Prefetcher::BOP::insertIntoDelayQueue
void insertIntoDelayQueue(Addr addr)
Insert the specified address into the delay queue.
Definition: bop.cc:134
curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:45
ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:153

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17