gem5  [DEVELOP-FOR-23.0]
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
bop.cc
Go to the documentation of this file.
1 
30 
31 #include "debug/HWPrefetch.hh"
32 #include "params/BOPPrefetcher.hh"
33 
34 namespace gem5
35 {
36 
37 namespace prefetch
38 {
39 
40 BOP::BOP(const BOPPrefetcherParams &p)
41  : Queued(p),
42  scoreMax(p.score_max), roundMax(p.round_max),
43  badScore(p.bad_score), rrEntries(p.rr_size),
44  tagMask((1 << p.tag_bits) - 1),
45  delayQueueEnabled(p.delay_queue_enable),
46  delayQueueSize(p.delay_queue_size),
47  delayTicks(cyclesToTicks(p.delay_queue_cycles)),
48  delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
49  issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
50  bestScore(0), round(0)
51 {
52  if (!isPowerOf2(rrEntries)) {
53  fatal("%s: number of RR entries is not power of 2\n", name());
54  }
55  if (!isPowerOf2(blkSize)) {
56  fatal("%s: cache line size is not power of 2\n", name());
57  }
58  if (!(p.negative_offsets_enable && (p.offset_list_size % 2 == 0))) {
59  fatal("%s: negative offsets enabled with odd offset list size\n",
60  name());
61  }
62 
63  rrLeft.resize(rrEntries);
64  rrRight.resize(rrEntries);
65 
66  // Following the paper implementation, a list with the specified number
67  // of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
68  const int factors[] = { 2, 3, 5 };
69  unsigned int i = 0;
70  int64_t offset_i = 1;
71 
72  while (i < p.offset_list_size)
73  {
74  int64_t offset = offset_i;
75 
76  for (int n : factors) {
77  while ((offset % n) == 0) {
78  offset /= n;
79  }
80  }
81 
82  if (offset == 1) {
83  offsetsList.push_back(OffsetListEntry(offset_i, 0));
84  i++;
85  // If we want to use negative offsets, add also the negative value
86  // of the offset just calculated
87  if (p.negative_offsets_enable) {
88  offsetsList.push_back(OffsetListEntry(-offset_i, 0));
89  i++;
90  }
91  }
92 
93  offset_i++;
94  }
95 
96  offsetsListIterator = offsetsList.begin();
97 }
98 
99 void
101 {
102  while (!delayQueue.empty() &&
103  delayQueue.front().processTick <= curTick())
104  {
105  Addr addr_x = delayQueue.front().baseAddr;
106  insertIntoRR(addr_x, RRWay::Left);
107  delayQueue.pop_front();
108  }
109 
110  // Schedule an event for the next element if there is one
111  if (!delayQueue.empty()) {
112  schedule(delayQueueEvent, delayQueue.front().processTick);
113  }
114 }
115 
116 unsigned int
117 BOP::hash(Addr addr, unsigned int way) const
118 {
119  Addr hash1 = addr >> way;
120  Addr hash2 = hash1 >> floorLog2(rrEntries);
121  return (hash1 ^ hash2) & (Addr)(rrEntries - 1);
122 }
123 
124 void
125 BOP::insertIntoRR(Addr addr, unsigned int way)
126 {
127  switch (way) {
128  case RRWay::Left:
129  rrLeft[hash(addr, RRWay::Left)] = addr;
130  break;
131  case RRWay::Right:
132  rrRight[hash(addr, RRWay::Right)] = addr;
133  break;
134  }
135 }
136 
137 void
139 {
140  if (delayQueue.size() == delayQueueSize) {
141  return;
142  }
143 
144  // Add the address to the delay queue and schedule an event to process
145  // it after the specified delay cycles
146  Tick process_tick = curTick() + delayTicks;
147 
148  delayQueue.push_back(DelayQueueEntry(x, process_tick));
149 
150  if (!delayQueueEvent.scheduled()) {
151  schedule(delayQueueEvent, process_tick);
152  }
153 }
154 
155 void
157 {
158  for (auto& it : offsetsList) {
159  it.second = 0;
160  }
161 }
162 
163 inline Addr
165 {
166  return (addr >> blkSize) & tagMask;
167 }
168 
169 bool
171 {
172  for (auto& it : rrLeft) {
173  if (it == addr) {
174  return true;
175  }
176  }
177 
178  for (auto& it : rrRight) {
179  if (it == addr) {
180  return true;
181  }
182  }
183 
184  return false;
185 }
186 
187 void
189 {
190  Addr offset_addr = (*offsetsListIterator).first;
191  Addr lookup_addr = x - offset_addr;
192 
193  // There was a hit in the RR table, increment the score for this offset
194  if (testRR(lookup_addr)) {
195  DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
196  (*offsetsListIterator).second++;
197  if ((*offsetsListIterator).second > bestScore) {
198  bestScore = (*offsetsListIterator).second;
199  phaseBestOffset = (*offsetsListIterator).first;
200  DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
201  }
202  }
203 
205 
206  // All the offsets in the list were visited meaning that a learning
207  // phase finished. Check if
208  if (offsetsListIterator == offsetsList.end()) {
210  round++;
211 
212  // Check if the best offset must be updated if:
213  // (1) One of the scores equals SCORE_MAX
214  // (2) The number of rounds equals ROUND_MAX
215  if ((bestScore >= scoreMax) || (round == roundMax)) {
217  round = 0;
218  bestScore = 0;
219  phaseBestOffset = 0;
220  resetScores();
221  issuePrefetchRequests = true;
222  } else if (bestScore <= badScore) {
223  issuePrefetchRequests = false;
224  }
225  }
226 }
227 
228 void
230  std::vector<AddrPriority> &addresses)
231 {
232  Addr addr = pfi.getAddr();
233  Addr tag_x = tag(addr);
234 
235  if (delayQueueEnabled) {
236  insertIntoDelayQueue(tag_x);
237  } else {
238  insertIntoRR(tag_x, RRWay::Left);
239  }
240 
241  // Go through the nth offset and update the score, the best score and the
242  // current best offset if a better one is found
243  bestOffsetLearning(tag_x);
244 
245  // This prefetcher is a degree 1 prefetch, so it will only generate one
246  // prefetch at most per access
247  if (issuePrefetchRequests) {
248  Addr prefetch_addr = addr + (bestOffset << lBlkSize);
249  addresses.push_back(AddrPriority(prefetch_addr, 0));
250  DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
251  }
252 }
253 
254 void
256 {
257  // Only insert into the RR right way if it's the pkt is a HWP
258  if (!pkt->cmd.isHWPrefetch()) return;
259 
260  Addr tag_y = tag(pkt->getAddr());
261 
262  if (issuePrefetchRequests) {
263  insertIntoRR(tag_y - bestOffset, RRWay::Right);
264  }
265 }
266 
267 } // namespace prefetch
268 } // namespace gem5
gem5::prefetch::Base::PrefetchInfo::getAddr
Addr getAddr() const
Obtains the address value of this Prefetcher address.
Definition: base.hh:124
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:200
gem5::prefetch::BOP::badScore
const unsigned int badScore
Definition: bop.hh:65
gem5::prefetch::BOP::bestOffset
Addr bestOffset
Current best offset to issue prefetches.
Definition: bop.hh:105
gem5::prefetch::BOP::bestScore
unsigned int bestScore
Max score found so far.
Definition: bop.hh:111
gem5::prefetch::BOP::delayQueue
std::deque< DelayQueueEntry > delayQueue
Definition: bop.hh:96
gem5::prefetch::BOP::insertIntoDelayQueue
void insertIntoDelayQueue(Addr addr)
Insert the specified address into the delay queue.
Definition: bop.cc:138
gem5::prefetch::Queued::AddrPriority
std::pair< Addr, int32_t > AddrPriority
Definition: queued.hh:190
gem5::prefetch::BOP::tagMask
const unsigned int tagMask
Definition: bop.hh:68
gem5::prefetch::BOP::offsetsList
std::vector< OffsetListEntry > offsetsList
Definition: bop.hh:79
gem5::prefetch::BOP::DelayQueueEntry
In a first implementation of the BO prefetcher, both banks of the RR were written simultaneously when...
Definition: bop.hh:87
gem5::floorLog2
static constexpr std::enable_if_t< std::is_integral_v< T >, int > floorLog2(T x)
Definition: intmath.hh:59
gem5::prefetch::BOP::hash
unsigned int hash(Addr addr, unsigned int way) const
Generate a hash for the specified address to index the RR table.
Definition: bop.cc:117
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1012
std::vector
STL vector class.
Definition: stl.hh:37
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::isPowerOf2
static constexpr bool isPowerOf2(const T &n)
Definition: intmath.hh:98
gem5::prefetch::BOP::insertIntoRR
void insertIntoRR(Addr addr, unsigned int way)
Insert the specified address into the RR table.
Definition: bop.cc:125
gem5::prefetch::BOP::calculatePrefetch
void calculatePrefetch(const PrefetchInfo &pfi, std::vector< AddrPriority > &addresses) override
Definition: bop.cc:229
bop.hh
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:210
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
gem5::prefetch::BOP::notifyFill
void notifyFill(const PacketPtr &pkt) override
Update the RR right table after a prefetch fill.
Definition: bop.cc:255
gem5::prefetch::BOP::BOP
BOP(const BOPPrefetcherParams &p)
Definition: bop.cc:40
gem5::prefetch::BOP::delayTicks
const unsigned int delayTicks
Definition: bop.hh:72
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::VegaISA::x
Bitfield< 4 > x
Definition: pagetable.hh:61
gem5::prefetch::BOP::scoreMax
const unsigned int scoreMax
Learning phase parameters.
Definition: bop.hh:63
gem5::MemCmd::isHWPrefetch
bool isHWPrefetch() const
Definition: packet.hh:254
gem5::prefetch::BOP::phaseBestOffset
Addr phaseBestOffset
Current best offset found in the learning phase.
Definition: bop.hh:107
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::prefetch::BOP::delayQueueSize
const unsigned int delayQueueSize
Definition: bop.hh:71
gem5::prefetch::BOP::rrRight
std::vector< Addr > rrRight
Definition: bop.hh:75
gem5::prefetch::BOP::testRR
bool testRR(Addr) const
Test if @X-O is hitting in the RR table to update the offset score.
Definition: bop.cc:170
gem5::Packet::cmd
MemCmd cmd
The command field of the packet.
Definition: packet.hh:372
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::prefetch::BOP::delayQueueEventWrapper
void delayQueueEventWrapper()
Event to handle the delay queue processing.
Definition: bop.cc:100
name
const std::string & name()
Definition: trace.cc:48
gem5::prefetch::Queued
Definition: queued.hh:59
gem5::prefetch::BOP::rrEntries
const unsigned int rrEntries
Recent requests table parameteres.
Definition: bop.hh:67
gem5::prefetch::Base::lBlkSize
unsigned lBlkSize
log_2(block size of the parent cache).
Definition: base.hh:272
gem5::prefetch::BOP::roundMax
const unsigned int roundMax
Definition: bop.hh:64
gem5::prefetch::BOP::delayQueueEvent
EventFunctionWrapper delayQueueEvent
Definition: bop.hh:100
gem5::ArmISA::n
Bitfield< 31 > n
Definition: misc_types.hh:513
gem5::prefetch::BOP::resetScores
void resetScores()
Reset all the scores from the offset list.
Definition: bop.cc:156
gem5::prefetch::BOP::delayQueueEnabled
const bool delayQueueEnabled
Delay queue parameters.
Definition: bop.hh:70
gem5::prefetch::BOP::round
unsigned int round
Current round.
Definition: bop.hh:113
gem5::prefetch::BOP::bestOffsetLearning
void bestOffsetLearning(Addr)
Learning phase of the BOP.
Definition: bop.cc:188
gem5::prefetch::Base::blkSize
unsigned blkSize
The block size of the parent cache.
Definition: base.hh:269
gem5::Packet::getAddr
Addr getAddr() const
Definition: packet.hh:807
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::prefetch::BOP::rrLeft
std::vector< Addr > rrLeft
Definition: bop.hh:74
gem5::prefetch::BOP::offsetsListIterator
std::vector< OffsetListEntry >::iterator offsetsListIterator
Current test offset index.
Definition: bop.hh:109
gem5::prefetch::BOP::tag
Addr tag(Addr addr) const
Generate the tag for the specified address based on the tag bits and the block size.
Definition: bop.cc:164
gem5::prefetch::Base::PrefetchInfo
Class containing the information needed by the prefetch to train and generate new prefetch requests.
Definition: base.hh:96
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:458
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::prefetch::BOP::issuePrefetchRequests
bool issuePrefetchRequests
Hardware prefetcher enabled.
Definition: bop.hh:103

Generated on Sun Jul 30 2023 01:56:57 for gem5 by doxygen 1.8.17