gem5 v24.1.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
bop.cc
Go to the documentation of this file.
1
31
32#include "debug/HWPrefetch.hh"
33#include "params/BOPPrefetcher.hh"
34
35namespace gem5
36{
37
38namespace prefetch
39{
40BOP::BOP(const BOPPrefetcherParams &p)
41 : Queued(p),
42 scoreMax(p.score_max), roundMax(p.round_max),
43 badScore(p.bad_score), rrEntries(p.rr_size),
44 tagMask((1 << p.tag_bits) - 1),
45 delayQueueEnabled(p.delay_queue_enable),
46 delayQueueSize(p.delay_queue_size),
47 delayTicks(cyclesToTicks(p.delay_queue_cycles)),
48 delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
49 issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
50 bestScore(0), round(0), degree(p.degree)
51{
52 if (!isPowerOf2(rrEntries)) {
53 fatal("%s: number of RR entries is not power of 2\n", name());
54 }
55 if (!isPowerOf2(blkSize)) {
56 fatal("%s: cache line size is not power of 2\n", name());
57 }
58 if (p.negative_offsets_enable && (p.offset_list_size % 2 != 0)) {
59 fatal("%s: negative offsets enabled with odd offset list size\n",
60 name());
61 }
62 if (p.degree <= 0) {
63 fatal("%s: prefetch degree must be strictly greater than zero\n",
64 name());
65 }
66
67 rrLeft.resize(rrEntries);
68 rrRight.resize(rrEntries);
69
70 /*
71 * Following the paper implementation, a list with the specified number
72 * of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
73 */
74 const int factors[] = { 2, 3, 5 };
75 unsigned int i = 0;
76 int64_t offset_i = 1;
77
78 while (i < p.offset_list_size)
79 {
80 int64_t offset = offset_i;
81
82 for (int n : factors) {
83 while ((offset % n) == 0) {
84 offset /= n;
85 }
86 }
87
88 if (offset == 1) {
89 offsetsList.push_back(OffsetListEntry(offset_i, 0));
90 i++;
91 /*
92 * If we want to use negative offsets, add also the negative value
93 * of the offset just calculated
94 */
95 if (p.negative_offsets_enable) {
96 offsetsList.push_back(OffsetListEntry(-offset_i, 0));
97 i++;
98 }
99 }
100
101 offset_i++;
102 }
103
104 offsetsListIterator = offsetsList.begin();
105}
106
107void
109{
110 while (!delayQueue.empty() &&
111 delayQueue.front().processTick <= curTick())
112 {
113 Addr addr_x = delayQueue.front().baseAddr;
114 Addr addr_tag = tag(addr_x);
115 insertIntoRR(addr_x, addr_tag, RRWay::Left);
116 delayQueue.pop_front();
117 }
118
119 // Schedule an event for the next element if there is one
120 if (!delayQueue.empty()) {
121 schedule(delayQueueEvent, delayQueue.front().processTick);
122 }
123}
124
125unsigned int
126BOP::index(Addr addr, unsigned int way) const
127{
128 /*
129 * The second parameter, way, is set to 0 for indexing the left side of the
130 * RR Table and, it is set to 1 for indexing the right side of the RR
131 * Table. This is because we always pass the enum RRWay as the way argument
132 * while calling index. This enum is defined in the bop.hh file.
133 *
134 * The indexing function in the author's ChampSim code, which can be found
135 * here: https://comparch-conf.gatech.edu/dpc2/final_program.html, computes
136 * the hash as follows:
137 *
138 * 1. For indexing the left side of the RR Table (way = 0), the cache line
139 * address is XORed with itself after right shifting it by the log base
140 * 2 of the number of entries in the RR Table.
141 * 2. For indexing the right side of the RR Table (way = 1), the cache
142 * line address is XORed with itself after right shifting it by the log
143 * base 2 of the number of entries in the RR Table, multiplied by two.
144 *
145 * Therefore, we if just left shift the log base 2 of the number of RR
146 * entries (log_rr_entries) with the parameter way, then if we are indexing
147 * the left side, we'll leave log_rr_entries as it is, but if we are
148 * indexing the right side, we'll multiply it with 2. Now once we have the
149 * result of this operation, we can right shift the cache line address
150 * (line_addr) by this value to get the first operand of the final XOR
151 * operation. The second operand of the XOR operation is line_addr itself
152 */
153 Addr log_rr_entries = floorLog2(rrEntries);
154 Addr line_addr = addr >> lBlkSize;
155 Addr hash = line_addr ^ (line_addr >> (log_rr_entries << way));
156 hash &= ((1ULL << log_rr_entries) - 1);
157 return hash % rrEntries;
158}
159
160void
161BOP::insertIntoRR(Addr addr, Addr tag, unsigned int way)
162{
163 switch (way) {
164 case RRWay::Left:
166 break;
167 case RRWay::Right:
169 break;
170 }
171}
172
173void
175{
176 if (delayQueue.size() == delayQueueSize) {
177 return;
178 }
179
180 /*
181 * Add the address to the delay queue and schedule an event to process
182 * it after the specified delay cycles
183 */
184 Tick process_tick = curTick() + delayTicks;
185
186 delayQueue.push_back(DelayQueueEntry(x, process_tick));
187
188 if (!delayQueueEvent.scheduled()) {
189 schedule(delayQueueEvent, process_tick);
190 }
191}
192
193void
195{
196 for (auto& it : offsetsList) {
197 it.second = 0;
198 }
199}
200
201inline Addr
203{
204 return (addr >> lBlkSize) & tagMask;
205}
206
207bool
209{
210 for (auto& it : rrLeft) {
211 if (it == addr) {
212 return true;
213 }
214 }
215
216 for (auto& it : rrRight) {
217 if (it == addr) {
218 return true;
219 }
220 }
221
222 return false;
223}
224
225void
227{
228 Addr offset_tag = (*offsetsListIterator).first;
229
230 /*
231 * Compute the lookup tag for the RR table. Since addr_tag is a tag value,
232 * and not an address, subtracting the offset from addr_tag may result in
233 * integer underflow. Therefore, we first convert the tag back to address
234 * by right shifting it, and then subtract the offset. This gives us a
235 * new lookup address which we use to compute the lookup tag
236 */
237 Addr lookup_tag = tag((addr_tag << lBlkSize) - (offset_tag << lBlkSize));
238
239 // There was a hit in the RR table, increment the score for this offset
240 if (testRR(lookup_tag)) {
241 DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n",
242 lookup_tag);
243 (*offsetsListIterator).second++;
244 if ((*offsetsListIterator).second > bestScore) {
245 bestScore = (*offsetsListIterator).second;
246 phaseBestOffset = (*offsetsListIterator).first;
247 DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
248 }
249 }
250
251 // Move the offset iterator forward to prepare for the next time
253
254 /*
255 * All the offsets in the list were visited meaning that a learning
256 * phase finished. Check if
257 */
258 if (offsetsListIterator == offsetsList.end()) {
260 round++;
261 }
262
263 // Check if its time to re-calculate the best offset
264 if ((bestScore >= scoreMax) || (round >= roundMax)) {
265 round = 0;
266
267 /*
268 * If the current best score (bestScore) has exceed the threshold to
269 * enable prefetching (badScore), reset the learning structures and
270 * enable prefetch generation
271 */
272 if (bestScore > badScore) {
274 round = 0;
276 } else {
277 issuePrefetchRequests = false;
278 }
279 resetScores();
280 bestScore = 0;
281 phaseBestOffset = 0;
282 }
283}
284
285void
287 std::vector<AddrPriority> &addresses,
288 const CacheAccessor &cache)
289{
290 Addr addr = pfi.getAddr();
291 Addr tag_x = tag(addr);
292
293 if (delayQueueEnabled) {
295 } else {
297 }
298
299 /*
300 * Go through the nth offset and update the score, the best score and the
301 * current best offset if a better one is found
302 */
304
306 for (int i = 1; i <= degree; i++) {
307 Addr prefetch_addr = addr + ((i * bestOffset) << lBlkSize);
308 addresses.push_back(AddrPriority(prefetch_addr, 0));
309 DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
310 }
311 }
312}
313
314void
316{
317 const PacketPtr& pkt = arg.pkt;
318
319 // Only insert into the RR right way if it's the pkt is a hardware prefetch
320 if (!pkt->cmd.isHWPrefetch()) return;
321
322 Addr addr = pkt->getAddr();
323 Addr tag_y = tag(addr);
324
327 }
328}
329
330} // namespace prefetch
331} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
Information provided to probes on a cache event.
PacketPtr pkt
Packet that triggered the cache access.
bool isHWPrefetch() const
Definition packet.hh:254
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
MemCmd cmd
The command field of the packet.
Definition packet.hh:372
std::deque< DelayQueueEntry > delayQueue
Definition bop.hh:97
void calculatePrefetch(const PrefetchInfo &pfi, std::vector< AddrPriority > &addresses, const CacheAccessor &cache) override
Definition bop.cc:286
unsigned int round
Current round.
Definition bop.hh:114
std::vector< Addr > rrLeft
Definition bop.hh:75
unsigned int bestScore
Max score found so far.
Definition bop.hh:112
unsigned int index(Addr addr, unsigned int way) const
Generate a hash for the specified address to index the RR table.
Definition bop.cc:126
const unsigned int delayQueueSize
Definition bop.hh:72
void insertIntoDelayQueue(Addr addr)
Insert the specified address into the delay queue.
Definition bop.cc:174
BOP(const BOPPrefetcherParams &p)
Definition bop.cc:40
const unsigned int rrEntries
Recent requests table parameteres.
Definition bop.hh:68
const unsigned int tagMask
Definition bop.hh:69
std::vector< OffsetListEntry >::iterator offsetsListIterator
Current test offset index.
Definition bop.hh:110
const unsigned int badScore
Definition bop.hh:66
const bool delayQueueEnabled
Delay queue parameters.
Definition bop.hh:71
const unsigned int scoreMax
Learning phase parameters.
Definition bop.hh:64
std::vector< Addr > rrRight
Definition bop.hh:76
bool issuePrefetchRequests
Hardware prefetcher enabled.
Definition bop.hh:104
Addr tag(Addr addr) const
Generate the tag for the specified address based on the tag bits and the block size.
Definition bop.cc:202
void delayQueueEventWrapper()
Event to handle the delay queue processing.
Definition bop.cc:108
void bestOffsetLearning(Addr)
Learning phase of the BOP.
Definition bop.cc:226
const unsigned int roundMax
Definition bop.hh:65
Addr phaseBestOffset
Current best offset found in the learning phase.
Definition bop.hh:108
EventFunctionWrapper delayQueueEvent
Definition bop.hh:101
void notifyFill(const CacheAccessProbeArg &arg) override
Update the RR right table after a prefetch fill.
Definition bop.cc:315
unsigned int degree
The prefetch degree, i.e.
Definition bop.hh:157
std::vector< OffsetListEntry > offsetsList
Definition bop.hh:80
bool testRR(Addr) const
Test if @X-O is hitting in the RR table to update the offset score.
Definition bop.cc:208
void insertIntoRR(Addr addr, Addr addr_tag, unsigned int way)
Insert the specified address into the RR table.
Definition bop.cc:161
void resetScores()
Reset all the scores from the offset list.
Definition bop.cc:194
Addr bestOffset
Current best offset to issue prefetches.
Definition bop.hh:106
const unsigned int delayTicks
Definition bop.hh:73
Class containing the information needed by the prefetch to train and generate new prefetch requests.
Definition base.hh:111
Addr getAddr() const
Obtains the address value of this Prefetcher address.
Definition base.hh:138
unsigned lBlkSize
log_2(block size of the parent cache).
Definition base.hh:289
std::pair< Addr, int32_t > AddrPriority
Definition queued.hh:192
STL vector class.
Definition stl.hh:37
static constexpr std::enable_if_t< std::is_integral_v< T >, int > floorLog2(T x)
Definition intmath.hh:59
static constexpr bool isPowerOf2(const T &n)
Definition intmath.hh:98
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
Bitfield< 31 > n
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 30, 0 > index
Bitfield< 0 > p
Bitfield< 3 > x
Definition pagetable.hh:74
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
Provides generic cache lookup functions.
In a first implementation of the BO prefetcher, both banks of the RR were written simultaneously when...
Definition bop.hh:89
const std::string & name()
Definition trace.cc:48

Generated on Mon Jan 13 2025 04:28:38 for gem5 by doxygen 1.9.8