gem5 v24.0.0.0
Loading...
Searching...
No Matches
bop.cc
Go to the documentation of this file.
1
30
31#include "debug/HWPrefetch.hh"
32#include "params/BOPPrefetcher.hh"
33
34namespace gem5
35{
36
37namespace prefetch
38{
39
40BOP::BOP(const BOPPrefetcherParams &p)
41 : Queued(p),
42 scoreMax(p.score_max), roundMax(p.round_max),
43 badScore(p.bad_score), rrEntries(p.rr_size),
44 tagMask((1 << p.tag_bits) - 1),
45 delayQueueEnabled(p.delay_queue_enable),
46 delayQueueSize(p.delay_queue_size),
47 delayTicks(cyclesToTicks(p.delay_queue_cycles)),
48 delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
49 issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
50 bestScore(0), round(0)
51{
52 if (!isPowerOf2(rrEntries)) {
53 fatal("%s: number of RR entries is not power of 2\n", name());
54 }
55 if (!isPowerOf2(blkSize)) {
56 fatal("%s: cache line size is not power of 2\n", name());
57 }
58 if (!(p.negative_offsets_enable && (p.offset_list_size % 2 == 0))) {
59 fatal("%s: negative offsets enabled with odd offset list size\n",
60 name());
61 }
62
63 rrLeft.resize(rrEntries);
64 rrRight.resize(rrEntries);
65
66 // Following the paper implementation, a list with the specified number
67 // of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
68 const int factors[] = { 2, 3, 5 };
69 unsigned int i = 0;
70 int64_t offset_i = 1;
71
72 while (i < p.offset_list_size)
73 {
74 int64_t offset = offset_i;
75
76 for (int n : factors) {
77 while ((offset % n) == 0) {
78 offset /= n;
79 }
80 }
81
82 if (offset == 1) {
83 offsetsList.push_back(OffsetListEntry(offset_i, 0));
84 i++;
85 // If we want to use negative offsets, add also the negative value
86 // of the offset just calculated
87 if (p.negative_offsets_enable) {
88 offsetsList.push_back(OffsetListEntry(-offset_i, 0));
89 i++;
90 }
91 }
92
93 offset_i++;
94 }
95
96 offsetsListIterator = offsetsList.begin();
97}
98
99void
101{
102 while (!delayQueue.empty() &&
103 delayQueue.front().processTick <= curTick())
104 {
105 Addr addr_x = delayQueue.front().baseAddr;
106 insertIntoRR(addr_x, RRWay::Left);
107 delayQueue.pop_front();
108 }
109
110 // Schedule an event for the next element if there is one
111 if (!delayQueue.empty()) {
112 schedule(delayQueueEvent, delayQueue.front().processTick);
113 }
114}
115
116unsigned int
117BOP::hash(Addr addr, unsigned int way) const
118{
119 Addr hash1 = addr >> way;
120 Addr hash2 = hash1 >> floorLog2(rrEntries);
121 return (hash1 ^ hash2) & (Addr)(rrEntries - 1);
122}
123
124void
125BOP::insertIntoRR(Addr addr, unsigned int way)
126{
127 switch (way) {
128 case RRWay::Left:
130 break;
131 case RRWay::Right:
133 break;
134 }
135}
136
137void
139{
140 if (delayQueue.size() == delayQueueSize) {
141 return;
142 }
143
144 // Add the address to the delay queue and schedule an event to process
145 // it after the specified delay cycles
146 Tick process_tick = curTick() + delayTicks;
147
148 delayQueue.push_back(DelayQueueEntry(x, process_tick));
149
150 if (!delayQueueEvent.scheduled()) {
151 schedule(delayQueueEvent, process_tick);
152 }
153}
154
155void
157{
158 for (auto& it : offsetsList) {
159 it.second = 0;
160 }
161}
162
163inline Addr
165{
166 return (addr >> blkSize) & tagMask;
167}
168
169bool
171{
172 for (auto& it : rrLeft) {
173 if (it == addr) {
174 return true;
175 }
176 }
177
178 for (auto& it : rrRight) {
179 if (it == addr) {
180 return true;
181 }
182 }
183
184 return false;
185}
186
187void
189{
190 Addr offset_addr = (*offsetsListIterator).first;
191 Addr lookup_addr = x - offset_addr;
192
193 // There was a hit in the RR table, increment the score for this offset
194 if (testRR(lookup_addr)) {
195 DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
196 (*offsetsListIterator).second++;
197 if ((*offsetsListIterator).second > bestScore) {
198 bestScore = (*offsetsListIterator).second;
199 phaseBestOffset = (*offsetsListIterator).first;
200 DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
201 }
202 }
203
205
206 // All the offsets in the list were visited meaning that a learning
207 // phase finished. Check if
208 if (offsetsListIterator == offsetsList.end()) {
210 round++;
211
212 // Check if the best offset must be updated if:
213 // (1) One of the scores equals SCORE_MAX
214 // (2) The number of rounds equals ROUND_MAX
215 if ((bestScore >= scoreMax) || (round == roundMax)) {
217 round = 0;
218 bestScore = 0;
219 phaseBestOffset = 0;
220 resetScores();
222 } else if (bestScore <= badScore) {
223 issuePrefetchRequests = false;
224 }
225 }
226}
227
228void
230 std::vector<AddrPriority> &addresses,
231 const CacheAccessor &cache)
232{
233 Addr addr = pfi.getAddr();
234 Addr tag_x = tag(addr);
235
236 if (delayQueueEnabled) {
238 } else {
240 }
241
242 // Go through the nth offset and update the score, the best score and the
243 // current best offset if a better one is found
244 bestOffsetLearning(tag_x);
245
246 // This prefetcher is a degree 1 prefetch, so it will only generate one
247 // prefetch at most per access
249 Addr prefetch_addr = addr + (bestOffset << lBlkSize);
250 addresses.push_back(AddrPriority(prefetch_addr, 0));
251 DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
252 }
253}
254
255void
257{
258 const PacketPtr& pkt = arg.pkt;
259
260 // Only insert into the RR right way if it's the pkt is a HWP
261 if (!pkt->cmd.isHWPrefetch()) return;
262
263 Addr tag_y = tag(pkt->getAddr());
264
267 }
268}
269
270} // namespace prefetch
271} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
Information provided to probes on a cache event.
PacketPtr pkt
Packet that triggered the cache access.
bool isHWPrefetch() const
Definition packet.hh:254
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
Addr getAddr() const
Definition packet.hh:807
MemCmd cmd
The command field of the packet.
Definition packet.hh:372
std::deque< DelayQueueEntry > delayQueue
Definition bop.hh:96
void calculatePrefetch(const PrefetchInfo &pfi, std::vector< AddrPriority > &addresses, const CacheAccessor &cache) override
Definition bop.cc:229
unsigned int round
Current round.
Definition bop.hh:113
std::vector< Addr > rrLeft
Definition bop.hh:74
unsigned int bestScore
Max score found so far.
Definition bop.hh:111
const unsigned int delayQueueSize
Definition bop.hh:71
void insertIntoDelayQueue(Addr addr)
Insert the specified address into the delay queue.
Definition bop.cc:138
BOP(const BOPPrefetcherParams &p)
Definition bop.cc:40
unsigned int hash(Addr addr, unsigned int way) const
Generate a hash for the specified address to index the RR table.
Definition bop.cc:117
const unsigned int rrEntries
Recent requests table parameteres.
Definition bop.hh:67
const unsigned int tagMask
Definition bop.hh:68
std::vector< OffsetListEntry >::iterator offsetsListIterator
Current test offset index.
Definition bop.hh:109
const unsigned int badScore
Definition bop.hh:65
const bool delayQueueEnabled
Delay queue parameters.
Definition bop.hh:70
const unsigned int scoreMax
Learning phase parameters.
Definition bop.hh:63
std::vector< Addr > rrRight
Definition bop.hh:75
bool issuePrefetchRequests
Hardware prefetcher enabled.
Definition bop.hh:103
Addr tag(Addr addr) const
Generate the tag for the specified address based on the tag bits and the block size.
Definition bop.cc:164
void delayQueueEventWrapper()
Event to handle the delay queue processing.
Definition bop.cc:100
void bestOffsetLearning(Addr)
Learning phase of the BOP.
Definition bop.cc:188
const unsigned int roundMax
Definition bop.hh:64
void insertIntoRR(Addr addr, unsigned int way)
Insert the specified address into the RR table.
Definition bop.cc:125
Addr phaseBestOffset
Current best offset found in the learning phase.
Definition bop.hh:107
EventFunctionWrapper delayQueueEvent
Definition bop.hh:100
void notifyFill(const CacheAccessProbeArg &arg) override
Update the RR right table after a prefetch fill.
Definition bop.cc:256
std::vector< OffsetListEntry > offsetsList
Definition bop.hh:79
bool testRR(Addr) const
Test if @X-O is hitting in the RR table to update the offset score.
Definition bop.cc:170
void resetScores()
Reset all the scores from the offset list.
Definition bop.cc:156
Addr bestOffset
Current best offset to issue prefetches.
Definition bop.hh:105
const unsigned int delayTicks
Definition bop.hh:72
Class containing the information needed by the prefetch to train and generate new prefetch requests.
Definition base.hh:111
Addr getAddr() const
Obtains the address value of this Prefetcher address.
Definition base.hh:138
unsigned blkSize
The block size of the parent cache.
Definition base.hh:286
unsigned lBlkSize
log_2(block size of the parent cache).
Definition base.hh:289
std::pair< Addr, int32_t > AddrPriority
Definition queued.hh:192
STL vector class.
Definition stl.hh:37
static constexpr std::enable_if_t< std::is_integral_v< T >, int > floorLog2(T x)
Definition intmath.hh:59
static constexpr bool isPowerOf2(const T &n)
Definition intmath.hh:98
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:200
Bitfield< 31 > n
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 0 > p
Bitfield< 3 > x
Definition pagetable.hh:73
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
Provides generic cache lookup functions.
In a first implementation of the BO prefetcher, both banks of the RR were written simultaneously when...
Definition bop.hh:88
const std::string & name()
Definition trace.cc:48

Generated on Tue Jun 18 2024 16:24:05 for gem5 by doxygen 1.11.0