gem5  v22.1.0.0
frequent_values.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2020 Inria
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met: redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer;
9  * redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution;
12  * neither the name of the copyright holders nor the names of its
13  * contributors may be used to endorse or promote products derived from
14  * this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
30 
31 #include <algorithm>
32 #include <limits>
33 
34 #include "base/bitfield.hh"
35 #include "base/compiler.hh"
36 #include "base/intmath.hh"
37 #include "base/logging.hh"
38 #include "debug/CacheComp.hh"
40 #include "params/FrequentValuesCompressor.hh"
41 
42 namespace gem5
43 {
44 
45 GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
46 namespace compression
47 {
48 
50  : Base(p), useHuffmanEncoding(p.max_code_length != 0),
51  indexEncoder(p.max_code_length), counterBits(p.counter_bits),
52  codeGenerationTicks(p.code_generation_ticks),
53  checkSaturation(p.check_saturation), numVFTEntries(p.vft_entries),
54  numSamples(p.num_samples), takenSamples(0), phase(SAMPLING),
55  VFT(p.vft_assoc, p.vft_entries, p.vft_indexing_policy,
56  p.vft_replacement_policy, VFTEntry(counterBits)),
57  codeGenerationEvent([this]{ phase = COMPRESSING; }, name())
58 {
59  fatal_if((numVFTEntries - 1) > mask(chunkSizeBits),
60  "There are more VFT entries than possible values.");
61 }
62 
63 std::unique_ptr<Base::CompressionData>
65  Cycles& decomp_lat)
66 {
67  std::unique_ptr<CompData> comp_data =
68  std::unique_ptr<CompData>(new CompData());
69 
70  // Compression size
71  std::size_t size = 0;
72 
73  // Compress every value sequentially. The compressed values are then
74  // added to the final compressed data.
75  for (const auto& chunk : chunks) {
76  encoder::Code code;
77  int length = 0;
78  if (phase == COMPRESSING) {
79  VFTEntry* entry = VFT.findEntry(chunk, false);
80 
81  // Theoretically, the code would be the index of the entry;
82  // however, there is no practical need to do so, and we simply
83  // use the value instead
84  const unsigned uncompressed_index = uncompressedValue;
85  const unsigned index = entry ? chunk : uncompressed_index;
86 
87  // If using an index encoder, apply it
88  if (useHuffmanEncoding) {
89  code = indexEncoder.encode(index);
90 
91  if (index == uncompressed_index) {
92  code.length += chunkSizeBits;
93  } else if (code.length > 64) {
94  // If, for some reason, we could not generate an encoding
95  // for the value, generate the uncompressed encoding
96  code = indexEncoder.encode(uncompressed_index);
97  assert(code.length <= 64);
98  code.length += chunkSizeBits;
99  }
100  } else {
101  const unsigned code_size = std::log2(numVFTEntries);
102  if (entry) {
103  code = {index, code_size};
104  } else {
105  code = {uncompressed_index, code_size + chunkSizeBits};
106  }
107  }
108  } else {
109  // Not compressing yet; simply copy the value over
110  code = {chunk, chunkSizeBits};
111  }
112  length += code.length;
113 
114  DPRINTF(CacheComp, "Compressed %016x to %016x (Size = %d) "
115  "(Phase: %d)\n", chunk, code.code, length, phase);
116 
117  comp_data->compressedValues.emplace_back(code, chunk);
118 
119  size += length;
120  }
121 
122  // Set final compression size
123  comp_data->setSizeBits(size);
124 
125  // Set latencies based on the degree of parallelization, and any extra
126  // latencies due to shifting or packaging
127  comp_lat = Cycles(compExtraLatency +
128  (chunks.size() / compChunksPerCycle));
129  decomp_lat = Cycles(decompExtraLatency +
130  (chunks.size() / decompChunksPerCycle));
131 
132  // Return compressed line
133  return comp_data;
134 }
135 
136 void
137 FrequentValues::decompress(const CompressionData* comp_data, uint64_t* data)
138 {
139  const CompData* casted_comp_data = static_cast<const CompData*>(comp_data);
140 
141  // Decompress every entry sequentially
142  std::vector<Chunk> decomp_chunks;
143  for (const auto& comp_chunk : casted_comp_data->compressedValues) {
144  if (phase == COMPRESSING) {
145  if (useHuffmanEncoding) {
146  // Although in theory we have the codeword and have to find
147  // its corresponding value, in order to make life easier we
148  // search for the value and verify that the stored code
149  // matches the table's
150  [[maybe_unused]] const encoder::Code code =
151  indexEncoder.encode(comp_chunk.value);
152 
153  // Either the value will be found and the codes match, or the
154  // value will not be found because it is an uncompressed entry
155  assert(((code.length <= 64) &&
156  (code.code == comp_chunk.code.code)) ||
157  (comp_chunk.code.code ==
159  } else {
160  // The value at the given VFT entry must match the one stored,
161  // if it is not the uncompressed value
162  assert((comp_chunk.code.code == uncompressedValue) ||
163  VFT.findEntry(comp_chunk.value, false));
164  }
165  }
166 
167  decomp_chunks.push_back(comp_chunk.value);
168  DPRINTF(CacheComp, "Decompressed %016x to %016x\n",
169  comp_chunk.code.code, comp_chunk.value);
170  }
171 
172  // Concatenate the decompressed words to generate the cache lines
173  fromChunks(decomp_chunks, data);
174 }
175 
176 void
178  bool is_invalidation)
179 {
180  const std::vector<Chunk> chunks = toChunks(data.data());
181  for (const Chunk& chunk : chunks) {
182  VFTEntry* entry = VFT.findEntry(chunk, false);
183  bool saturated = false;
184  if (!is_invalidation) {
185  // If a VFT hit, increase new value's counter; otherwise, insert
186  // new value
187  if (!entry) {
188  entry = VFT.findVictim(chunk);
189  assert(entry != nullptr);
190  entry->value = chunk;
191  VFT.insertEntry(chunk, false, entry);
192  } else {
193  VFT.accessEntry(entry);
194  }
195  entry->counter++;
196  saturated = entry->counter.isSaturated();
197  } else {
198  // If a VFT hit, decrease value's counter
199  if (entry) {
200  VFT.accessEntry(entry);
201  entry->counter--;
202  }
203  }
204 
205  // If any counter saturates, all counters are shifted right,
206  // resulting in precision loss
207  if (checkSaturation && saturated) {
208  for (auto& entry : VFT) {
209  entry.counter >>= 1;
210  }
211  }
212  }
213 
214  takenSamples += chunks.size();
215 }
216 
217 void
219 {
220  // We need to find a pseudo value to store uncompressed values as
221  // For that we generate all possible values from 0 to 1 size larger
222  // than the number of real values.
223  std::set<uint64_t> uncompressed_values;
224  for (int i = 0; i < numVFTEntries+1; ++i) {
225  uncompressed_values.insert(uncompressed_values.end(), i);
226  }
227 
228  for (const auto& entry : VFT) {
229  // Remove the respective real value from the list of possible
230  // pseudo values for the uncompressed value
231  uncompressed_values.erase(entry.value);
232  }
233 
234  // Select the first remaining possible value as the value
235  // representing uncompressed values
236  assert(uncompressed_values.size() >= 1);
237  uncompressedValue = *uncompressed_values.begin();
238  assert(VFT.findEntry(uncompressedValue, false) == nullptr);
239 
240  if (useHuffmanEncoding) {
241  // Populate the queue, adding each entry as a tree with one node.
242  // They are sorted such that the value with highest frequency is
243  // the queue's top
244  for (const auto& entry : VFT) {
245  indexEncoder.sample(entry.value, entry.counter);
246  }
247 
248  // Insert the uncompressed value in the tree assuming it has the
249  // highest frequency, since it is in fact a group of all the values
250  // not present in the VFT
252 
254  }
255 
256  // Generate the code map and mark the current phase as code generation
258 
259  // Let us know when to change from the code generation phase to the
260  // effective compression phase
262 }
263 
264 void
266 {
267  // Do not update VFT if not sampling
268  if (phase == SAMPLING) {
269  // If the new data is not present, the notification is due to a
270  // fill; otherwise, sample the old block's contents
271  if (data_update.oldData.size() > 0) {
272  sampleValues(data_update.oldData, true);
273  }
274  // If the new data is not present, the notification is due to an
275  // invalidation; otherwise, sample the new block's contents
276  if (data_update.newData.size() > 0) {
277  sampleValues(data_update.newData, false);
278  }
279 
280  // Check if it is done with the sampling phase. If so, generate the
281  // codes that will be used for the compression phase
282  if (takenSamples >= numSamples) {
283  generateCodes();
284  }
285  }
286 }
287 
288 void
290 {
291  assert(listeners.empty());
292  assert(cache != nullptr);
293  listeners.push_back(new FrequentValuesListener(
294  *this, cache->getProbeManager(), "Data Update"));
295 }
296 
297 void
299 {
300  parent.probeNotify(data_update);
301 }
302 
303 } // namespace compression
304 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
const char data[]
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:79
Base cache compressor interface.
Definition: base.hh:66
const Cycles compExtraLatency
Extra latency added to compression due to packaging, shifting or other operations.
Definition: base.hh:115
BaseCacheCompressorParams Params
Definition: base.hh:202
void fromChunks(const std::vector< Chunk > &chunks, uint64_t *data) const
This function re-joins the chunks to recreate the original data.
Definition: base.cc:134
BaseCache * cache
Pointer to the parent cache.
Definition: base.hh:130
std::vector< Chunk > toChunks(const uint64_t *data) const
This function splits the raw data into chunks, so that it can be parsed by the compressor.
Definition: base.cc:115
const Cycles decompExtraLatency
Extra latency added to decompression due to packaging, shifting or other operations.
Definition: base.hh:127
const Cycles compChunksPerCycle
Degree of parallelization of the compression process.
Definition: base.hh:109
uint64_t Chunk
A chunk is a basic lexical unit.
Definition: base.hh:72
const unsigned chunkSizeBits
Chunk size, in number of bits.
Definition: base.hh:97
const Cycles decompChunksPerCycle
Degree of parallelization of the decompression process.
Definition: base.hh:121
std::vector< CompressedValue > compressedValues
The values contained in the original data, after being compressed sequentially.
void notify(const DataUpdate &data_update) override
SatCounter32 counter
The ideal counter width (in bits) is determined by the maximum number of times a given value appears ...
uint64_t value
The value is stored as a 64 bit entry to accomodate for the uncompressed value.
std::vector< FrequentValuesListener * > listeners
void decompress(const CompressionData *comp_data, uint64_t *data) override
Apply the decompression process to the compressed data.
void regProbeListeners() override
Register probe listeners for this object.
void sampleValues(const std::vector< uint64_t > &data, bool is_invalidation)
Sample values from a packet, adding them to the VFT.
std::unique_ptr< Base::CompressionData > compress(const std::vector< Chunk > &chunks, Cycles &comp_lat, Cycles &decomp_lat) override
Apply the compression process to the cache line.
unsigned takenSamples
Number of samples taken so far.
void probeNotify(const DataUpdate &data_update)
Process a notification event from the ProbeListener.
const unsigned numVFTEntries
Maximum number of VFT entries, and thus of codewords too.
const Tick codeGenerationTicks
Ticks needed to perform the CODE_GENERATION phase.
const bool checkSaturation
Whether an action must be performed when counters saturate.
void generateCodes()
End sampling phase and start the code generation.
const unsigned numSamples
Number of samples in the sampling phase.
encoder::Huffman indexEncoder
The encoder applied to the VFT indices.
const bool useHuffmanEncoding
Whether Huffman encoding is applied to the VFT indices.
uint64_t uncompressedValue
A pseudo value is used as the representation of uncompressed values.
AssociativeSet< VFTEntry > VFT
The Value Frequency Table, a small cache that keeps track and estimates the frequency distribution of...
EventFunctionWrapper codeGenerationEvent
Event to handle finishing code generation and starting compression.
void sample(uint64_t value, uint64_t frequency)
Inserts the value-frequency pair in the tree.
Definition: huffman.cc:53
Code encode(const uint64_t val) const override
The function responsible for the generation of the alternative value.
Definition: huffman.cc:116
void generateCodeMaps()
Generation of the code maps.
Definition: huffman.cc:84
constexpr uint64_t mask(unsigned nbits)
Generate a 64-bit mask of 'nbits' 1s, right justified.
Definition: bitfield.hh:63
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
bool isSaturated() const
Whether the counter has achieved its maximum value or not.
Definition: sat_counter.hh:312
ProbeManager * getProbeManager()
Get the probe manager for this object.
Definition: sim_object.cc:120
Bitfield< 7 > i
Definition: misc_types.hh:67
Bitfield< 30, 0 > index
Bitfield< 54 > p
Definition: pagetable.hh:70
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi)
A data contents update is composed of the updated block's address, the old contents,...
Definition: base.hh:125
std::vector< uint64_t > newData
The new data contents.
Definition: base.hh:133
std::vector< uint64_t > oldData
The stale data contents.
Definition: base.hh:131
unsigned length
Number of bits in the code.
Definition: base.hh:51
uint64_t code
Only the LSB of the code are relevant.
Definition: base.hh:49
const std::string & name()
Definition: trace.cc:49

Generated on Wed Dec 21 2022 10:22:36 for gem5 by doxygen 1.9.1