gem5  v22.1.0.0
elastic_trace.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 - 2015 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  */
37 
39 
#include "cpu/o3/probe/elastic_trace.hh"

#include "base/callback.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/reg_class.hh"
#include "debug/ElasticTrace.hh"
#include "mem/packet.hh"
47 
48 namespace gem5
49 {
50 
51 namespace o3
52 {
53 
54 ElasticTrace::ElasticTrace(const ElasticTraceParams &params)
55  : ProbeListenerObject(params),
56  regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
57  firstWin(true),
58  lastClearedSeqNum(0),
59  depWindowSize(params.depWindowSize),
60  dataTraceStream(nullptr),
61  instTraceStream(nullptr),
62  startTraceInst(params.startTraceInst),
63  allProbesReg(false),
64  traceVirtAddr(params.traceVirtAddr),
65  stats(this)
66 {
67  cpu = dynamic_cast<CPU *>(params.manager);
68 
69  fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
70  "support dependency tracing.\n", name());
71 
72  fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
73  "Recommended size is 3x ROB size in the O3CPU.\n");
74 
75  fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\
76  "single-threaded workload only", cpu->numThreads, name());
77  // Initialize the protobuf output stream
78  fatal_if(params.instFetchTraceFile == "", "Assign instruction fetch "\
79  "trace file path to instFetchTraceFile");
80  fatal_if(params.dataDepTraceFile == "", "Assign data dependency "\
81  "trace file path to dataDepTraceFile");
82  std::string filename = simout.resolve(name() + "." +
83  params.instFetchTraceFile);
84  instTraceStream = new ProtoOutputStream(filename);
85  filename = simout.resolve(name() + "." + params.dataDepTraceFile);
86  dataTraceStream = new ProtoOutputStream(filename);
87  // Create a protobuf message for the header and write it to the stream
88  ProtoMessage::PacketHeader inst_pkt_header;
89  inst_pkt_header.set_obj_id(name());
90  inst_pkt_header.set_tick_freq(sim_clock::Frequency);
91  instTraceStream->write(inst_pkt_header);
92  // Create a protobuf message for the header and write it to
93  // the stream
94  ProtoMessage::InstDepRecordHeader data_rec_header;
95  data_rec_header.set_obj_id(name());
96  data_rec_header.set_tick_freq(sim_clock::Frequency);
97  data_rec_header.set_window_size(depWindowSize);
98  dataTraceStream->write(data_rec_header);
99  // Register a callback to flush trace records and close the output streams.
100  registerExitCallback([this]() { flushTraces(); });
101 }
102 
103 void
105 {
106  inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
108  if (startTraceInst == 0) {
109  // If we want to start tracing from the start of the simulation,
110  // register all elastic trace probes now.
112  } else {
113  // Schedule an event to register all elastic trace probes when
114  // specified no. of instructions are committed.
117  }
118 }
119 
120 void
122 {
123  assert(!allProbesReg);
124  inform("@%llu: No. of instructions committed = %llu, registering elastic"
125  " probe listeners", curTick(), cpu->numSimulatedInsts());
126  // Create new listeners: provide method to be called upon a notify() for
127  // each probe point.
129  "FetchRequest", &ElasticTrace::fetchReqTrace));
131  DynInstConstPtr>(this, "Execute",
134  DynInstConstPtr>(this, "ToCommit",
137  DynInstConstPtr>(this, "Rename",
140  "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
142  DynInstConstPtr>(this, "Squash",
145  DynInstConstPtr>(this, "Commit",
147  allProbesReg = true;
148 }
149 
150 void
152 {
153 
154  DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
155  (MemCmd::ReadReq),
156  req->getPC(), req->getVaddr(), req->getPaddr(),
157  req->getFlags(), req->getSize(), curTick());
158 
159  // Create a protobuf message including the request fields necessary to
160  // recreate the request in the TraceCPU.
161  ProtoMessage::Packet inst_fetch_pkt;
162  inst_fetch_pkt.set_tick(curTick());
163  inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
164  inst_fetch_pkt.set_pc(req->getPC());
165  inst_fetch_pkt.set_flags(req->getFlags());
166  inst_fetch_pkt.set_addr(req->getPaddr());
167  inst_fetch_pkt.set_size(req->getSize());
168  // Write the message to the stream.
169  instTraceStream->write(inst_fetch_pkt);
170 }
171 
172 void
174 {
175 
176  // In a corner case, a retired instruction is propagated backward to the
177  // IEW instruction queue to handle some side-channel information. But we
178  // must not process an instruction again. So we test the sequence number
179  // against the lastClearedSeqNum and skip adding the instruction for such
180  // corner cases.
181  if (dyn_inst->seqNum <= lastClearedSeqNum) {
182  DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
183  has already retired (mostly squashed)", dyn_inst->seqNum);
184  // Do nothing as program has proceeded and this inst has been
185  // propagated backwards to handle something.
186  return;
187  }
188 
189  DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
190  curTick());
191  // Either the execution info object will already exist if this
192  // instruction had a register dependency recorded in the rename probe
193  // listener before entering execute stage or it will not exist and will
194  // need to be created here.
195  InstExecInfo* exec_info_ptr;
196  auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
197  if (itr_exec_info != tempStore.end()) {
198  exec_info_ptr = itr_exec_info->second;
199  } else {
200  exec_info_ptr = new InstExecInfo;
201  tempStore[dyn_inst->seqNum] = exec_info_ptr;
202  }
203 
204  exec_info_ptr->executeTick = curTick();
205  stats.maxTempStoreSize = std::max(tempStore.size(),
206  (std::size_t)stats.maxTempStoreSize.value());
207 }
208 
209 void
211 {
212  // If tracing has just been enabled then the instruction at this stage of
213  // execution is far enough that we cannot gather info about its past like
214  // the tick it started execution. Simply return until we see an instruction
215  // that is found in the tempStore.
216  auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
217  if (itr_exec_info == tempStore.end()) {
218  DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
219  " skipping.\n", dyn_inst->seqNum);
220  return;
221  }
222 
223  DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
224  curTick());
225  InstExecInfo* exec_info_ptr = itr_exec_info->second;
226  exec_info_ptr->toCommitTick = curTick();
227 
228 }
229 
230 void
232 {
233  // Get the sequence number of the instruction
234  InstSeqNum seq_num = dyn_inst->seqNum;
235 
236  assert(dyn_inst->seqNum > lastClearedSeqNum);
237 
238  // Since this is the first probe activated in the pipeline, create
239  // a new execution info object to track this instruction as it
240  // progresses through the pipeline.
241  InstExecInfo* exec_info_ptr = new InstExecInfo;
242  tempStore[seq_num] = exec_info_ptr;
243 
244  // Loop through the source registers and look up the dependency map. If
245  // the source register entry is found in the dependency map, add a
246  // dependency on the last writer.
247  int8_t max_regs = dyn_inst->numSrcRegs();
248  for (int src_idx = 0; src_idx < max_regs; src_idx++) {
249 
250  const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
251  if (!src_reg.is(MiscRegClass) && !src_reg.is(InvalidRegClass)) {
252  // Get the physical register index of the i'th source register.
253  PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcIdx(src_idx);
254  DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
255  " %i (%s)\n", seq_num,
256  phys_src_reg->flatIndex(), phys_src_reg->className());
257  auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
258  if (itr_writer != physRegDepMap.end()) {
259  InstSeqNum last_writer = itr_writer->second;
260  // Additionally the dependency distance is kept less than the
261  // window size parameter to limit the memory allocation to
262  // nodes in the graph. If the window were tending to infinite
263  // we would have to load a large number of node objects during
264  // replay.
265  if (seq_num - last_writer < depWindowSize) {
266  // Record a physical register dependency.
267  exec_info_ptr->physRegDepSet.insert(last_writer);
268  }
269  }
270 
271  }
272 
273  }
274 
275  // Loop through the destination registers of this instruction and update
276  // the physical register dependency map for last writers to registers.
277  max_regs = dyn_inst->numDestRegs();
278  for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
279  // For data dependency tracking the register must be an int, float or
280  // CC register and not a Misc register.
281  const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
282  if (!dest_reg.is(MiscRegClass) && !dest_reg.is(InvalidRegClass)) {
283  // Get the physical register index of the i'th destination
284  // register.
285  PhysRegIdPtr phys_dest_reg =
286  dyn_inst->renamedDestIdx(dest_idx);
287  DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
288  " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
289  dest_reg.className());
290  physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
291  }
292  }
293  stats.maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
294  (std::size_t)stats.maxPhysRegDepMapSize.value());
295 }
296 
297 void
299 {
300  DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
301  inst_reg_pair.second);
302  auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
303  if (itr_regdep_map != physRegDepMap.end())
304  physRegDepMap.erase(itr_regdep_map);
305 }
306 
307 void
309 {
310  // If the squashed instruction was squashed before being processed by
311  // execute stage then it will not be in the temporary store. In this case
312  // do nothing and return.
313  auto itr_exec_info = tempStore.find(head_inst->seqNum);
314  if (itr_exec_info == tempStore.end())
315  return;
316 
317  // If there is a squashed load for which a read request was
318  // sent before it got squashed then add it to the trace.
319  DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
320  head_inst->seqNum);
321  // Get pointer to the execution info object corresponding to the inst.
322  InstExecInfo* exec_info_ptr = itr_exec_info->second;
323  if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
324  exec_info_ptr->toCommitTick != MaxTick &&
325  head_inst->hasRequest() &&
326  head_inst->getFault() == NoFault) {
327  // Add record to depTrace with commit parameter as false.
328  addDepTraceRecord(head_inst, exec_info_ptr, false);
329  }
330  // As the information contained is no longer needed, remove the execution
331  // info object from the temporary store.
332  clearTempStoreUntil(head_inst);
333 }
334 
335 void
337 {
338  DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
339  head_inst->seqNum);
340 
341  // Add the instruction to the depTrace.
342  if (!head_inst->isNop()) {
343 
344  // If tracing has just been enabled then the instruction at this stage
345  // of execution is far enough that we cannot gather info about its past
346  // like the tick it started execution. Simply return until we see an
347  // instruction that is found in the tempStore.
348  auto itr_temp_store = tempStore.find(head_inst->seqNum);
349  if (itr_temp_store == tempStore.end()) {
350  DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
351  "store, skipping.\n", head_inst->seqNum);
352  return;
353  }
354 
355  // Get pointer to the execution info object corresponding to the inst.
356  InstExecInfo* exec_info_ptr = itr_temp_store->second;
357  assert(exec_info_ptr->executeTick != MaxTick);
358  assert(exec_info_ptr->toCommitTick != MaxTick);
359 
360  // Check if the instruction had a fault, if it predicated false and
361  // thus previous register values were restored or if it was a
362  // load/store that did not have a request (e.g. when the size of the
363  // request is zero). In all these cases the instruction is set as
364  // executed and is picked up by the commit probe listener. But a
365  // request is not issued and registers are not written. So practically,
366  // skipping these should not hurt as execution would not stall on them.
367  // Alternatively, these could be included merely as a compute node in
368  // the graph. Removing these for now. If correlation accuracy needs to
369  // be improved in future these can be turned into comp nodes at the
370  // cost of bigger traces.
371  if (head_inst->getFault() != NoFault) {
372  DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
373  "skip adding it to the trace\n",
374  (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
375  head_inst->seqNum);
376  } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
377  DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
378  "skip adding it to the trace\n", head_inst->seqNum);
379  } else if (!head_inst->readPredicate()) {
380  DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
381  "skip adding it to the trace\n",
382  (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
383  head_inst->seqNum);
384  } else {
385  // Add record to depTrace with commit parameter as true.
386  addDepTraceRecord(head_inst, exec_info_ptr, true);
387  }
388  }
389  // As the information contained is no longer needed, remove the execution
390  // info object from the temporary store.
391  clearTempStoreUntil(head_inst);
392 }
393 
394 void
396  InstExecInfo* exec_info_ptr, bool commit)
397 {
398  // Create a record to assign dynamic intruction related fields.
399  TraceInfo* new_record = new TraceInfo;
400  // Add to map for sequence number look up to retrieve the TraceInfo pointer
401  traceInfoMap[head_inst->seqNum] = new_record;
402 
403  // Assign fields from the instruction
404  new_record->instNum = head_inst->seqNum;
405  new_record->commit = commit;
406  new_record->type = head_inst->isLoad() ? Record::LOAD :
407  (head_inst->isStore() ? Record::STORE :
408  Record::COMP);
409 
410  // Assign fields for creating a request in case of a load/store
411  new_record->reqFlags = head_inst->memReqFlags;
412  new_record->virtAddr = head_inst->effAddr;
413  new_record->physAddr = head_inst->physEffAddr;
414  // Currently the tracing does not support split requests.
415  new_record->size = head_inst->effSize;
416  new_record->pc = head_inst->pcState().instAddr();
417 
418  // Assign the timing information stored in the execution info object
419  new_record->executeTick = exec_info_ptr->executeTick;
420  new_record->toCommitTick = exec_info_ptr->toCommitTick;
421  new_record->commitTick = curTick();
422 
423  // Assign initial values for number of dependents and computational delay
424  new_record->numDepts = 0;
425  new_record->compDelay = -1;
426 
427  // The physical register dependency set of the first instruction is
428  // empty. Since there are no records in the depTrace at this point, the
429  // case of adding an ROB dependency by using a reverse iterator is not
430  // applicable. Thus, populate the fields of the record corresponding to the
431  // first instruction and return.
432  if (depTrace.empty()) {
433  // Store the record in depTrace.
434  depTrace.push_back(new_record);
435  DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
436  new_record->instNum);
437  return;
438  }
439 
440  // Clear register dependencies for squashed loads as they may be dependent
441  // on squashed instructions and we do not add those to the trace.
442  if (head_inst->isLoad() && !commit) {
443  (exec_info_ptr->physRegDepSet).clear();
444  }
445 
446  // Assign the register dependencies stored in the execution info object
447  std::set<InstSeqNum>::const_iterator dep_set_it;
448  for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
449  dep_set_it != (exec_info_ptr->physRegDepSet).end();
450  ++dep_set_it) {
451  auto trace_info_itr = traceInfoMap.find(*dep_set_it);
452  if (trace_info_itr != traceInfoMap.end()) {
453  // The register dependency is valid. Assign it and calculate
454  // computational delay
455  new_record->physRegDepList.push_back(*dep_set_it);
456  DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
457  "%lli\n", new_record->instNum, *dep_set_it);
458  TraceInfo* reg_dep = trace_info_itr->second;
459  reg_dep->numDepts++;
460  compDelayPhysRegDep(reg_dep, new_record);
461  ++stats.numRegDep;
462  } else {
463  // The instruction that this has a register dependency on was
464  // not added to the trace because of one of the following
465  // 1. it was an instruction that had a fault
466  // 2. it was an instruction that was predicated false and
467  // previous register values were restored
468  // 3. it was load/store that did not have a request (e.g. when
469  // the size of the request is zero but this may not be a fault)
470  // In all these cases the instruction is set as executed and is
471  // picked up by the commit probe listener. But a request is not
472  // issued and registers are not written to in these cases.
473  DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
474  "%lli is skipped\n",new_record->instNum, *dep_set_it);
475  }
476  }
477 
478  // Check for and assign an ROB dependency in addition to register
479  // dependency before adding the record to the trace.
480  // As stores have to commit in order a store is dependent on the last
481  // committed load/store. This is recorded in the ROB dependency.
482  if (head_inst->isStore()) {
483  // Look up store-after-store order dependency
484  updateCommitOrderDep(new_record, false);
485  // Look up store-after-load order dependency
486  updateCommitOrderDep(new_record, true);
487  }
488 
489  // In case a node is dependency-free or its dependency got discarded
490  // because it was outside the window, it is marked ready in the ROB at the
491  // time of issue. A request is sent as soon as possible. To model this, a
492  // node is assigned an issue order dependency on a committed instruction
493  // that completed earlier than it. This is done to avoid the problem of
494  // determining the issue times of such dependency-free nodes during replay
495  // which could lead to too much parallelism, thinking conservatively.
496  if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
497  updateIssueOrderDep(new_record);
498  }
499 
500  // Store the record in depTrace.
501  depTrace.push_back(new_record);
502  DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
503  (commit ? "committed" : "squashed"), new_record->instNum);
504 
505  // To process the number of records specified by depWindowSize in the
506  // forward direction, the depTrace must have twice as many records
507  // to check for dependencies.
508  if (depTrace.size() == 2 * depWindowSize) {
509 
510  DPRINTF(ElasticTrace, "Writing out trace...\n");
511 
512  // Write out the records which have been processed to the trace
513  // and remove them from the depTrace.
515 
516  // After the first window, writeDepTrace() must check for valid
517  // compDelay.
518  firstWin = false;
519  }
520 }
521 
522 void
524  bool find_load_not_store)
525 {
526  assert(new_record->isStore());
527  // Iterate in reverse direction to search for the last committed
528  // load/store that completed earlier than the new record
529  depTraceRevItr from_itr(depTrace.end());
530  depTraceRevItr until_itr(depTrace.begin());
531  TraceInfo* past_record = *from_itr;
532  uint32_t num_go_back = 0;
533 
534  // The execution time of this store is when it is sent, that is committed
535  Tick execute_tick = curTick();
536  // Search for store-after-load or store-after-store order dependency
537  while (num_go_back < depWindowSize && from_itr != until_itr) {
538  if (find_load_not_store) {
539  // Check if previous inst is a load completed earlier by comparing
540  // with execute tick
541  if (hasLoadCompleted(past_record, execute_tick)) {
542  // Assign rob dependency and calculate the computational delay
543  assignRobDep(past_record, new_record);
544  ++stats.numRegDep;
545  return;
546  }
547  } else {
548  // Check if previous inst is a store sent earlier by comparing with
549  // execute tick
550  if (hasStoreCommitted(past_record, execute_tick)) {
551  // Assign rob dependency and calculate the computational delay
552  assignRobDep(past_record, new_record);
553  ++stats.numRegDep;
554  return;
555  }
556  }
557  ++from_itr;
558  past_record = *from_itr;
559  ++num_go_back;
560  }
561 }
562 
563 void
565 {
566  // Interate in reverse direction to search for the last committed
567  // record that completed earlier than the new record
568  depTraceRevItr from_itr(depTrace.end());
569  depTraceRevItr until_itr(depTrace.begin());
570  TraceInfo* past_record = *from_itr;
571 
572  uint32_t num_go_back = 0;
573  Tick execute_tick = 0;
574 
575  if (new_record->isLoad()) {
576  // The execution time of a load is when a request is sent
577  execute_tick = new_record->executeTick;
579  } else if (new_record->isStore()) {
580  // The execution time of a store is when it is sent, i.e. committed
581  execute_tick = curTick();
583  } else {
584  // The execution time of a non load/store is when it completes
585  execute_tick = new_record->toCommitTick;
587  }
588 
589  // We search if this record has an issue order dependency on a past record.
590  // Once we find it, we update both the new record and the record it depends
591  // on and return.
592  while (num_go_back < depWindowSize && from_itr != until_itr) {
593  // Check if a previous inst is a load sent earlier, or a store sent
594  // earlier, or a comp inst completed earlier by comparing with execute
595  // tick
596  if (hasLoadBeenSent(past_record, execute_tick) ||
597  hasStoreCommitted(past_record, execute_tick) ||
598  hasCompCompleted(past_record, execute_tick)) {
599  // Assign rob dependency and calculate the computational delay
600  assignRobDep(past_record, new_record);
601  return;
602  }
603  ++from_itr;
604  past_record = *from_itr;
605  ++num_go_back;
606  }
607 }
608 
609 void
610 ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
611  DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
612  new_record->typeToStr(), new_record->instNum,
613  past_record->instNum);
614  // Add dependency on past record
615  new_record->robDepList.push_back(past_record->instNum);
616  // Update new_record's compute delay with respect to the past record
617  compDelayRob(past_record, new_record);
618  // Increment number of dependents of the past record
619  ++(past_record->numDepts);
620  // Update stat to log max number of dependents
621  stats.maxNumDependents = std::max(past_record->numDepts,
622  (uint32_t)stats.maxNumDependents.value());
623 }
624 
625 bool
627  Tick execute_tick) const
628 {
629  return (past_record->isStore() && past_record->commitTick <= execute_tick);
630 }
631 
632 bool
634  Tick execute_tick) const
635 {
636  return(past_record->isLoad() && past_record->commit &&
637  past_record->toCommitTick <= execute_tick);
638 }
639 
640 bool
642  Tick execute_tick) const
643 {
644  // Check if previous inst is a load sent earlier than this
645  return (past_record->isLoad() && past_record->commit &&
646  past_record->executeTick <= execute_tick);
647 }
648 
649 bool
651  Tick execute_tick) const
652 {
653  return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
654 }
655 
656 void
658 {
659  // Clear from temp store starting with the execution info object
660  // corresponding the head_inst and continue clearing by decrementing the
661  // sequence number until the last cleared sequence number.
662  InstSeqNum temp_sn = (head_inst->seqNum);
663  while (temp_sn > lastClearedSeqNum) {
664  auto itr_exec_info = tempStore.find(temp_sn);
665  if (itr_exec_info != tempStore.end()) {
666  InstExecInfo* exec_info_ptr = itr_exec_info->second;
667  // Free allocated memory for the info object
668  delete exec_info_ptr;
669  // Remove entry from temporary store
670  tempStore.erase(itr_exec_info);
671  }
672  temp_sn--;
673  }
674  // Update the last cleared sequence number to that of the head_inst
675  lastClearedSeqNum = head_inst->seqNum;
676 }
677 
678 void
680 {
681  // The computation delay is the delay between the completion tick of the
682  // inst. pointed to by past_record and the execution tick of its dependent
683  // inst. pointed to by new_record.
684  int64_t comp_delay = -1;
685  Tick execution_tick = 0, completion_tick = 0;
686 
687  DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
688  new_record->instNum, past_record->instNum);
689 
690  // Get the tick when the node is executed as per the modelling of
691  // computation delay
692  execution_tick = new_record->getExecuteTick();
693 
694  if (past_record->isLoad()) {
695  if (new_record->isStore()) {
696  completion_tick = past_record->toCommitTick;
697  } else {
698  completion_tick = past_record->executeTick;
699  }
700  } else if (past_record->isStore()) {
701  completion_tick = past_record->commitTick;
702  } else if (past_record->isComp()){
703  completion_tick = past_record->toCommitTick;
704  }
705  assert(execution_tick >= completion_tick);
706  comp_delay = execution_tick - completion_tick;
707 
708  DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
709  execution_tick, completion_tick, comp_delay);
710 
711  // Assign the computational delay with respect to the dependency which
712  // completes the latest.
713  if (new_record->compDelay == -1)
714  new_record->compDelay = comp_delay;
715  else
716  new_record->compDelay = std::min(comp_delay, new_record->compDelay);
717  DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
718  new_record->compDelay);
719 }
720 
721 void
723  TraceInfo* new_record)
724 {
725  // The computation delay is the delay between the completion tick of the
726  // inst. pointed to by past_record and the execution tick of its dependent
727  // inst. pointed to by new_record.
728  int64_t comp_delay = -1;
729  Tick execution_tick = 0, completion_tick = 0;
730 
731  DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
732  " %lli.\n", new_record->instNum, past_record->instNum);
733 
734  // Get the tick when the node is executed as per the modelling of
735  // computation delay
736  execution_tick = new_record->getExecuteTick();
737 
738  // When there is a physical register dependency on an instruction, the
739  // completion tick of that instruction is when it wrote to the register,
740  // that is toCommitTick. In case, of a store updating a destination
741  // register, this is approximated to commitTick instead
742  if (past_record->isStore()) {
743  completion_tick = past_record->commitTick;
744  } else {
745  completion_tick = past_record->toCommitTick;
746  }
747  assert(execution_tick >= completion_tick);
748  comp_delay = execution_tick - completion_tick;
749  DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
750  execution_tick, completion_tick, comp_delay);
751 
752  // Assign the computational delay with respect to the dependency which
753  // completes the latest.
754  if (new_record->compDelay == -1)
755  new_record->compDelay = comp_delay;
756  else
757  new_record->compDelay = std::min(comp_delay, new_record->compDelay);
758  DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
759  new_record->compDelay);
760 }
761 
762 Tick
764 {
765  if (isLoad()) {
766  // Execution tick for a load instruction is when the request was sent,
767  // that is executeTick.
768  return executeTick;
769  } else if (isStore()) {
770  // Execution tick for a store instruction is when the request was sent,
771  // that is commitTick.
772  return commitTick;
773  } else {
774  // Execution tick for a non load/store instruction is when the register
775  // value was written to, that is commitTick.
776  return toCommitTick;
777  }
778 }
779 
780 void
781 ElasticTrace::writeDepTrace(uint32_t num_to_write)
782 {
783  // Write the trace with fields as follows:
784  // Instruction sequence number
785  // If instruction was a load
786  // If instruction was a store
787  // If instruction has addr
788  // If instruction has size
789  // If instruction has flags
790  // List of order dependencies - optional, repeated
791  // Computational delay with respect to last completed dependency
792  // List of physical register RAW dependencies - optional, repeated
793  // Weight of a node equal to no. of filtered nodes before it - optional
794  uint16_t num_filtered_nodes = 0;
795  depTraceItr dep_trace_itr(depTrace.begin());
796  depTraceItr dep_trace_itr_start = dep_trace_itr;
797  while (num_to_write > 0) {
798  TraceInfo* temp_ptr = *dep_trace_itr;
799  assert(temp_ptr->type != Record::INVALID);
800  // If no node dependends on a comp node then there is no reason to
801  // track the comp node in the dependency graph. We filter out such
802  // nodes but count them and add a weight field to the subsequent node
803  // that we do include in the trace.
804  if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
805  DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
806  "is as follows:\n", temp_ptr->instNum);
807  if (temp_ptr->isLoad() || temp_ptr->isStore()) {
808  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
809  DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
810  "size %i, flags %i\n", temp_ptr->physAddr,
811  temp_ptr->size, temp_ptr->reqFlags);
812  } else {
813  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
814  }
815  if (firstWin && temp_ptr->compDelay == -1) {
816  if (temp_ptr->isLoad()) {
817  temp_ptr->compDelay = temp_ptr->executeTick;
818  } else if (temp_ptr->isStore()) {
819  temp_ptr->compDelay = temp_ptr->commitTick;
820  } else {
821  temp_ptr->compDelay = temp_ptr->toCommitTick;
822  }
823  }
824  assert(temp_ptr->compDelay != -1);
825  DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
826  temp_ptr->compDelay);
827 
828  // Create a protobuf message for the dependency record
829  ProtoMessage::InstDepRecord dep_pkt;
830  dep_pkt.set_seq_num(temp_ptr->instNum);
831  dep_pkt.set_type(temp_ptr->type);
832  dep_pkt.set_pc(temp_ptr->pc);
833  if (temp_ptr->isLoad() || temp_ptr->isStore()) {
834  dep_pkt.set_flags(temp_ptr->reqFlags);
835  dep_pkt.set_p_addr(temp_ptr->physAddr);
836  // If tracing of virtual addresses is enabled, set the optional
837  // field for it
838  if (traceVirtAddr)
839  dep_pkt.set_v_addr(temp_ptr->virtAddr);
840  dep_pkt.set_size(temp_ptr->size);
841  }
842  dep_pkt.set_comp_delay(temp_ptr->compDelay);
843  if (temp_ptr->robDepList.empty()) {
844  DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
845  }
846  while (!temp_ptr->robDepList.empty()) {
847  DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
848  temp_ptr->robDepList.front());
849  dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
850  temp_ptr->robDepList.pop_front();
851  }
852  if (temp_ptr->physRegDepList.empty()) {
853  DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
854  }
855  while (!temp_ptr->physRegDepList.empty()) {
856  DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
857  temp_ptr->physRegDepList.front());
858  dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
859  temp_ptr->physRegDepList.pop_front();
860  }
861  if (num_filtered_nodes != 0) {
862  // Set the weight of this node as the no. of filtered nodes
863  // between this node and the last node that we wrote to output
864  // stream. The weight will be used during replay to model ROB
865  // occupancy of filtered nodes.
866  dep_pkt.set_weight(num_filtered_nodes);
867  num_filtered_nodes = 0;
868  }
869  // Write the message to the protobuf output stream
870  dataTraceStream->write(dep_pkt);
871  } else {
872  // Don't write the node to the trace but note that we have filtered
873  // out a node.
875  ++num_filtered_nodes;
876  }
877  dep_trace_itr++;
878  traceInfoMap.erase(temp_ptr->instNum);
879  delete temp_ptr;
880  num_to_write--;
881  }
882  depTrace.erase(dep_trace_itr_start, dep_trace_itr);
883 }
884 
886  : statistics::Group(parent),
887  ADD_STAT(numRegDep, statistics::units::Count::get(),
888  "Number of register dependencies recorded during tracing"),
889  ADD_STAT(numOrderDepStores, statistics::units::Count::get(),
890  "Number of commit order (rob) dependencies for a store "
891  "recorded on a past load/store during tracing"),
892  ADD_STAT(numIssueOrderDepLoads, statistics::units::Count::get(),
893  "Number of loads that got assigned issue order dependency "
894  "because they were dependency-free"),
895  ADD_STAT(numIssueOrderDepStores, statistics::units::Count::get(),
896  "Number of stores that got assigned issue order dependency "
897  "because they were dependency-free"),
898  ADD_STAT(numIssueOrderDepOther, statistics::units::Count::get(),
899  "Number of non load/store insts that got assigned issue order "
900  "dependency because they were dependency-free"),
901  ADD_STAT(numFilteredNodes, statistics::units::Count::get(),
902  "No. of nodes filtered out before writing the output trace"),
903  ADD_STAT(maxNumDependents, statistics::units::Count::get(),
904  "Maximum number or dependents on any instruction"),
905  ADD_STAT(maxTempStoreSize, statistics::units::Count::get(),
906  "Maximum size of the temporary store during the run"),
907  ADD_STAT(maxPhysRegDepMapSize, statistics::units::Count::get(),
908  "Maximum size of register dependency map")
909 {
910 }
911 
912 const std::string&
914 {
915  return Record::RecordType_Name(type);
916 }
917 
918 void
920 {
921  // Write to trace all records in the depTrace.
922  writeDepTrace(depTrace.size());
923  // Delete the stream objects
924  delete dataTraceStream;
925  delete instTraceStream;
926 }
927 
928 } // namespace o3
929 } // namespace gem5
#define DPRINTFR(x,...)
Definition: trace.hh:200
#define DPRINTF(x,...)
Definition: trace.hh:186
A ProtoOutputStream wraps a coded stream, potentially with compression, based on looking at the file ...
Definition: protoio.hh:91
void write(const google::protobuf::Message &msg)
Write a message to the stream, prepending it with the message size.
Definition: protoio.cc:84
virtual ThreadContext * getContext(int tn)
Given a thread num, get the thread context for it.
Definition: base.hh:284
static Counter numSimulatedInsts()
Definition: base.hh:592
std::string resolve(const std::string &name) const
Returns relative file names prepended with name of this directory.
Definition: output.cc:204
Physical register ID.
Definition: reg_class.hh:392
constexpr const char * className() const
Return a const char* with the register class name.
Definition: reg_class.hh:273
const RegIndex & flatIndex() const
Flat index accessor.
Definition: reg_class.hh:449
ProbeListenerArg generates a listener for the class of Arg and the class type T which is the class co...
Definition: probe.hh:229
This class is a minimal wrapper around SimObject.
Definition: probe.hh:108
std::vector< ProbeListener * > listeners
Definition: probe.hh:111
If you want a reference counting pointer to a mutable object, create it like this:
Definition: refcnt.hh:127
Register ID: describe an architectural register with its class and index.
Definition: reg_class.hh:91
constexpr bool is(RegClassType reg_class) const
Definition: reg_class.hh:267
constexpr const char * className() const
Return a const char* with the register class name.
Definition: reg_class.hh:273
virtual void scheduleInstCountEvent(Event *event, Tick count)=0
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition: cpu.hh:94
The elastic trace is a type of probe listener and listens to probe points in multiple stages of the O...
void compDelayRob(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has an ROB...
const bool traceVirtAddr
Whether to trace virtual addresses for memory requests.
bool hasCompCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a comp node that completed earlier than the execute tick.
void updateIssueOrderDep(TraceInfo *new_record)
Reverse iterate through the graph, search for an issue order dependency for a new node and update the...
bool allProbesReg
Whether the elastic trace listener has been registered for all probes.
std::vector< TraceInfo * >::iterator depTraceItr
Typedef of iterator to the instruction dependency trace.
void addCommittedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is committed.
bool hasLoadCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load that completed earlier than the execute tick.
ProtoOutputStream * dataTraceStream
Protobuf output stream for data dependency trace.
void recordToCommTick(const DynInstConstPtr &dyn_inst)
Populate the timestamp field in an InstExecInfo object for an instruction in flight when it is execut...
std::unordered_map< InstSeqNum, InstExecInfo * > tempStore
Temporary store of InstExecInfo objects.
void writeDepTrace(uint32_t num_to_write)
Write out given number of records to the trace starting with the first record in depTrace and iterati...
std::vector< TraceInfo * > depTrace
The instruction dependency trace containing TraceInfo objects.
void clearTempStoreUntil(const DynInstConstPtr &head_inst)
Clear entries in the temporary store of execution info objects to free allocated memory until the pre...
uint32_t depWindowSize
The maximum distance for a dependency, set by a top-level parameter.
void assignRobDep(TraceInfo *past_record, TraceInfo *new_record)
The new_record has an order dependency on a past_record, thus update the new record's Rob dependency ...
ElasticTrace(const ElasticTraceParams &params)
Constructor.
CPU * cpu
Pointer to the O3CPU that is this listener's parent a.k.a.
bool firstWin
Used for checking the first window for processing and writing of dependency trace.
std::reverse_iterator< depTraceItr > depTraceRevItr
Typedef of the reverse iterator to the instruction dependency trace.
void addDepTraceRecord(const DynInstConstPtr &head_inst, InstExecInfo *exec_info_ptr, bool commit)
Add a record to the dependency trace depTrace which is a sequential container.
void compDelayPhysRegDep(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has a Phys...
void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
When an instruction gets squashed the destination register mapped to it is freed up in the rename sta...
EventFunctionWrapper regEtraceListenersEvent
Event to trigger registering this listener for all probe points.
void regEtraceListeners()
Register all listeners.
void recordExecTick(const DynInstConstPtr &dyn_inst)
Populate the execute timestamp field in an InstExecInfo object for an instruction in flight.
void regProbeListeners()
Register the probe listeners that is the methods called on a probe point notify() call.
std::unordered_map< RegIndex, InstSeqNum > physRegDepMap
Map for recording the producer of a physical register to check Read After Write dependencies.
std::unordered_map< InstSeqNum, TraceInfo * > traceInfoMap
Map where the instruction sequence number is mapped to the pointer to the TraceInfo object.
void addSquashedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is squashed only if it is a load and a request ...
bool hasLoadBeenSent(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load sent earlier than the execute tick.
void fetchReqTrace(const RequestPtr &req)
Take the fields of the request class object that are relevant to create an instruction fetch request.
InstSeqNum lastClearedSeqNum
The last cleared instruction sequence number used to free up the memory allocated in the temporary st...
gem5::o3::ElasticTrace::ElasticTraceStats stats
ProtoOutputStream * instTraceStream
Protobuf output stream for instruction fetch trace.
void updateRegDep(const DynInstConstPtr &dyn_inst)
Record a Read After Write physical register dependency if there has been a write to the source regist...
void flushTraces()
Process any outstanding trace records, flush them out to the protobuf output streams and delete the s...
void updateCommitOrderDep(TraceInfo *new_record, bool find_load_not_store)
Reverse iterate through the graph, search for a store-after-store or store-after-load dependency and ...
const InstSeqNum startTraceInst
Number of instructions after which to enable tracing.
bool hasStoreCommitted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a store sent earlier than the execute tick.
Statistics container.
Definition: group.hh:94
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:622
STL pair class.
Definition: stl.hh:58
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
Tick executeTick
Timestamp when instruction was first processed by execute stage.
Tick toCommitTick
Timestamp when instruction execution is completed in execute stage and instruction is marked as ready...
std::set< InstSeqNum > physRegDepSet
Set of instruction sequence numbers that this instruction depends on due to Read After Write data dep...
const std::string & typeToStr() const
Return string specifying the type of the node.
bool isLoad() const
Is the record a load.
std::list< InstSeqNum > robDepList
std::list< InstSeqNum > physRegDepList
int64_t compDelay
Computational delay after the last dependent inst.
bool isComp() const
Is the record a fetch triggering an Icache request.
RecordType type
The type of trace record for the instruction node.
bool isStore() const
Is the record a store.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
#define inform(...)
Definition: logging.hh:247
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition: core.cc:48
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
OutputDirectory simout
Definition: output.cc:62
uint64_t Tick
Tick count type.
Definition: types.hh:58
const Tick MaxTick
Definition: types.hh:60
void registerExitCallback(const std::function< void()> &callback)
Register an exit callback.
Definition: core.cc:146
constexpr decltype(nullptr) NoFault
Definition: types.hh:253
uint64_t InstSeqNum
Definition: inst_seq.hh:40
@ InvalidRegClass
Definition: reg_class.hh:69
@ MiscRegClass
Control (misc) register.
Definition: reg_class.hh:68
Declaration of the Packet class.
statistics::Scalar maxNumDependents
Maximum number of dependents on any instruction.
statistics::Scalar maxTempStoreSize
Maximum size of the temporary store mostly useful as a check that it is not growing.
statistics::Scalar numIssueOrderDepLoads
Number of load insts that got assigned an issue order dependency because they were dependency-free.
statistics::Scalar numIssueOrderDepStores
Number of store insts that got assigned an issue order dependency because they were dependency-free.
ElasticTraceStats(statistics::Group *parent)
statistics::Scalar numRegDep
Number of register dependencies recorded during tracing.
statistics::Scalar numFilteredNodes
Number of filtered nodes.
statistics::Scalar maxPhysRegDepMapSize
Maximum size of the map that holds the last writer to a physical register.
statistics::Scalar numIssueOrderDepOther
Number of non load/store insts that got assigned an issue order dependency because they were dependen...
Tick getExecuteTick() const
Get the execute tick of the instruction.
const std::string & name()
Definition: trace.cc:49

Generated on Wed Dec 21 2022 10:22:31 for gem5 by doxygen 1.9.1