gem5  v20.0.0.2
elastic_trace.cc
1 /*
2  * Copyright (c) 2013 - 2015 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include "cpu/o3/probe/elastic_trace.hh"
39 
40 #include "base/callback.hh"
41 #include "base/output.hh"
42 #include "base/trace.hh"
43 #include "cpu/reg_class.hh"
44 #include "debug/ElasticTrace.hh"
45 #include "mem/packet.hh"
46 
47 ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
48  : ProbeListenerObject(params),
49  regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
50  firstWin(true),
51  lastClearedSeqNum(0),
52  depWindowSize(params->depWindowSize),
53  dataTraceStream(nullptr),
54  instTraceStream(nullptr),
55  startTraceInst(params->startTraceInst),
56  allProbesReg(false),
57  traceVirtAddr(params->traceVirtAddr)
58 {
59  cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
60  fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
61  "support dependency tracing.\n", name());
62 
63  fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
64  "Recommended size is 3x ROB size in the O3CPU.\n");
65 
66  fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for "\
67  "single-threaded workload only", cpu->numThreads, name());
68  // Initialize the protobuf output stream
69  fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\
70  "trace file path to instFetchTraceFile");
71  fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\
72  "trace file path to dataDepTraceFile");
73  std::string filename = simout.resolve(name() + "." +
74  params->instFetchTraceFile);
75  instTraceStream = new ProtoOutputStream(filename);
76  filename = simout.resolve(name() + "." + params->dataDepTraceFile);
77  dataTraceStream = new ProtoOutputStream(filename);
78  // Create a protobuf message for the header and write it to the stream
79  ProtoMessage::PacketHeader inst_pkt_header;
80  inst_pkt_header.set_obj_id(name());
81  inst_pkt_header.set_tick_freq(SimClock::Frequency);
82  instTraceStream->write(inst_pkt_header);
83  // Create a protobuf message for the header and write it to
84  // the stream
85  ProtoMessage::InstDepRecordHeader data_rec_header;
86  data_rec_header.set_obj_id(name());
87  data_rec_header.set_tick_freq(SimClock::Frequency);
88  data_rec_header.set_window_size(depWindowSize);
89  dataTraceStream->write(data_rec_header);
90  // Register a callback to flush trace records and close the output streams.
91  Callback* cb = new MakeCallback<ElasticTrace,
92  &ElasticTrace::flushTraces>(this);
93  registerExitCallback(cb);
94 }
95 
96 void
97 ElasticTrace::regProbeListeners()
98 {
99  inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
100  curTick(), startTraceInst);
101  if (startTraceInst == 0) {
102  // If we want to start tracing from the start of the simulation,
103  // register all elastic trace probes now.
104  regEtraceListeners();
105  } else {
106  // Schedule an event to register all elastic trace probes when
107  // specified no. of instructions are committed.
108  cpu->getContext(0)->scheduleInstCountEvent(
109  &regEtraceListenersEvent, startTraceInst);
110  }
111 }
112 
113 void
114 ElasticTrace::regEtraceListeners()
115 {
116  assert(!allProbesReg);
117  inform("@%llu: No. of instructions committed = %llu, registering elastic"
118  " probe listeners", curTick(), cpu->numSimulatedInsts());
119  // Create new listeners: provide method to be called upon a notify() for
120  // each probe point.
121  listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
122  "FetchRequest", &ElasticTrace::fetchReqTrace));
123  listeners.push_back(new ProbeListenerArg<ElasticTrace,
124  DynInstConstPtr>(this, "Execute",
125  &ElasticTrace::recordExecTick));
126  listeners.push_back(new ProbeListenerArg<ElasticTrace,
127  DynInstConstPtr>(this, "ToCommit",
128  &ElasticTrace::recordToCommTick));
129  listeners.push_back(new ProbeListenerArg<ElasticTrace,
130  DynInstConstPtr>(this, "Rename",
131  &ElasticTrace::updateRegDep));
132  listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
133  "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
134  listeners.push_back(new ProbeListenerArg<ElasticTrace,
135  DynInstConstPtr>(this, "Squash",
136  &ElasticTrace::addSquashedInst));
137  listeners.push_back(new ProbeListenerArg<ElasticTrace,
138  DynInstConstPtr>(this, "Commit",
139  &ElasticTrace::addCommittedInst));
140  allProbesReg = true;
141 }
142 
143 void
144 ElasticTrace::fetchReqTrace(const RequestPtr &req)
145 {
146 
147  DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
148  (MemCmd::ReadReq),
149  req->getPC(), req->getVaddr(), req->getPaddr(),
150  req->getFlags(), req->getSize(), curTick());
151 
152  // Create a protobuf message including the request fields necessary to
153  // recreate the request in the TraceCPU.
154  ProtoMessage::Packet inst_fetch_pkt;
155  inst_fetch_pkt.set_tick(curTick());
156  inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
157  inst_fetch_pkt.set_pc(req->getPC());
158  inst_fetch_pkt.set_flags(req->getFlags());
159  inst_fetch_pkt.set_addr(req->getPaddr());
160  inst_fetch_pkt.set_size(req->getSize());
161  // Write the message to the stream.
162  instTraceStream->write(inst_fetch_pkt);
163 }
164 
165 void
166 ElasticTrace::recordExecTick(const DynInstConstPtr &dyn_inst)
167 {
168 
169  // In a corner case, a retired instruction is propagated backward to the
170  // IEW instruction queue to handle some side-channel information. But we
171  // must not process an instruction again. So we test the sequence number
172  // against the lastClearedSeqNum and skip adding the instruction for such
173  // corner cases.
174  if (dyn_inst->seqNum <= lastClearedSeqNum) {
175  DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
176  has already retired (mostly squashed)", dyn_inst->seqNum);
177  // Do nothing as program has proceeded and this inst has been
178  // propagated backwards to handle something.
179  return;
180  }
181 
182  DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
183  curTick());
184  // Either the execution info object will already exist if this
185  // instruction had a register dependency recorded in the rename probe
186  // listener before entering execute stage or it will not exist and will
187  // need to be created here.
188  InstExecInfo* exec_info_ptr;
189  auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
190  if (itr_exec_info != tempStore.end()) {
191  exec_info_ptr = itr_exec_info->second;
192  } else {
193  exec_info_ptr = new InstExecInfo;
194  tempStore[dyn_inst->seqNum] = exec_info_ptr;
195  }
196 
197  exec_info_ptr->executeTick = curTick();
198  maxTempStoreSize = std::max(tempStore.size(),
199  (std::size_t)maxTempStoreSize.value());
200 }
201 
202 void
203 ElasticTrace::recordToCommTick(const DynInstConstPtr &dyn_inst)
204 {
205  // If tracing has just been enabled then the instruction at this stage of
206  // execution is far enough that we cannot gather info about its past like
207  // the tick it started execution. Simply return until we see an instruction
208  // that is found in the tempStore.
209  auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
210  if (itr_exec_info == tempStore.end()) {
211  DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
212  " skipping.\n", dyn_inst->seqNum);
213  return;
214  }
215 
216  DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
217  curTick());
218  InstExecInfo* exec_info_ptr = itr_exec_info->second;
219  exec_info_ptr->toCommitTick = curTick();
220 
221 }
222 
223 void
224 ElasticTrace::updateRegDep(const DynInstConstPtr &dyn_inst)
225 {
226  // Get the sequence number of the instruction
227  InstSeqNum seq_num = dyn_inst->seqNum;
228 
229  assert(dyn_inst->seqNum > lastClearedSeqNum);
230 
231  // Since this is the first probe activated in the pipeline, create
232  // a new execution info object to track this instruction as it
233  // progresses through the pipeline.
234  InstExecInfo* exec_info_ptr = new InstExecInfo;
235  tempStore[seq_num] = exec_info_ptr;
236 
237  // Loop through the source registers and look up the dependency map. If
238  // the source register entry is found in the dependency map, add a
239  // dependency on the last writer.
240  int8_t max_regs = dyn_inst->numSrcRegs();
241  for (int src_idx = 0; src_idx < max_regs; src_idx++) {
242 
243  const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
244  if (!src_reg.isMiscReg() &&
245  !src_reg.isZeroReg()) {
246  // Get the physical register index of the i'th source register.
247  PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
248  DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
249  " %i (%s)\n", seq_num,
250  phys_src_reg->flatIndex(), phys_src_reg->className());
251  auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
252  if (itr_writer != physRegDepMap.end()) {
253  InstSeqNum last_writer = itr_writer->second;
254  // Additionally the dependency distance is kept less than the
255  // window size parameter to limit the memory allocation to
256  // nodes in the graph. If the window were tending to infinite
257  // we would have to load a large number of node objects during
258  // replay.
259  if (seq_num - last_writer < depWindowSize) {
260  // Record a physical register dependency.
261  exec_info_ptr->physRegDepSet.insert(last_writer);
262  }
263  }
264 
265  }
266 
267  }
268 
269  // Loop through the destination registers of this instruction and update
270  // the physical register dependency map for last writers to registers.
271  max_regs = dyn_inst->numDestRegs();
272  for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
273  // For data dependency tracking the register must be an int, float or
274  // CC register and not a Misc register.
275  const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
276  if (!dest_reg.isMiscReg() &&
277  !dest_reg.isZeroReg()) {
278  // Get the physical register index of the i'th destination
279  // register.
280  PhysRegIdPtr phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
281  DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
282  " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
283  dest_reg.className());
284  physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
285  }
286  }
287  maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
288  (std::size_t)maxPhysRegDepMapSize.value());
289 }
290 
291 void
292 ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
293 {
294  DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
295  inst_reg_pair.second);
296  auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
297  if (itr_regdep_map != physRegDepMap.end())
298  physRegDepMap.erase(itr_regdep_map);
299 }
300 
301 void
302 ElasticTrace::addSquashedInst(const DynInstConstPtr &head_inst)
303 {
304  // If the squashed instruction was squashed before being processed by
305  // execute stage then it will not be in the temporary store. In this case
306  // do nothing and return.
307  auto itr_exec_info = tempStore.find(head_inst->seqNum);
308  if (itr_exec_info == tempStore.end())
309  return;
310 
311  // If there is a squashed load for which a read request was
312  // sent before it got squashed then add it to the trace.
313  DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
314  head_inst->seqNum);
315  // Get pointer to the execution info object corresponding to the inst.
316  InstExecInfo* exec_info_ptr = itr_exec_info->second;
317  if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
318  exec_info_ptr->toCommitTick != MaxTick &&
319  head_inst->hasRequest() &&
320  head_inst->getFault() == NoFault) {
321  // Add record to depTrace with commit parameter as false.
322  addDepTraceRecord(head_inst, exec_info_ptr, false);
323  }
324  // As the information contained is no longer needed, remove the execution
325  // info object from the temporary store.
326  clearTempStoreUntil(head_inst);
327 }
328 
329 void
330 ElasticTrace::addCommittedInst(const DynInstConstPtr &head_inst)
331 {
332  DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
333  head_inst->seqNum);
334 
335  // Add the instruction to the depTrace.
336  if (!head_inst->isNop()) {
337 
338  // If tracing has just been enabled then the instruction at this stage
339  // of execution is far enough that we cannot gather info about its past
340  // like the tick it started execution. Simply return until we see an
341  // instruction that is found in the tempStore.
342  auto itr_temp_store = tempStore.find(head_inst->seqNum);
343  if (itr_temp_store == tempStore.end()) {
344  DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
345  "store, skipping.\n", head_inst->seqNum);
346  return;
347  }
348 
349  // Get pointer to the execution info object corresponding to the inst.
350  InstExecInfo* exec_info_ptr = itr_temp_store->second;
351  assert(exec_info_ptr->executeTick != MaxTick);
352  assert(exec_info_ptr->toCommitTick != MaxTick);
353 
354  // Check if the instruction had a fault, if it predicated false and
355  // thus previous register values were restored or if it was a
356  // load/store that did not have a request (e.g. when the size of the
357  // request is zero). In all these cases the instruction is set as
358  // executed and is picked up by the commit probe listener. But a
359  // request is not issued and registers are not written. So practically,
360  // skipping these should not hurt as execution would not stall on them.
361  // Alternatively, these could be included merely as a compute node in
362  // the graph. Removing these for now. If correlation accuracy needs to
363  // be improved in future these can be turned into comp nodes at the
364  // cost of bigger traces.
365  if (head_inst->getFault() != NoFault) {
366  DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
367  "skip adding it to the trace\n",
368  (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
369  head_inst->seqNum);
370  } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
371  DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
372  "skip adding it to the trace\n", head_inst->seqNum);
373  } else if (!head_inst->readPredicate()) {
374  DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
375  "skip adding it to the trace\n",
376  (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
377  head_inst->seqNum);
378  } else {
379  // Add record to depTrace with commit parameter as true.
380  addDepTraceRecord(head_inst, exec_info_ptr, true);
381  }
382  }
383  // As the information contained is no longer needed, remove the execution
384  // info object from the temporary store.
385  clearTempStoreUntil(head_inst);
386 }
387 
388 void
389 ElasticTrace::addDepTraceRecord(const DynInstConstPtr &head_inst,
390  InstExecInfo* exec_info_ptr, bool commit)
391 {
392  // Create a record to assign dynamic instruction related fields.
393  TraceInfo* new_record = new TraceInfo;
394  // Add to map for sequence number look up to retrieve the TraceInfo pointer
395  traceInfoMap[head_inst->seqNum] = new_record;
396 
397  // Assign fields from the instruction
398  new_record->instNum = head_inst->seqNum;
399  new_record->commit = commit;
400  new_record->type = head_inst->isLoad() ? Record::LOAD :
401  (head_inst->isStore() ? Record::STORE :
402  Record::COMP);
403 
404  // Assign fields for creating a request in case of a load/store
405  new_record->reqFlags = head_inst->memReqFlags;
406  new_record->virtAddr = head_inst->effAddr;
407  new_record->physAddr = head_inst->physEffAddr;
408  // Currently the tracing does not support split requests.
409  new_record->size = head_inst->effSize;
410  new_record->pc = head_inst->instAddr();
411 
412  // Assign the timing information stored in the execution info object
413  new_record->executeTick = exec_info_ptr->executeTick;
414  new_record->toCommitTick = exec_info_ptr->toCommitTick;
415  new_record->commitTick = curTick();
416 
417  // Assign initial values for number of dependents and computational delay
418  new_record->numDepts = 0;
419  new_record->compDelay = -1;
420 
421  // The physical register dependency set of the first instruction is
422  // empty. Since there are no records in the depTrace at this point, the
423  // case of adding an ROB dependency by using a reverse iterator is not
424  // applicable. Thus, populate the fields of the record corresponding to the
425  // first instruction and return.
426  if (depTrace.empty()) {
427  // Store the record in depTrace.
428  depTrace.push_back(new_record);
429  DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
430  new_record->instNum);
431  return;
432  }
433 
434  // Clear register dependencies for squashed loads as they may be dependent
435  // on squashed instructions and we do not add those to the trace.
436  if (head_inst->isLoad() && !commit) {
437  (exec_info_ptr->physRegDepSet).clear();
438  }
439 
440  // Assign the register dependencies stored in the execution info object
441  std::set<InstSeqNum>::const_iterator dep_set_it;
442  for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
443  dep_set_it != (exec_info_ptr->physRegDepSet).end();
444  ++dep_set_it) {
445  auto trace_info_itr = traceInfoMap.find(*dep_set_it);
446  if (trace_info_itr != traceInfoMap.end()) {
447  // The register dependency is valid. Assign it and calculate
448  // computational delay
449  new_record->physRegDepList.push_back(*dep_set_it);
450  DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
451  "%lli\n", new_record->instNum, *dep_set_it);
452  TraceInfo* reg_dep = trace_info_itr->second;
453  reg_dep->numDepts++;
454  compDelayPhysRegDep(reg_dep, new_record);
455  ++numRegDep;
456  } else {
457  // The instruction that this has a register dependency on was
458  // not added to the trace because of one of the following
459  // 1. it was an instruction that had a fault
460  // 2. it was an instruction that was predicated false and
461  // previous register values were restored
462  // 3. it was load/store that did not have a request (e.g. when
463  // the size of the request is zero but this may not be a fault)
464  // In all these cases the instruction is set as executed and is
465  // picked up by the commit probe listener. But a request is not
466  // issued and registers are not written to in these cases.
467  DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
468  "%lli is skipped\n",new_record->instNum, *dep_set_it);
469  }
470  }
471 
472  // Check for and assign an ROB dependency in addition to register
473  // dependency before adding the record to the trace.
474  // As stores have to commit in order a store is dependent on the last
475  // committed load/store. This is recorded in the ROB dependency.
476  if (head_inst->isStore()) {
477  // Look up store-after-store order dependency
478  updateCommitOrderDep(new_record, false);
479  // Look up store-after-load order dependency
480  updateCommitOrderDep(new_record, true);
481  }
482 
483  // In case a node is dependency-free or its dependency got discarded
484  // because it was outside the window, it is marked ready in the ROB at the
485  // time of issue. A request is sent as soon as possible. To model this, a
486  // node is assigned an issue order dependency on a committed instruction
487  // that completed earlier than it. This is done to avoid the problem of
488  // determining the issue times of such dependency-free nodes during replay
489  // which could lead to too much parallelism, thinking conservatively.
490  if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
491  updateIssueOrderDep(new_record);
492  }
493 
494  // Store the record in depTrace.
495  depTrace.push_back(new_record);
496  DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
497  (commit ? "committed" : "squashed"), new_record->instNum);
498 
499  // To process the number of records specified by depWindowSize in the
500  // forward direction, the depTrace must have twice as many records
501  // to check for dependencies.
502  if (depTrace.size() == 2 * depWindowSize) {
503 
504  DPRINTF(ElasticTrace, "Writing out trace...\n");
505 
506  // Write out the records which have been processed to the trace
507  // and remove them from the depTrace.
508  writeDepTrace(depWindowSize);
509 
510  // After the first window, writeDepTrace() must check for valid
511  // compDelay.
512  firstWin = false;
513  }
514 }
515 
516 void
517 ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
518  bool find_load_not_store)
519 {
520  assert(new_record->isStore());
521  // Iterate in reverse direction to search for the last committed
522  // load/store that completed earlier than the new record
523  depTraceRevItr from_itr(depTrace.end());
524  depTraceRevItr until_itr(depTrace.begin());
525  TraceInfo* past_record = *from_itr;
526  uint32_t num_go_back = 0;
527 
528  // The execution time of this store is when it is sent, that is committed
529  Tick execute_tick = curTick();
530  // Search for store-after-load or store-after-store order dependency
531  while (num_go_back < depWindowSize && from_itr != until_itr) {
532  if (find_load_not_store) {
533  // Check if previous inst is a load completed earlier by comparing
534  // with execute tick
535  if (hasLoadCompleted(past_record, execute_tick)) {
536  // Assign rob dependency and calculate the computational delay
537  assignRobDep(past_record, new_record);
538  ++numOrderDepStores;
539  return;
540  }
541  } else {
542  // Check if previous inst is a store sent earlier by comparing with
543  // execute tick
544  if (hasStoreCommitted(past_record, execute_tick)) {
545  // Assign rob dependency and calculate the computational delay
546  assignRobDep(past_record, new_record);
547  ++numOrderDepStores;
548  return;
549  }
550  }
551  ++from_itr;
552  past_record = *from_itr;
553  ++num_go_back;
554  }
555 }
556 
557 void
558 ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
559 {
560  // Iterate in reverse direction to search for the last committed
561  // record that completed earlier than the new record
562  depTraceRevItr from_itr(depTrace.end());
563  depTraceRevItr until_itr(depTrace.begin());
564  TraceInfo* past_record = *from_itr;
565 
566  uint32_t num_go_back = 0;
567  Tick execute_tick = 0;
568 
569  if (new_record->isLoad()) {
570  // The execution time of a load is when a request is sent
571  execute_tick = new_record->executeTick;
572  ++numIssueOrderDepLoads;
573  } else if (new_record->isStore()) {
574  // The execution time of a store is when it is sent, i.e. committed
575  execute_tick = curTick();
576  ++numIssueOrderDepStores;
577  } else {
578  // The execution time of a non load/store is when it completes
579  execute_tick = new_record->toCommitTick;
580  ++numIssueOrderDepOther;
581  }
582 
583  // We search if this record has an issue order dependency on a past record.
584  // Once we find it, we update both the new record and the record it depends
585  // on and return.
586  while (num_go_back < depWindowSize && from_itr != until_itr) {
587  // Check if a previous inst is a load sent earlier, or a store sent
588  // earlier, or a comp inst completed earlier by comparing with execute
589  // tick
590  if (hasLoadBeenSent(past_record, execute_tick) ||
591  hasStoreCommitted(past_record, execute_tick) ||
592  hasCompCompleted(past_record, execute_tick)) {
593  // Assign rob dependency and calculate the computational delay
594  assignRobDep(past_record, new_record);
595  return;
596  }
597  ++from_itr;
598  past_record = *from_itr;
599  ++num_go_back;
600  }
601 }
602 
603 void
604 ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
605  DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
606  new_record->typeToStr(), new_record->instNum,
607  past_record->instNum);
608  // Add dependency on past record
609  new_record->robDepList.push_back(past_record->instNum);
610  // Update new_record's compute delay with respect to the past record
611  compDelayRob(past_record, new_record);
612  // Increment number of dependents of the past record
613  ++(past_record->numDepts);
614  // Update stat to log max number of dependents
615  maxNumDependents = std::max(past_record->numDepts,
616  (uint32_t)maxNumDependents.value());
617 }
618 
619 bool
620 ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
621  Tick execute_tick) const
622 {
623  return (past_record->isStore() && past_record->commitTick <= execute_tick);
624 }
625 
626 bool
627 ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
628  Tick execute_tick) const
629 {
630  return(past_record->isLoad() && past_record->commit &&
631  past_record->toCommitTick <= execute_tick);
632 }
633 
634 bool
635 ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
636  Tick execute_tick) const
637 {
638  // Check if previous inst is a load sent earlier than this
639  return (past_record->isLoad() && past_record->commit &&
640  past_record->executeTick <= execute_tick);
641 }
642 
643 bool
644 ElasticTrace::hasCompCompleted(TraceInfo* past_record,
645  Tick execute_tick) const
646 {
647  return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
648 }
649 
650 void
651 ElasticTrace::clearTempStoreUntil(const DynInstConstPtr &head_inst)
652 {
653  // Clear from temp store starting with the execution info object
654  // corresponding the head_inst and continue clearing by decrementing the
655  // sequence number until the last cleared sequence number.
656  InstSeqNum temp_sn = (head_inst->seqNum);
657  while (temp_sn > lastClearedSeqNum) {
658  auto itr_exec_info = tempStore.find(temp_sn);
659  if (itr_exec_info != tempStore.end()) {
660  InstExecInfo* exec_info_ptr = itr_exec_info->second;
661  // Free allocated memory for the info object
662  delete exec_info_ptr;
663  // Remove entry from temporary store
664  tempStore.erase(itr_exec_info);
665  }
666  temp_sn--;
667  }
668  // Update the last cleared sequence number to that of the head_inst
669  lastClearedSeqNum = head_inst->seqNum;
670 }
671 
672 void
673 ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
674 {
675  // The computation delay is the delay between the completion tick of the
676  // inst. pointed to by past_record and the execution tick of its dependent
677  // inst. pointed to by new_record.
678  int64_t comp_delay = -1;
679  Tick execution_tick = 0, completion_tick = 0;
680 
681  DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
682  new_record->instNum, past_record->instNum);
683 
684  // Get the tick when the node is executed as per the modelling of
685  // computation delay
686  execution_tick = new_record->getExecuteTick();
687 
688  if (past_record->isLoad()) {
689  if (new_record->isStore()) {
690  completion_tick = past_record->toCommitTick;
691  } else {
692  completion_tick = past_record->executeTick;
693  }
694  } else if (past_record->isStore()) {
695  completion_tick = past_record->commitTick;
696  } else if (past_record->isComp()){
697  completion_tick = past_record->toCommitTick;
698  }
699  assert(execution_tick >= completion_tick);
700  comp_delay = execution_tick - completion_tick;
701 
702  DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
703  execution_tick, completion_tick, comp_delay);
704 
705  // Assign the computational delay with respect to the dependency which
706  // completes the latest.
707  if (new_record->compDelay == -1)
708  new_record->compDelay = comp_delay;
709  else
710  new_record->compDelay = std::min(comp_delay, new_record->compDelay);
711  DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
712  new_record->compDelay);
713 }
714 
715 void
716 ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
717  TraceInfo* new_record)
718 {
719  // The computation delay is the delay between the completion tick of the
720  // inst. pointed to by past_record and the execution tick of its dependent
721  // inst. pointed to by new_record.
722  int64_t comp_delay = -1;
723  Tick execution_tick = 0, completion_tick = 0;
724 
725  DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
726  " %lli.\n", new_record->instNum, past_record->instNum);
727 
728  // Get the tick when the node is executed as per the modelling of
729  // computation delay
730  execution_tick = new_record->getExecuteTick();
731 
732  // When there is a physical register dependency on an instruction, the
733  // completion tick of that instruction is when it wrote to the register,
734  // that is toCommitTick. In case, of a store updating a destination
735  // register, this is approximated to commitTick instead
736  if (past_record->isStore()) {
737  completion_tick = past_record->commitTick;
738  } else {
739  completion_tick = past_record->toCommitTick;
740  }
741  assert(execution_tick >= completion_tick);
742  comp_delay = execution_tick - completion_tick;
743  DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
744  execution_tick, completion_tick, comp_delay);
745 
746  // Assign the computational delay with respect to the dependency which
747  // completes the latest.
748  if (new_record->compDelay == -1)
749  new_record->compDelay = comp_delay;
750  else
751  new_record->compDelay = std::min(comp_delay, new_record->compDelay);
752  DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
753  new_record->compDelay);
754 }
755 
756 Tick
757 ElasticTrace::TraceInfo::getExecuteTick() const
758 {
759  if (isLoad()) {
760  // Execution tick for a load instruction is when the request was sent,
761  // that is executeTick.
762  return executeTick;
763  } else if (isStore()) {
764  // Execution tick for a store instruction is when the request was sent,
765  // that is commitTick.
766  return commitTick;
767  } else {
768  // Execution tick for a non load/store instruction is when the register
769  // value was written to, that is commitTick.
770  return toCommitTick;
771  }
772 }
773 
774 void
775 ElasticTrace::writeDepTrace(uint32_t num_to_write)
776 {
777  // Write the trace with fields as follows:
778  // Instruction sequence number
779  // If instruction was a load
780  // If instruction was a store
781  // If instruction has addr
782  // If instruction has size
783  // If instruction has flags
784  // List of order dependencies - optional, repeated
785  // Computational delay with respect to last completed dependency
786  // List of physical register RAW dependencies - optional, repeated
787  // Weight of a node equal to no. of filtered nodes before it - optional
788  uint16_t num_filtered_nodes = 0;
789  depTraceItr dep_trace_itr(depTrace.begin());
790  depTraceItr dep_trace_itr_start = dep_trace_itr;
791  while (num_to_write > 0) {
792  TraceInfo* temp_ptr = *dep_trace_itr;
793  assert(temp_ptr->type != Record::INVALID);
794  // If no node depends on a comp node then there is no reason to
795  // track the comp node in the dependency graph. We filter out such
796  // nodes but count them and add a weight field to the subsequent node
797  // that we do include in the trace.
798  if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
799  DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
800  "is as follows:\n", temp_ptr->instNum);
801  if (temp_ptr->isLoad() || temp_ptr->isStore()) {
802  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
803  DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
804  "size %i, flags %i\n", temp_ptr->physAddr,
805  temp_ptr->size, temp_ptr->reqFlags);
806  } else {
807  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
808  }
809  if (firstWin && temp_ptr->compDelay == -1) {
810  if (temp_ptr->isLoad()) {
811  temp_ptr->compDelay = temp_ptr->executeTick;
812  } else if (temp_ptr->isStore()) {
813  temp_ptr->compDelay = temp_ptr->commitTick;
814  } else {
815  temp_ptr->compDelay = temp_ptr->toCommitTick;
816  }
817  }
818  assert(temp_ptr->compDelay != -1);
819  DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
820  temp_ptr->compDelay);
821 
822  // Create a protobuf message for the dependency record
823  ProtoMessage::InstDepRecord dep_pkt;
824  dep_pkt.set_seq_num(temp_ptr->instNum);
825  dep_pkt.set_type(temp_ptr->type);
826  dep_pkt.set_pc(temp_ptr->pc);
827  if (temp_ptr->isLoad() || temp_ptr->isStore()) {
828  dep_pkt.set_flags(temp_ptr->reqFlags);
829  dep_pkt.set_p_addr(temp_ptr->physAddr);
830  // If tracing of virtual addresses is enabled, set the optional
831  // field for it
832  if (traceVirtAddr)
833  dep_pkt.set_v_addr(temp_ptr->virtAddr);
834  dep_pkt.set_size(temp_ptr->size);
835  }
836  dep_pkt.set_comp_delay(temp_ptr->compDelay);
837  if (temp_ptr->robDepList.empty()) {
838  DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
839  }
840  while (!temp_ptr->robDepList.empty()) {
841  DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
842  temp_ptr->robDepList.front());
843  dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
844  temp_ptr->robDepList.pop_front();
845  }
846  if (temp_ptr->physRegDepList.empty()) {
847  DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
848  }
849  while (!temp_ptr->physRegDepList.empty()) {
850  DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
851  temp_ptr->physRegDepList.front());
852  dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
853  temp_ptr->physRegDepList.pop_front();
854  }
855  if (num_filtered_nodes != 0) {
856  // Set the weight of this node as the no. of filtered nodes
857  // between this node and the last node that we wrote to output
858  // stream. The weight will be used during replay to model ROB
859  // occupancy of filtered nodes.
860  dep_pkt.set_weight(num_filtered_nodes);
861  num_filtered_nodes = 0;
862  }
863  // Write the message to the protobuf output stream
864  dataTraceStream->write(dep_pkt);
865  } else {
866  // Don't write the node to the trace but note that we have filtered
867  // out a node.
868  ++numFilteredNodes;
869  ++num_filtered_nodes;
870  }
871  dep_trace_itr++;
872  traceInfoMap.erase(temp_ptr->instNum);
873  delete temp_ptr;
874  num_to_write--;
875  }
876  depTrace.erase(dep_trace_itr_start, dep_trace_itr);
877 }
878 
879 void
880 ElasticTrace::regStats() {
881  ProbeListenerObject::regStats();
882 
883  using namespace Stats;
884  numRegDep
885  .name(name() + ".numRegDep")
886  .desc("Number of register dependencies recorded during tracing")
887  ;
888 
889  numOrderDepStores
890  .name(name() + ".numOrderDepStores")
891  .desc("Number of commit order (rob) dependencies for a store recorded"
892  " on a past load/store during tracing")
893  ;
894 
895  numIssueOrderDepLoads
896  .name(name() + ".numIssueOrderDepLoads")
897  .desc("Number of loads that got assigned issue order dependency"
898  " because they were dependency-free")
899  ;
900 
901  numIssueOrderDepStores
902  .name(name() + ".numIssueOrderDepStores")
903  .desc("Number of stores that got assigned issue order dependency"
904  " because they were dependency-free")
905  ;
906 
907  numIssueOrderDepOther
908  .name(name() + ".numIssueOrderDepOther")
909  .desc("Number of non load/store insts that got assigned issue order"
910  " dependency because they were dependency-free")
911  ;
912 
913  numFilteredNodes
914  .name(name() + ".numFilteredNodes")
915  .desc("No. of nodes filtered out before writing the output trace")
916  ;
917 
918  maxNumDependents
919  .name(name() + ".maxNumDependents")
920  .desc("Maximum number of dependents on any instruction")
921  ;
922 
923  maxTempStoreSize
924  .name(name() + ".maxTempStoreSize")
925  .desc("Maximum size of the temporary store during the run")
926  ;
927 
928  maxPhysRegDepMapSize
929  .name(name() + ".maxPhysRegDepMapSize")
930  .desc("Maximum size of register dependency map")
931  ;
932 }
933 
934 const std::string&
935 ElasticTrace::TraceInfo::typeToStr() const
936 {
937  return Record::RecordType_Name(type);
938 }
939 
940 const std::string
941 ElasticTrace::name() const
942 {
943  return ProbeListenerObject::name();
944 }
945 
946 void
947 ElasticTrace::flushTraces()
948 {
949  // Write to trace all records in the depTrace.
950  writeDepTrace(depTrace.size());
951  // Delete the stream objects
952  delete dataTraceStream;
953  delete instTraceStream;
954 }
955 
956 ElasticTrace*
957 ElasticTraceParams::create()
958 {
959  return new ElasticTrace(this);
960 }
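
The listing above bounds dependency tracking with depWindowSize: updateRegDep() keeps a Read After Write dependency only when the last writer of a source register lies within depWindowSize dynamic instructions of the reader, and addDepTraceRecord() flushes the trace once 2x depWindowSize records have accumulated. The fragment below is a minimal, self-contained sketch of that last-writer bookkeeping under those assumptions; SeqNum, RegIndex, Inst, lastWriter and the three-instruction stream are illustrative stand-ins rather than gem5 types, and a real trace would instead be driven by the probe points registered in regEtraceListeners().

#include <cstdint>
#include <iostream>
#include <set>
#include <unordered_map>
#include <vector>

using SeqNum = std::uint64_t;
using RegIndex = std::uint16_t;

// One retired instruction: its sequence number plus the registers it reads/writes.
struct Inst
{
    SeqNum sn;
    std::vector<RegIndex> srcs;
    std::vector<RegIndex> dsts;
};

int main()
{
    const SeqNum depWindowSize = 8;                   // cf. the depWindowSize parameter
    std::unordered_map<RegIndex, SeqNum> lastWriter;  // cf. physRegDepMap

    const std::vector<Inst> stream = {
        {1, {}, {3}},      // writes r3
        {2, {3}, {4}},     // reads r3 -> RAW dependency on inst 1
        {9, {3, 4}, {5}},  // r3's writer (inst 1) now falls outside the window
    };

    for (const Inst &inst : stream) {
        std::set<SeqNum> regDeps;                     // cf. InstExecInfo::physRegDepSet
        for (RegIndex r : inst.srcs) {
            auto it = lastWriter.find(r);
            // Keep the dependency only if the producer is close enough,
            // mirroring the "seq_num - last_writer < depWindowSize" check.
            if (it != lastWriter.end() && inst.sn - it->second < depWindowSize)
                regDeps.insert(it->second);
        }
        for (RegIndex r : inst.dsts)
            lastWriter[r] = inst.sn;                  // this inst becomes the last writer

        std::cout << "inst " << inst.sn << " depends on:";
        for (SeqNum d : regDeps)
            std::cout << " " << d;
        std::cout << "\n";
    }
    return 0;
}

Compiled with any C++11 compiler, the sketch prints the recorded producers for each instruction; inst 9 drops its dependency on inst 1 because the distance (8) is no longer strictly inside the window, which is exactly why replay never needs to keep an unbounded number of graph nodes live.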