gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
elastic_trace.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 - 2015 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  * Authors: Radhika Jagtap
38  * Andreas Hansson
39  * Thomas Grass
40  */
41 
43 
44 #include "base/callback.hh"
45 #include "base/output.hh"
46 #include "base/trace.hh"
47 #include "cpu/reg_class.hh"
48 #include "debug/ElasticTrace.hh"
49 #include "mem/packet.hh"
50 
// Construct the elastic trace probe listener.
//
// Checks the configuration and aborts via fatal_if() when the probe manager
// is not a FullO3CPU<O3CPUImpl>, when depWindowSize is zero, when the CPU has
// more than one thread, or when either trace file name is empty. It then
// opens the instruction fetch and data dependency protobuf streams (paths
// resolved through simout.resolve()) and writes one header message to each.
//
// params supplies depWindowSize, startTraceInst, traceVirtAddr, manager,
// instFetchTraceFile and dataDepTraceFile.
51 ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
52  : ProbeListenerObject(params),
53  regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
54  firstWin(true),
56  depWindowSize(params->depWindowSize),
57  dataTraceStream(nullptr),
58  instTraceStream(nullptr),
59  startTraceInst(params->startTraceInst),
60  allProbesReg(false),
61  traceVirtAddr(params->traceVirtAddr)
62 {
63  cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
64  fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
65  "support dependency tracing.\n", name());
66 
67  fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
68  "Recommended size is 3x ROB size in the O3CPU.\n");
69 
// The two literals below are concatenated by the compiler, so the first one
// must end with a space to keep "for" and "single-threaded" apart.
70  fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for "\
71  "single-threaded workload only", cpu->numThreads, name());
72  // Initialize the protobuf output stream
73  fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\
74  "trace file path to instFetchTraceFile");
75  fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\
76  "trace file path to dataDepTraceFile");
77  std::string filename = simout.resolve(name() + "." +
78  params->instFetchTraceFile);
79  instTraceStream = new ProtoOutputStream(filename);
80  filename = simout.resolve(name() + "." + params->dataDepTraceFile);
81  dataTraceStream = new ProtoOutputStream(filename);
82  // Write the packet header of the instruction fetch trace to its stream
83  ProtoMessage::PacketHeader inst_pkt_header;
84  inst_pkt_header.set_obj_id(name());
85  inst_pkt_header.set_tick_freq(SimClock::Frequency);
86  instTraceStream->write(inst_pkt_header);
87  // Write the record header of the data dependency trace, including
88  // the dependency window size, to its stream
89  ProtoMessage::InstDepRecordHeader data_rec_header;
90  data_rec_header.set_obj_id(name());
91  data_rec_header.set_tick_freq(SimClock::Frequency);
92  data_rec_header.set_window_size(depWindowSize);
93  dataTraceStream->write(data_rec_header);
94  // Register a callback to flush trace records and close the output streams.
98 }
99 
100 void
102 {
103  inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
105  if (startTraceInst == 0) {
106  // If we want to start tracing from the start of the simulation,
107  // register all elastic trace probes now.
109  } else {
110  // Schedule an event to register all elastic trace probes when
111  // specified no. of instructions are committed.
114  }
115 }
116 
117 void
119 {
120  assert(!allProbesReg);
121  inform("@%llu: No. of instructions committed = %llu, registering elastic"
122  " probe listeners", curTick(), cpu->numSimulatedInsts());
123  // Create new listeners: provide method to be called upon a notify() for
124  // each probe point.
126  "FetchRequest", &ElasticTrace::fetchReqTrace));
128  DynInstConstPtr>(this, "Execute",
131  DynInstConstPtr>(this, "ToCommit",
134  DynInstConstPtr>(this, "Rename",
137  "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
139  DynInstConstPtr>(this, "Squash",
142  DynInstConstPtr>(this, "Commit",
144  allProbesReg = true;
145 }
146 
147 void
149 {
150 
151  DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
152  (MemCmd::ReadReq),
153  req->getPC(), req->getVaddr(), req->getPaddr(),
154  req->getFlags(), req->getSize(), curTick());
155 
156  // Create a protobuf message including the request fields necessary to
157  // recreate the request in the TraceCPU.
158  ProtoMessage::Packet inst_fetch_pkt;
159  inst_fetch_pkt.set_tick(curTick());
160  inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
161  inst_fetch_pkt.set_pc(req->getPC());
162  inst_fetch_pkt.set_flags(req->getFlags());
163  inst_fetch_pkt.set_addr(req->getPaddr());
164  inst_fetch_pkt.set_size(req->getSize());
165  // Write the message to the stream.
166  instTraceStream->write(inst_fetch_pkt);
167 }
168 
169 void
171 {
172 
173  // In a corner case, a retired instruction is propagated backward to the
174  // IEW instruction queue to handle some side-channel information. But we
175  // must not process an instruction again. So we test the sequence number
176  // against the lastClearedSeqNum and skip adding the instruction for such
177  // corner cases.
178  if (dyn_inst->seqNum <= lastClearedSeqNum) {
179  DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
180  has already retired (mostly squashed)", dyn_inst->seqNum);
181  // Do nothing as program has proceeded and this inst has been
182  // propagated backwards to handle something.
183  return;
184  }
185 
186  DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
187  curTick());
188  // Either the execution info object will already exist if this
189  // instruction had a register dependency recorded in the rename probe
190  // listener before entering execute stage or it will not exist and will
191  // need to be created here.
192  InstExecInfo* exec_info_ptr;
193  auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
194  if (itr_exec_info != tempStore.end()) {
195  exec_info_ptr = itr_exec_info->second;
196  } else {
197  exec_info_ptr = new InstExecInfo;
198  tempStore[dyn_inst->seqNum] = exec_info_ptr;
199  }
200 
201  exec_info_ptr->executeTick = curTick();
202  maxTempStoreSize = std::max(tempStore.size(),
203  (std::size_t)maxTempStoreSize.value());
204 }
205 
206 void
208 {
209  // If tracing has just been enabled then the instruction at this stage of
210  // execution is far enough that we cannot gather info about its past like
211  // the tick it started execution. Simply return until we see an instruction
212  // that is found in the tempStore.
213  auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
214  if (itr_exec_info == tempStore.end()) {
215  DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
216  " skipping.\n", dyn_inst->seqNum);
217  return;
218  }
219 
220  DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
221  curTick());
222  InstExecInfo* exec_info_ptr = itr_exec_info->second;
223  exec_info_ptr->toCommitTick = curTick();
224 
225 }
226 
227 void
229 {
230  // Get the sequence number of the instruction
231  InstSeqNum seq_num = dyn_inst->seqNum;
232 
233  assert(dyn_inst->seqNum > lastClearedSeqNum);
234 
235  // Since this is the first probe activated in the pipeline, create
236  // a new execution info object to track this instruction as it
237  // progresses through the pipeline.
238  InstExecInfo* exec_info_ptr = new InstExecInfo;
239  tempStore[seq_num] = exec_info_ptr;
240 
241  // Loop through the source registers and look up the dependency map. If
242  // the source register entry is found in the dependency map, add a
243  // dependency on the last writer.
244  int8_t max_regs = dyn_inst->numSrcRegs();
245  for (int src_idx = 0; src_idx < max_regs; src_idx++) {
246 
247  const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
248  if (!src_reg.isMiscReg() &&
249  !src_reg.isZeroReg()) {
250  // Get the physical register index of the i'th source register.
251  PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
252  DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
253  " %i (%s)\n", seq_num,
254  phys_src_reg->flatIndex(), phys_src_reg->className());
255  auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
256  if (itr_writer != physRegDepMap.end()) {
257  InstSeqNum last_writer = itr_writer->second;
258  // Additionally the dependency distance is kept less than the
259  // window size parameter to limit the memory allocation to
260  // nodes in the graph. If the window were tending to infinite
261  // we would have to load a large number of node objects during
262  // replay.
263  if (seq_num - last_writer < depWindowSize) {
264  // Record a physical register dependency.
265  exec_info_ptr->physRegDepSet.insert(last_writer);
266  }
267  }
268 
269  }
270 
271  }
272 
273  // Loop through the destination registers of this instruction and update
274  // the physical register dependency map for last writers to registers.
275  max_regs = dyn_inst->numDestRegs();
276  for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
277  // For data dependency tracking the register must be an int, float or
278  // CC register and not a Misc register.
279  const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
280  if (!dest_reg.isMiscReg() &&
281  !dest_reg.isZeroReg()) {
282  // Get the physical register index of the i'th destination
283  // register.
284  PhysRegIdPtr phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
285  DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
286  " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
287  dest_reg.className());
288  physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
289  }
290  }
291  maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
292  (std::size_t)maxPhysRegDepMapSize.value());
293 }
294 
295 void
297 {
298  DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
299  inst_reg_pair.second);
300  auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
301  if (itr_regdep_map != physRegDepMap.end())
302  physRegDepMap.erase(itr_regdep_map);
303 }
304 
305 void
307 {
308  // If the squashed instruction was squashed before being processed by
309  // execute stage then it will not be in the temporary store. In this case
310  // do nothing and return.
311  auto itr_exec_info = tempStore.find(head_inst->seqNum);
312  if (itr_exec_info == tempStore.end())
313  return;
314 
315  // If there is a squashed load for which a read request was
316  // sent before it got squashed then add it to the trace.
317  DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
318  head_inst->seqNum);
319  // Get pointer to the execution info object corresponding to the inst.
320  InstExecInfo* exec_info_ptr = itr_exec_info->second;
321  if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
322  exec_info_ptr->toCommitTick != MaxTick &&
323  head_inst->hasRequest() &&
324  head_inst->getFault() == NoFault) {
325  // Add record to depTrace with commit parameter as false.
326  addDepTraceRecord(head_inst, exec_info_ptr, false);
327  }
328  // As the information contained is no longer needed, remove the execution
329  // info object from the temporary store.
330  clearTempStoreUntil(head_inst);
331 }
332 
333 void
335 {
336  DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
337  head_inst->seqNum);
338 
339  // Add the instruction to the depTrace.
340  if (!head_inst->isNop()) {
341 
342  // If tracing has just been enabled then the instruction at this stage
343  // of execution is far enough that we cannot gather info about its past
344  // like the tick it started execution. Simply return until we see an
345  // instruction that is found in the tempStore.
346  auto itr_temp_store = tempStore.find(head_inst->seqNum);
347  if (itr_temp_store == tempStore.end()) {
348  DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
349  "store, skipping.\n", head_inst->seqNum);
350  return;
351  }
352 
353  // Get pointer to the execution info object corresponding to the inst.
354  InstExecInfo* exec_info_ptr = itr_temp_store->second;
355  assert(exec_info_ptr->executeTick != MaxTick);
356  assert(exec_info_ptr->toCommitTick != MaxTick);
357 
358  // Check if the instruction had a fault, if it predicated false and
359  // thus previous register values were restored or if it was a
360  // load/store that did not have a request (e.g. when the size of the
361  // request is zero). In all these cases the instruction is set as
362  // executed and is picked up by the commit probe listener. But a
363  // request is not issued and registers are not written. So practically,
364  // skipping these should not hurt as execution would not stall on them.
365  // Alternatively, these could be included merely as a compute node in
366  // the graph. Removing these for now. If correlation accuracy needs to
367  // be improved in future these can be turned into comp nodes at the
368  // cost of bigger traces.
369  if (head_inst->getFault() != NoFault) {
370  DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
371  "skip adding it to the trace\n",
372  (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
373  head_inst->seqNum);
374  } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
375  DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
376  "skip adding it to the trace\n", head_inst->seqNum);
377  } else if (!head_inst->readPredicate()) {
378  DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
379  "skip adding it to the trace\n",
380  (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
381  head_inst->seqNum);
382  } else {
383  // Add record to depTrace with commit parameter as true.
384  addDepTraceRecord(head_inst, exec_info_ptr, true);
385  }
386  }
387  // As the information contained is no longer needed, remove the execution
388  // info object from the temporary store.
389  clearTempStoreUntil(head_inst);
390 }
391 
392 void
394  InstExecInfo* exec_info_ptr, bool commit)
395 {
396  // Create a record to assign dynamic instruction related fields.
397  TraceInfo* new_record = new TraceInfo;
398  // Add to map for sequence number look up to retrieve the TraceInfo pointer
399  traceInfoMap[head_inst->seqNum] = new_record;
400 
401  // Assign fields from the instruction
402  new_record->instNum = head_inst->seqNum;
403  new_record->commit = commit;
404  new_record->type = head_inst->isLoad() ? Record::LOAD :
405  (head_inst->isStore() ? Record::STORE :
406  Record::COMP);
407 
408  // Assign fields for creating a request in case of a load/store
409  new_record->reqFlags = head_inst->memReqFlags;
410  new_record->virtAddr = head_inst->effAddr;
411  new_record->asid = head_inst->asid;
412  new_record->physAddr = head_inst->physEffAddr;
413  // Currently the tracing does not support split requests.
414  new_record->size = head_inst->effSize;
415  new_record->pc = head_inst->instAddr();
416 
417  // Assign the timing information stored in the execution info object
418  new_record->executeTick = exec_info_ptr->executeTick;
419  new_record->toCommitTick = exec_info_ptr->toCommitTick;
420  new_record->commitTick = curTick();
421 
422  // Assign initial values for number of dependents and computational delay
423  new_record->numDepts = 0;
424  new_record->compDelay = -1;
425 
426  // The physical register dependency set of the first instruction is
427  // empty. Since there are no records in the depTrace at this point, the
428  // case of adding an ROB dependency by using a reverse iterator is not
429  // applicable. Thus, populate the fields of the record corresponding to the
430  // first instruction and return.
431  if (depTrace.empty()) {
432  // Store the record in depTrace.
433  depTrace.push_back(new_record);
434  DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
435  new_record->instNum);
436  return;
437  }
438 
439  // Clear register dependencies for squashed loads as they may be dependent
440  // on squashed instructions and we do not add those to the trace.
441  if (head_inst->isLoad() && !commit) {
442  (exec_info_ptr->physRegDepSet).clear();
443  }
444 
445  // Assign the register dependencies stored in the execution info object
446  std::set<InstSeqNum>::const_iterator dep_set_it;
447  for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
448  dep_set_it != (exec_info_ptr->physRegDepSet).end();
449  ++dep_set_it) {
450  auto trace_info_itr = traceInfoMap.find(*dep_set_it);
451  if (trace_info_itr != traceInfoMap.end()) {
452  // The register dependency is valid. Assign it and calculate
453  // computational delay
454  new_record->physRegDepList.push_back(*dep_set_it);
455  DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
456  "%lli\n", new_record->instNum, *dep_set_it);
457  TraceInfo* reg_dep = trace_info_itr->second;
458  reg_dep->numDepts++;
459  compDelayPhysRegDep(reg_dep, new_record);
460  ++numRegDep;
461  } else {
462  // The instruction that this has a register dependency on was
463  // not added to the trace because of one of the following
464  // 1. it was an instruction that had a fault
465  // 2. it was an instruction that was predicated false and
466  // previous register values were restored
467  // 3. it was load/store that did not have a request (e.g. when
468  // the size of the request is zero but this may not be a fault)
469  // In all these cases the instruction is set as executed and is
470  // picked up by the commit probe listener. But a request is not
471  // issued and registers are not written to in these cases.
472  DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
473  "%lli is skipped\n",new_record->instNum, *dep_set_it);
474  }
475  }
476 
477  // Check for and assign an ROB dependency in addition to register
478  // dependency before adding the record to the trace.
479  // As stores have to commit in order a store is dependent on the last
480  // committed load/store. This is recorded in the ROB dependency.
481  if (head_inst->isStore()) {
482  // Look up store-after-store order dependency
483  updateCommitOrderDep(new_record, false);
484  // Look up store-after-load order dependency
485  updateCommitOrderDep(new_record, true);
486  }
487 
488  // In case a node is dependency-free or its dependency got discarded
489  // because it was outside the window, it is marked ready in the ROB at the
490  // time of issue. A request is sent as soon as possible. To model this, a
491  // node is assigned an issue order dependency on a committed instruction
492  // that completed earlier than it. This is done to avoid the problem of
493  // determining the issue times of such dependency-free nodes during replay
494  // which could lead to too much parallelism, thinking conservatively.
495  if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
496  updateIssueOrderDep(new_record);
497  }
498 
499  // Store the record in depTrace.
500  depTrace.push_back(new_record);
501  DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
502  (commit ? "committed" : "squashed"), new_record->instNum);
503 
504  // To process the number of records specified by depWindowSize in the
505  // forward direction, the depTrace must have twice as many records
506  // to check for dependencies.
507  if (depTrace.size() == 2 * depWindowSize) {
508 
509  DPRINTF(ElasticTrace, "Writing out trace...\n");
510 
511  // Write out the records which have been processed to the trace
512  // and remove them from the depTrace.
514 
515  // After the first window, writeDepTrace() must check for valid
516  // compDelay.
517  firstWin = false;
518  }
519 }
520 
// Look for an order (ROB) dependency of a store record on an earlier record.
//
// Walks depTrace through reverse iterators, visiting at most depWindowSize
// records. When find_load_not_store is true the first record accepted by
// hasLoadCompleted() is used, otherwise the first record accepted by
// hasStoreCommitted(); both are compared against curTick(). The first match
// is handed to assignRobDep() and the function returns. If nothing matches,
// new_record is left untouched.
//
// The dereference of from_itr before the loop needs a non-empty depTrace; the
// caller visible in this file (addDepTraceRecord) returns early while
// depTrace is empty.
521 void
523  bool find_load_not_store)
524 {
525  assert(new_record->isStore());
526  // Iterate in reverse direction to search for the last committed
527  // load/store that completed earlier than the new record
528  depTraceRevItr from_itr(depTrace.end());
529  depTraceRevItr until_itr(depTrace.begin());
530  TraceInfo* past_record = *from_itr;
531  uint32_t num_go_back = 0;
532 
533  // The execution time of this store is when it is sent, that is committed
534  Tick execute_tick = curTick();
535  // Search for store-after-load or store-after-store order dependency
536  while (num_go_back < depWindowSize && from_itr != until_itr) {
537  if (find_load_not_store) {
538  // Check if previous inst is a load completed earlier by comparing
539  // with execute tick
540  if (hasLoadCompleted(past_record, execute_tick)) {
541  // Assign rob dependency and calculate the computational delay
542  assignRobDep(past_record, new_record);
544  return;
545  }
546  } else {
547  // Check if previous inst is a store sent earlier by comparing with
548  // execute tick
549  if (hasStoreCommitted(past_record, execute_tick)) {
550  // Assign rob dependency and calculate the computational delay
551  assignRobDep(past_record, new_record);
553  return;
554  }
555  }
556  ++from_itr;
// NOTE(review): after the increment from_itr may already equal until_itr, in
// which case the dereference below reads past the last valid position. The
// value is never used because the loop condition then fails, but the read
// itself looks out of range. Confirm against the depTraceRevItr type;
// reading *from_itr at the top of the loop body would avoid it.
557  past_record = *from_itr;
558  ++num_go_back;
559  }
560 }
561 
// Look for an issue order dependency of new_record on an earlier record.
//
// The reference tick is new_record->executeTick for a load, curTick() for a
// store and new_record->toCommitTick for anything else. depTrace is then
// walked through reverse iterators, visiting at most depWindowSize records.
// The first record accepted by hasLoadBeenSent(), hasStoreCommitted() or
// hasCompCompleted() is handed to assignRobDep() and the function returns.
//
// The dereference of from_itr before the loop needs a non-empty depTrace; the
// caller visible in this file (addDepTraceRecord) returns early while
// depTrace is empty.
562 void
564 {
565  // Iterate in reverse direction to search for the last committed
566  // record that completed earlier than the new record
567  depTraceRevItr from_itr(depTrace.end());
568  depTraceRevItr until_itr(depTrace.begin());
569  TraceInfo* past_record = *from_itr;
570 
571  uint32_t num_go_back = 0;
572  Tick execute_tick = 0;
573 
574  if (new_record->isLoad()) {
575  // The execution time of a load is when a request is sent
576  execute_tick = new_record->executeTick;
578  } else if (new_record->isStore()) {
579  // The execution time of a store is when it is sent, i.e. committed
580  execute_tick = curTick();
582  } else {
583  // The execution time of a non load/store is when it completes
584  execute_tick = new_record->toCommitTick;
586  }
587 
588  // We search if this record has an issue order dependency on a past record.
589  // Once we find it, we update both the new record and the record it depends
590  // on and return.
591  while (num_go_back < depWindowSize && from_itr != until_itr) {
592  // Check if a previous inst is a load sent earlier, or a store sent
593  // earlier, or a comp inst completed earlier by comparing with execute
594  // tick
595  if (hasLoadBeenSent(past_record, execute_tick) ||
596  hasStoreCommitted(past_record, execute_tick) ||
597  hasCompCompleted(past_record, execute_tick)) {
598  // Assign rob dependency and calculate the computational delay
599  assignRobDep(past_record, new_record);
600  return;
601  }
602  ++from_itr;
// NOTE(review): after the increment from_itr may already equal until_itr, in
// which case the dereference below reads past the last valid position. The
// value is never used because the loop condition then fails, but the read
// itself looks out of range. Confirm against the depTraceRevItr type;
// reading *from_itr at the top of the loop body would avoid it.
603  past_record = *from_itr;
604  ++num_go_back;
605  }
606 }
607 
// Record that new_record has an order (ROB) dependency on past_record.
//
// Appends past_record's instNum to new_record->robDepList, lets
// compDelayRob() update new_record's computational delay relative to
// past_record, increments past_record->numDepts and raises the
// maxNumDependents stat if the new count exceeds its current value.
//
// past_record: earlier record that new_record depends on; it is modified.
// new_record: record being added; it is modified.
608 void
609 ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
610  DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
611  new_record->typeToStr(), new_record->instNum,
612  past_record->instNum);
613  // Add dependency on past record
614  new_record->robDepList.push_back(past_record->instNum);
615  // Update new_record's compute delay with respect to the past record
616  compDelayRob(past_record, new_record);
617  // Increment number of dependents of the past record
618  ++(past_record->numDepts);
619  // Update stat to log max number of dependents
620  maxNumDependents = std::max(past_record->numDepts,
621  (uint32_t)maxNumDependents.value());
622 }
623 
624 bool
626  Tick execute_tick) const
627 {
628  return (past_record->isStore() && past_record->commitTick <= execute_tick);
629 }
630 
631 bool
633  Tick execute_tick) const
634 {
635  return(past_record->isLoad() && past_record->commit &&
636  past_record->toCommitTick <= execute_tick);
637 }
638 
639 bool
641  Tick execute_tick) const
642 {
643  // Check if previous inst is a load sent earlier than this
644  return (past_record->isLoad() && past_record->commit &&
645  past_record->executeTick <= execute_tick);
646 }
647 
648 bool
650  Tick execute_tick) const
651 {
652  return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
653 }
654 
655 void
657 {
658  // Clear from temp store starting with the execution info object
659  // corresponding to the head_inst and continue clearing by decrementing the
660  // sequence number until the last cleared sequence number.
661  InstSeqNum temp_sn = (head_inst->seqNum);
662  while (temp_sn > lastClearedSeqNum) {
663  auto itr_exec_info = tempStore.find(temp_sn);
664  if (itr_exec_info != tempStore.end()) {
665  InstExecInfo* exec_info_ptr = itr_exec_info->second;
666  // Free allocated memory for the info object
667  delete exec_info_ptr;
668  // Remove entry from temporary store
669  tempStore.erase(itr_exec_info);
670  }
671  temp_sn--;
672  }
673  // Update the last cleared sequence number to that of the head_inst
674  lastClearedSeqNum = head_inst->seqNum;
675 }
676 
677 void
679 {
680  // The computation delay is the delay between the completion tick of the
681  // inst. pointed to by past_record and the execution tick of its dependent
682  // inst. pointed to by new_record.
683  int64_t comp_delay = -1;
684  Tick execution_tick = 0, completion_tick = 0;
685 
686  DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
687  new_record->instNum, past_record->instNum);
688 
689  // Get the tick when the node is executed as per the modelling of
690  // computation delay
691  execution_tick = new_record->getExecuteTick();
692 
693  if (past_record->isLoad()) {
694  if (new_record->isStore()) {
695  completion_tick = past_record->toCommitTick;
696  } else {
697  completion_tick = past_record->executeTick;
698  }
699  } else if (past_record->isStore()) {
700  completion_tick = past_record->commitTick;
701  } else if (past_record->isComp()){
702  completion_tick = past_record->toCommitTick;
703  }
704  assert(execution_tick >= completion_tick);
705  comp_delay = execution_tick - completion_tick;
706 
707  DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
708  execution_tick, completion_tick, comp_delay);
709 
710  // Assign the computational delay with respect to the dependency which
711  // completes the latest.
712  if (new_record->compDelay == -1)
713  new_record->compDelay = comp_delay;
714  else
715  new_record->compDelay = std::min(comp_delay, new_record->compDelay);
716  DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
717  new_record->compDelay);
718 }
719 
720 void
722  TraceInfo* new_record)
723 {
724  // The computation delay is the delay between the completion tick of the
725  // inst. pointed to by past_record and the execution tick of its dependent
726  // inst. pointed to by new_record.
727  int64_t comp_delay = -1;
728  Tick execution_tick = 0, completion_tick = 0;
729 
730  DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
731  " %lli.\n", new_record->instNum, past_record->instNum);
732 
733  // Get the tick when the node is executed as per the modelling of
734  // computation delay
735  execution_tick = new_record->getExecuteTick();
736 
737  // When there is a physical register dependency on an instruction, the
738  // completion tick of that instruction is when it wrote to the register,
739  // that is toCommitTick. In the case of a store updating a destination
740  // register, this is approximated by commitTick instead.
741  if (past_record->isStore()) {
742  completion_tick = past_record->commitTick;
743  } else {
744  completion_tick = past_record->toCommitTick;
745  }
746  assert(execution_tick >= completion_tick);
747  comp_delay = execution_tick - completion_tick;
748  DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
749  execution_tick, completion_tick, comp_delay);
750 
751  // Assign the computational delay with respect to the dependency which
752  // completes the latest.
753  if (new_record->compDelay == -1)
754  new_record->compDelay = comp_delay;
755  else
756  new_record->compDelay = std::min(comp_delay, new_record->compDelay);
757  DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
758  new_record->compDelay);
759 }
760 
761 Tick
763 {
764  if (isLoad()) {
765  // Execution tick for a load instruction is when the request was sent,
766  // that is executeTick.
767  return executeTick;
768  } else if (isStore()) {
769  // Execution tick for a store instruction is when the request was sent,
770  // that is commitTick.
771  return commitTick;
772  } else {
773  // Execution tick for a non load/store instruction is when the register
774  // value was written to, that is toCommitTick.
775  return toCommitTick;
776  }
777 }
778 
779 void
780 ElasticTrace::writeDepTrace(uint32_t num_to_write)
781 {
782  // Write the trace with fields as follows:
783  // Instruction sequence number
784  // If instruction was a load
785  // If instruction was a store
786  // If instruction has addr
787  // If instruction has size
788  // If instruction has flags
789  // List of order dependencies - optional, repeated
790  // Computational delay with respect to last completed dependency
791  // List of physical register RAW dependencies - optional, repeated
792  // Weight of a node equal to no. of filtered nodes before it - optional
793  uint16_t num_filtered_nodes = 0;
794  depTraceItr dep_trace_itr(depTrace.begin());
795  depTraceItr dep_trace_itr_start = dep_trace_itr;
796  while (num_to_write > 0) {
797  TraceInfo* temp_ptr = *dep_trace_itr;
798  assert(temp_ptr->type != Record::INVALID);
799  // If no node depends on a comp node then there is no reason to
800  // track the comp node in the dependency graph. We filter out such
801  // nodes but count them and add a weight field to the subsequent node
802  // that we do include in the trace.
803  if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
804  DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
805  "is as follows:\n", temp_ptr->instNum);
806  if (temp_ptr->isLoad() || temp_ptr->isStore()) {
807  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
808  DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
809  "size %i, flags %i\n", temp_ptr->physAddr,
810  temp_ptr->size, temp_ptr->reqFlags);
811  } else {
812  DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
813  }
814  if (firstWin && temp_ptr->compDelay == -1) {
815  if (temp_ptr->isLoad()) {
816  temp_ptr->compDelay = temp_ptr->executeTick;
817  } else if (temp_ptr->isStore()) {
818  temp_ptr->compDelay = temp_ptr->commitTick;
819  } else {
820  temp_ptr->compDelay = temp_ptr->toCommitTick;
821  }
822  }
823  assert(temp_ptr->compDelay != -1);
824  DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
825  temp_ptr->compDelay);
826 
827  // Create a protobuf message for the dependency record
828  ProtoMessage::InstDepRecord dep_pkt;
829  dep_pkt.set_seq_num(temp_ptr->instNum);
830  dep_pkt.set_type(temp_ptr->type);
831  dep_pkt.set_pc(temp_ptr->pc);
832  if (temp_ptr->isLoad() || temp_ptr->isStore()) {
833  dep_pkt.set_flags(temp_ptr->reqFlags);
834  dep_pkt.set_p_addr(temp_ptr->physAddr);
835  // If tracing of virtual addresses is enabled, set the optional
836  // field for it
837  if (traceVirtAddr) {
838  dep_pkt.set_v_addr(temp_ptr->virtAddr);
839  dep_pkt.set_asid(temp_ptr->asid);
840  }
841  dep_pkt.set_size(temp_ptr->size);
842  }
843  dep_pkt.set_comp_delay(temp_ptr->compDelay);
844  if (temp_ptr->robDepList.empty()) {
845  DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
846  }
847  while (!temp_ptr->robDepList.empty()) {
848  DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
849  temp_ptr->robDepList.front());
850  dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
851  temp_ptr->robDepList.pop_front();
852  }
853  if (temp_ptr->physRegDepList.empty()) {
854  DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
855  }
856  while (!temp_ptr->physRegDepList.empty()) {
857  DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
858  temp_ptr->physRegDepList.front());
859  dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
860  temp_ptr->physRegDepList.pop_front();
861  }
862  if (num_filtered_nodes != 0) {
863  // Set the weight of this node as the no. of filtered nodes
864  // between this node and the last node that we wrote to output
865  // stream. The weight will be used during replay to model ROB
866  // occupancy of filtered nodes.
867  dep_pkt.set_weight(num_filtered_nodes);
868  num_filtered_nodes = 0;
869  }
870  // Write the message to the protobuf output stream
871  dataTraceStream->write(dep_pkt);
872  } else {
873  // Don't write the node to the trace but note that we have filtered
874  // out a node.
876  ++num_filtered_nodes;
877  }
878  dep_trace_itr++;
879  traceInfoMap.erase(temp_ptr->instNum);
880  delete temp_ptr;
881  num_to_write--;
882  }
883  depTrace.erase(dep_trace_itr_start, dep_trace_itr);
884 }
885 
// Sets the name and the description of each elastic trace statistic. Every
// name is built as name() + ".<stat>".
//
// NOTE(review): this listing omits some lines of the definition (886 is
// followed by 889, 895 by 897, and so on). The declarator is presumably
// ElasticTrace::regStats(), and each .name()/.desc() chain after the first
// presumably applies to the member named in its .name() string - confirm
// against the original file.
886 void
889 
890  using namespace Stats;
891  numRegDep
892  .name(name() + ".numRegDep")
893  .desc("Number of register dependencies recorded during tracing")
894  ;
895 
897  .name(name() + ".numOrderDepStores")
898  .desc("Number of commit order (rob) dependencies for a store recorded"
899  " on a past load/store during tracing")
900  ;
901 
903  .name(name() + ".numIssueOrderDepLoads")
904  .desc("Number of loads that got assigned issue order dependency"
905  " because they were dependency-free")
906  ;
907 
909  .name(name() + ".numIssueOrderDepStores")
910  .desc("Number of stores that got assigned issue order dependency"
911  " because they were dependency-free")
912  ;
913 
915  .name(name() + ".numIssueOrderDepOther")
916  .desc("Number of non load/store insts that got assigned issue order"
917  " dependency because they were dependency-free")
918  ;
919 
921  .name(name() + ".numFilteredNodes")
922  .desc("No. of nodes filtered out before writing the output trace")
923  ;
924 
926  .name(name() + ".maxNumDependents")
927  .desc("Maximum number or dependents on any instruction")
928  ;
929 
931  .name(name() + ".maxTempStoreSize")
932  .desc("Maximum size of the temporary store during the run")
933  ;
934 
936  .name(name() + ".maxPhysRegDepMapSize")
937  .desc("Maximum size of register dependency map")
938  ;
939 }
940 
// Returns Record::RecordType_Name(type), the string form of the record type.
// NOTE(review): the declarator line (942) is absent from this listing;
// presumably this is the typeToStr() const member of the trace record -
// confirm against the original file.
941 const std::string&
943 {
944  return Record::RecordType_Name(type);
945 }
946 
// Returns the listener's name by forwarding to ProbeListenerObject::name().
// The string is returned by value.
// NOTE(review): the declarator line (948) is absent from this listing;
// presumably ElasticTrace::name() const - confirm against the original file.
947 const std::string
949 {
950  return ProbeListenerObject::name();
951 }
952 
// Writes out every record still held in depTrace and then destroys both
// output stream objects. The two stream pointers are not reset here, so
// they must not be used after this call.
// NOTE(review): the declarator line (954) is absent from this listing;
// presumably ElasticTrace::flushTraces() - confirm against the original
// file.
953 void
955 {
956  // Write to trace all records in the depTrace.
957  writeDepTrace(depTrace.size());
958  // Delete the stream objects
959  delete dataTraceStream;
960  delete instTraceStream;
961 }
962 
// Builds the ElasticTrace object described by this params object. The object
// is allocated with new and handed back as a raw pointer; nothing in this
// function releases it.
// NOTE(review): the return-type line (963) is absent from this listing.
964 ElasticTraceParams::create()
965 {
966  return new ElasticTrace(this);
967 }
int64_t compDelay
Computational delay after the last dependent inst.
Stats::Scalar numIssueOrderDepOther
Number of non load/store insts that got assigned an issue order dependency because they were dependen...
void compDelayPhysRegDep(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has a Phys...
#define DPRINTF(x,...)
Definition: trace.hh:229
void flushTraces()
Process any outstanding trace records, flush them out to the protobuf output streams and delete the s...
std::string resolve(const std::string &name) const
Returns relative file names prepended with name of this directory.
Definition: output.cc:200
void regProbeListeners()
Register the probe listeners that is the methods called on a probe point notify() call...
ElasticTrace(const ElasticTraceParams *params)
Constructor.
bool hasLoadBeenSent(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load sent earlier than the execute tick.
OutputDirectory simout
Definition: output.cc:65
A ProtoOutputStream wraps a coded stream, potentially with compression, based on looking at the file ...
Definition: protoio.hh:92
void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
When an instruction gets squashed the destination register mapped to it is freed up in the rename sta...
FullO3CPU< O3CPUImpl > * cpu
Pointer to the O3CPU that is this listener's parent a.k.a.
void regEtraceListeners()
Register all listeners.
decltype(nullptr) constexpr NoFault
Definition: types.hh:245
void write(const google::protobuf::Message &msg)
Write a message to the stream, prepending it with the message size.
Definition: protoio.cc:84
Generic callback class.
Definition: callback.hh:41
bool isMiscReg() const
true if it is a condition-code physical register.
Definition: reg_class.hh:167
The elastic trace is a type of probe listener and listens to probe points in multiple stages of the O...
bool isStore() const
Is the record a store.
STL pair class.
Definition: stl.hh:61
ThreadID numThreads
Number of threads we're actually simulating (<= SMT_MAX_THREADS).
Definition: base.hh:378
void assignRobDep(TraceInfo *past_record, TraceInfo *new_record)
The new_record has an order dependency on a past_record, thus update the new record's Rob dependency ...
bool isComp() const
Is the record a fetch triggering an Icache request.
std::unordered_map< InstSeqNum, InstExecInfo * > tempStore
Temporary store of InstExecInfo objects.
std::shared_ptr< Request > RequestPtr
Definition: request.hh:83
bool isLoad() const
Is the record a load.
const std::string & typeToStr() const
Return string specifying the type of the node.
void addDepTraceRecord(const DynInstConstPtr &head_inst, InstExecInfo *exec_info_ptr, bool commit)
Add a record to the dependency trace depTrace which is a sequential container.
bool allProbesReg
Whether the elastic trace listener has been registered for all probes.
virtual void regStats()
Callback to set stat parameters.
Definition: group.cc:66
uint32_t depWindowSize
The maximum distance for a dependency; it is set by a top level parameter.
std::vector< TraceInfo * >::iterator depTraceItr
Typedef of iterator to the instruction dependency trace.
void recordExecTick(const DynInstConstPtr &dyn_inst)
Populate the execute timestamp field in an InstExecInfo object for an instruction in flight...
EventFunctionWrapper regEtraceListenersEvent
Event to trigger registering this listener for all probe points.
std::vector< TraceInfo * > depTrace
The instruction dependency trace containing TraceInfo objects.
void recordToCommTick(const DynInstConstPtr &dyn_inst)
Populate the timestamp field in an InstExecInfo object for an instruction in flight when it is execut...
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition: core.cc:49
If you want a reference counting pointer to a mutable object, create it like this: ...
Definition: refcnt.hh:120
Tick getExecuteTick() const
Get the execute tick of the instruction.
void updateCommitOrderDep(TraceInfo *new_record, bool find_load_not_store)
Reverse iterate through the graph, search for a store-after-store or store-after-load dependency and ...
Stats::Scalar numIssueOrderDepStores
Number of store insts that got assigned an issue order dependency because they were dependency-free...
bool hasLoadCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load that completed earlier than the execute tick.
bool hasStoreCommitted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a store sent earlier than the execute tick.
uint8_t type
Definition: inet.hh:333
#define inform(...)
Definition: logging.hh:213
Stats::Scalar numOrderDepStores
Number of stores that got assigned a commit order dependency on a past load/store.
const Tick MaxTick
Definition: types.hh:65
Tick curTick()
The current simulated tick.
Definition: core.hh:47
void writeDepTrace(uint32_t num_to_write)
Write out given number of records to the trace starting with the first record in depTrace and iterati...
Stats::Scalar numIssueOrderDepLoads
Number of load insts that got assigned an issue order dependency because they were dependency-free...
Stats::Scalar maxPhysRegDepMapSize
Maximum size of the map that holds the last writer to a physical register.
uint64_t Tick
Tick count type.
Definition: types.hh:63
const Params * params() const
Definition: sim_object.hh:114
virtual void scheduleInstCountEvent(Event *event, Tick count)=0
ProtoOutputStream * dataTraceStream
Protobuf output stream for data dependency trace.
This class is a minimal wrapper around SimObject.
Definition: probe.hh:100
std::unordered_map< InstSeqNum, TraceInfo * > traceInfoMap
Map where the instruction sequence number is mapped to the pointer to the TraceInfo object...
void registerExitCallback(Callback *callback)
Register an exit callback.
Definition: core.cc:143
uint64_t InstSeqNum
Definition: inst_seq.hh:40
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:203
std::list< InstSeqNum > physRegDepList
Tick toCommitTick
Timestamp when instruction execution is completed in execute stage and instruction is marked as ready...
void addSquashedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is squashed only if it is a load and a request ...
void regStats()
Register statistics for the elastic trace.
void updateIssueOrderDep(TraceInfo *new_record)
Reverse iterate through the graph, search for an issue order dependency for a new node and update the...
Request::FlagsType reqFlags
virtual const std::string name() const
Definition: sim_object.hh:120
bool hasCompCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a comp node that completed earlier than the execute tick. ...
void clearTempStoreUntil(const DynInstConstPtr &head_inst)
Clear entries in the temporary store of execution info objects to free allocated memory until the pre...
std::vector< ProbeListener * > listeners
Definition: probe.hh:104
Physical register ID.
Definition: reg_class.hh:229
ProtoOutputStream * instTraceStream
Protobuf output stream for instruction fetch trace.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:279
void fetchReqTrace(const RequestPtr &req)
Take the fields of the request class object that are relevant to create an instruction fetch request...
const PhysRegIndex & flatIndex() const
Flat index accessor.
Definition: reg_class.hh:311
Stats::Scalar maxTempStoreSize
Maximum size of the temporary store mostly useful as a check that it is not growing.
Declaration of the Packet class.
std::set< InstSeqNum > physRegDepSet
Set of instruction sequence numbers that this instruction depends on due to Read After Write data dep...
std::list< InstSeqNum > robDepList
Stats::Scalar maxNumDependents
Maximum number of dependents on any instruction.
virtual ThreadContext * getContext(int tn)
Given a thread num get the thread context for it.
Definition: base.hh:298
bool firstWin
Used for checking the first window for processing and writing of dependency trace.
const bool traceVirtAddr
Whether to trace virtual addresses for memory requests.
Tick executeTick
Timestamp when instruction was first processed by execute stage.
void addCommittedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is committed.
void updateRegDep(const DynInstConstPtr &dyn_inst)
Record a Read After Write physical register dependency if there has been a write to the source regist...
std::reverse_iterator< depTraceItr > depTraceRevItr
Typedef of the reverse iterator to the instruction dependency trace.
Register ID: describe an architectural register with its class and index.
Definition: reg_class.hh:79
std::unordered_map< PhysRegIndex, InstSeqNum > physRegDepMap
Map for recording the producer of a physical register to check Read After Write dependencies.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:312
const InstSeqNum startTraceInst
Number of instructions after which to enable tracing.
InstSeqNum lastClearedSeqNum
The last cleared instruction sequence number used to free up the memory allocated in the temporary st...
Helper template class to turn a simple class member function into a callback.
Definition: callback.hh:64
Stats::Scalar numRegDep
Number of register dependencies recorded during tracing.
RecordType type
The type of trace record for the instruction node.
ProbeListenerArg generates a listener for the class of Arg and the class type T which is the class co...
Definition: probe.hh:217
const std::string name() const
Returns the name of the trace probe listener.
bool isZeroReg() const
Check if this is the zero register.
Definition: reg_class.hh:141
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:703
Stats::Scalar numFilteredNodes
Number of filtered nodes.
const char * className() const
Return a const char* with the register class name.
Definition: reg_class.hh:208
void compDelayRob(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has an ROB...
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:104
#define DPRINTFR(...)
Definition: trace.hh:231
static int numSimulatedInsts()
Definition: cpu_dummy.hh:48

Generated on Fri Feb 28 2020 16:26:59 for gem5 by doxygen 1.8.13