gem5  v20.0.0.3
trace_cpu.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 - 2016 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include "cpu/trace/trace_cpu.hh"
39 
40 #include "sim/sim_exit.hh"
41 
42 // Declare and initialize the static counter for number of trace CPUs.
44 
// Construct a TraceCPU from its parameter object: initialise the BaseCPU
// part, both cache ports, the master ids, the instruction- and data-side
// trace generators and their scheduling events, then check that the
// configured ROB / store buffer / load buffer sizes fit the limits below.
45 TraceCPU::TraceCPU(TraceCPUParams *params)
46  : BaseCPU(params),
47  icachePort(this),
48  dcachePort(this),
49  instMasterID(params->system->getMasterId(this, "inst")),
50  dataMasterID(params->system->getMasterId(this, "data")),
51  instTraceFile(params->instTraceFile),
52  dataTraceFile(params->dataTraceFile),
53  icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
54  dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
55  params),
56  icacheNextEvent([this]{ schedIcacheNext(); }, name()),
57  dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
58  oneTraceComplete(false),
59  traceOffset(0),
60  execCompleteEvent(nullptr),
61  enableEarlyExit(params->enableEarlyExit),
62  progressMsgInterval(params->progressMsgInterval),
63  progressMsgThreshold(params->progressMsgInterval)
64 {
65  // Increment static counter for number of Trace CPUs.
    // NOTE(review): no increment statement follows this comment in this
    // listing (listing line 66 is absent) - confirm against upstream.
67 
68  // Check that the python parameters for sizes of ROB, store buffer and
69  // load buffer do not overflow the corresponding C++ variables.
70  fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
71  "max. value of %d.\n", params->sizeROB, UINT16_MAX);
    // Report the store buffer by name and with its own value; this check
    // previously printed "ROB size" and params->sizeROB.
72  fatal_if(params->sizeStoreBuffer > UINT16_MAX, "Store buffer size set to %d "
73  "exceeds the max. value of %d.\n", params->sizeStoreBuffer,
74  UINT16_MAX);
75  fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
76  " %d exceeds the max. value of %d.\n",
77  params->sizeLoadBuffer, UINT16_MAX);
78 }
79 
// NOTE(review): the line naming this definition is missing from this listing
// (listing line 80 is absent). The body below is empty.
81 {
82 
83 }
84 
// Factory method on the parameter object: allocates a TraceCPU with new,
// passing this parameter object to its constructor, and returns the raw
// pointer. Nothing in this function frees the object.
85 TraceCPU*
86 TraceCPUParams::create()
87 {
88  return new TraceCPU(this);
89 }
90 
// Store rob_num as the current value of numOps and print a progress message
// containing progressMsgThreshold.
// NOTE(review): listing lines 95 and 97 are absent here (note the unmatched
// closing brace below), so the condition guarding the message and any update
// of the threshold are not visible in this view.
91 void
92 TraceCPU::updateNumOps(uint64_t rob_num)
93 {
94  numOps = rob_num;
96  inform("%s: %i insts committed\n", name(), progressMsgThreshold);
98  }
99 }
100 
// Make this CPU's instruction and data ports take over from the
// corresponding ports of oldCPU.
// NOTE(review): the signature line (listing line 102) is missing from this
// listing; oldCPU is presumably the function's parameter.
101 void
103 {
104  // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
105  getInstPort().takeOverFrom(&oldCPU->getInstPort());
106  getDataPort().takeOverFrom(&oldCPU->getDataPort());
107 }
108 
// Start-up routine: logs both trace file names, calls BaseCPU::init(),
// initialises both trace generators, derives the common trace offset,
// schedules the first icache and dcache events relative to that offset and,
// unless early exit is enabled, creates the counted exit event.
// NOTE(review): the signature line (listing line 110) is missing from this
// listing.
109 void
111 {
112  DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
113  "\n", instTraceFile);
114  DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
115  dataTraceFile);
116 
117  BaseCPU::init();
118 
119  // Get the send tick of the first instruction read request
120  Tick first_icache_tick = icacheGen.init();
121 
122  // Get the send tick of the first data read/write request
123  Tick first_dcache_tick = dcacheGen.init();
124 
125  // Set the trace offset as the minimum of that in both traces
126  traceOffset = std::min(first_icache_tick, first_dcache_tick);
127  inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
128  name(), traceOffset);
129 
130  // Schedule next icache and dcache event by subtracting the offset, so
    // that the earlier of the two traces starts at tick zero.
131  schedule(icacheNextEvent, first_icache_tick - traceOffset);
132  schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
133 
134  // Adjust the trace offset for the dcache generator's ready nodes
135  // We don't need to do this for the icache generator as it will
136  // send its first request at the first event and schedule subsequent
137  // events using a relative tick delta
    // NOTE(review): the statement this comment describes (listing line 138)
    // is absent from this listing.
139 
140  // If the Trace CPU simulation is configured to exit on any one trace
141  // completion then we don't need a counted event to count down all Trace
142  // CPUs in the system. If not then instantiate a counted event.
143  if (!enableEarlyExit) {
144  // The static counter for number of Trace CPUs is correctly set at
145  // this point so create an event and pass it.
146  execCompleteEvent = new CountedExitEvent("end of all traces reached.",
147  numTraceCPUs);
148  }
149 
150 }
151 
// Instruction-side event handler: asks icacheGen to send its next (or retry)
// packet and reacts to the outcome.
// NOTE(review): the signature line (listing line 153) is missing; this is
// presumably the schedIcacheNext() bound to icacheNextEvent in the
// constructor. Listing lines 163-164 and 171 are also absent, so the
// statements in the success branch and in the trace-complete branch are not
// visible here.
152 void
154 {
155  DPRINTF(TraceCPUInst, "IcacheGen event.\n");
156 
157  // Try to send the current packet or a retry packet if there is one
158  bool sched_next = icacheGen.tryNext();
159  // If packet sent successfully, schedule next event
160  if (sched_next) {
161  DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
162  "at %d.\n", curTick() + icacheGen.tickDelta());
165  } else {
166  // Check if the trace is complete. If not, do nothing because sending
167  // failed and the next event will be scheduled via RecvRetry()
168  if (icacheGen.isTraceComplete()) {
169  // If this is the first trace to complete, set the variable. If it
170  // is already set then both traces are complete, so exit the sim.
172  }
173  }
174  return;
175 }
176 
// Data-side event handler: runs one execute() step of dcacheGen and then
// checks whether the generator reports that execution is complete.
// NOTE(review): the signature line (listing line 178) is missing; this is
// presumably the schedDcacheNext() bound to dcacheNextEvent in the
// constructor. Listing lines 183 and 187 are also absent, so the numCycles
// update and the body of the completion branch are not visible here.
177 void
179 {
180  DPRINTF(TraceCPUData, "DcacheGen event.\n");
181 
182  // Update stat for numCycles
184 
185  dcacheGen.execute();
186  if (dcacheGen.isExecComplete()) {
188  }
189 }
190 
// Completion bookkeeping for the two traces. The first call only records
// that one trace has finished; a later call reports completion and, when
// early exit is enabled, ends the simulation loop.
// NOTE(review): the signature line (listing line 192) and the statement in
// the final else branch (listing line 207) are missing from this listing.
191 void
193 {
194  if (!oneTraceComplete) {
    // First trace to finish: remember it and wait for the other one.
195  oneTraceComplete = true;
196  } else {
197  // Schedule event to indicate execution is complete as both
198  // instruction and data access traces have been played back.
199  inform("%s: Execution complete.\n", name());
200  // If the replay is configured to exit early, that is when any one
201  // execution is complete then exit immediately and return. Otherwise,
202  // schedule the counted exit that counts down completion of each Trace
203  // CPU.
204  if (enableEarlyExit) {
205  exitSimLoop("End of trace reached");
206  } else {
208  }
209  }
210 }
211 
// Statistics registration: gives each statistic a name prefixed with name()
// and a description, and defines cpi as numCycles divided by numOps.
// NOTE(review): the signature line (listing line 213) and listing lines 216,
// 218, 223, 240 and 241 are missing from this listing. The first two
// .name()/.desc() chains below have therefore lost the statistic they apply
// to; judging by the name strings they are presumably numSchedDcacheEvent and
// numSchedIcacheEvent - confirm against upstream.
212 void
214 {
215 
217 
219  .name(name() + ".numSchedDcacheEvent")
220  .desc("Number of events scheduled to trigger data request generator")
221  ;
222 
224  .name(name() + ".numSchedIcacheEvent")
225  .desc("Number of events scheduled to trigger instruction request generator")
226  ;
227 
228  numOps
229  .name(name() + ".numOps")
230  .desc("Number of micro-ops simulated by the Trace CPU")
231  ;
232 
233  cpi
234  .name(name() + ".cpi")
235  .desc("Cycles per micro-op used as a proxy for CPI")
236  .precision(6)
237  ;
    // cpi is expressed in terms of the two other statistics.
238  cpi = numCycles/numOps;
239 
242 }
243 
// Statistics registration for the data-side generator: assigns every
// statistic below a name prefixed with name() and a one-line description.
// NOTE(review): the signature line (listing line 245) is missing from this
// listing.
244 void
246 {
247  using namespace Stats;
248 
249  maxDependents
250  .name(name() + ".maxDependents")
251  .desc("Max number of dependents observed on a node")
252  ;
253 
254  maxReadyListSize
255  .name(name() + ".maxReadyListSize")
256  .desc("Max size of the ready list observed")
257  ;
258 
259  numSendAttempted
260  .name(name() + ".numSendAttempted")
261  .desc("Number of first attempts to send a request")
262  ;
263 
264  numSendSucceeded
265  .name(name() + ".numSendSucceeded")
266  .desc("Number of successful first attempts")
267  ;
268 
269  numSendFailed
270  .name(name() + ".numSendFailed")
271  .desc("Number of failed first attempts")
272  ;
273 
274  numRetrySucceeded
275  .name(name() + ".numRetrySucceeded")
276  .desc("Number of successful retries")
277  ;
278 
279  numSplitReqs
280  .name(name() + ".numSplitReqs")
281  .desc("Number of split requests")
282  ;
283 
284  numSOLoads
285  .name(name() + ".numSOLoads")
286  .desc("Number of strictly ordered loads")
287  ;
288 
289  numSOStores
290  .name(name() + ".numSOStores")
291  .desc("Number of strictly ordered stores")
292  ;
293 
294  dataLastTick
295  .name(name() + ".dataLastTick")
296  .desc("Last tick simulated from the elastic data trace")
297  ;
298 }
299 
// Initialise the data-side generator: read two windows from the trace
// (panicking if either read fails) and return the execute tick of the first
// entry in readyList.
// NOTE(review): the signature line (listing line 301) is missing from this
// listing. The first readyList entry is dereferenced without an emptiness
// check; presumably two successful window reads always yield at least one
// dependency-free node - confirm.
300 Tick
302 {
303  DPRINTF(TraceCPUData, "Initializing data memory request generator "
304  "DcacheGen: elastic issue with retry.\n");
305 
306  if (!readNextWindow())
307  panic("Trace has %d elements. It must have at least %d elements.\n",
308  depGraph.size(), 2 * windowSize);
309  DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
310  depGraph.size());
311 
312  if (!readNextWindow())
313  panic("Trace has %d elements. It must have at least %d elements.\n",
314  depGraph.size(), 2 * windowSize);
315  DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
316  depGraph.size());
317 
318  // Print readyList
319  if (DTRACE(TraceCPUData)) {
320  printReadyList();
321  }
322  auto free_itr = readyList.begin();
323  DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
324  " is %d.\n", free_itr->seqNum, free_itr->execTick);
325  // Return the execute tick of the earliest ready node so that an event
326  // can be scheduled to call execute()
327  return (free_itr->execTick);
328 }
329 
// Subtract offset from the execute tick of every entry currently in
// readyList.
// NOTE(review): the signature line, including the opening brace (listing
// line 331), is missing from this listing; offset is presumably the
// function's parameter.
330 void
332  for (auto& free_node : readyList) {
333  free_node.execTick -= offset;
334  }
335 }
336 
// Calls reset() on the trace reader object.
// NOTE(review): the signature line (listing line 338) is missing from this
// listing.
337 void
339 {
340  trace.reset();
341 }
342 
// Read up to windowSize records from the trace into depGraph. Each record
// becomes a heap-allocated GraphNode whose ROB and register dependencies are
// registered on its parents; nodes with no outstanding dependencies are
// handed to checkAndIssue() straight away.
// Returns true when a full window was read. Returns false when the trace was
// already complete on entry, or when the end of the trace is reached while
// reading (traceComplete is set in that case).
// NOTE(review): the signature line (listing line 344) is missing from this
// listing.
343 bool
345 {
346 
347  // Read and add next window
348  DPRINTF(TraceCPUData, "Reading next window from file.\n");
349 
350  if (traceComplete) {
351  // We are at the end of the file, thus we have no more records.
352  // Return false.
353  return false;
354  }
355 
356  DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
357  depGraph.size());
358 
359  uint32_t num_read = 0;
360  while (num_read != windowSize) {
361 
362  // Create a new graph node
363  GraphNode* new_node = new GraphNode;
364 
365  // Read the next line to get the next record. If that fails then end of
366  // trace has been reached and traceComplete needs to be set in addition
367  // to returning false.
368  if (!trace.read(new_node)) {
369  DPRINTF(TraceCPUData, "\tTrace complete!\n");
370  traceComplete = true;
    // The node has not been stored in depGraph or any other container in
    // this function, so free it here; otherwise it leaks at the end of
    // every trace.
    delete new_node;
371  return false;
372  }
373 
374  // Annotate the ROB dependencies of the new node onto the parent nodes.
375  addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
376  // Annotate the register dependencies of the new node onto the parent
377  // nodes.
378  addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
379 
380  num_read++;
381  // Add to map
382  depGraph[new_node->seqNum] = new_node;
383  if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
384  // Source dependencies are already complete, check if resources
385  // are available and issue. The execution time is approximated
386  // to current time plus the computational delay.
387  checkAndIssue(new_node);
388  }
389  }
390 
391  DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
392  depGraph.size());
393  return true;
394 }
395 
// Walk dep_array (the dependency list of new_node). For every non-zero entry
// whose parent is still in depGraph, append new_node to that parent's
// dependents list. Entries whose parent is not in depGraph are cleared to
// zero and num_dep is decremented.
// NOTE(review): the first line of the signature (listing line 397, carrying
// the function name and the new_node parameter) is missing from this listing.
396 template<typename T> void
398  T& dep_array, uint8_t& num_dep)
399 {
400  for (auto& a_dep : dep_array) {
401  // The convention is to set the dependencies starting with the first
402  // index in the ROB and register dependency arrays. Thus, when we reach
403  // a dependency equal to the initialisation value of zero, we know we have
404  // iterated over all dependencies and can break.
405  if (a_dep == 0)
406  break;
407  // We look up the valid dependency, i.e. the parent of this node
408  auto parent_itr = depGraph.find(a_dep);
409  if (parent_itr != depGraph.end()) {
410  // If the parent is found, it is yet to be executed. Append a
411  // pointer to the new node to the dependents list of the parent
412  // node.
413  parent_itr->second->dependents.push_back(new_node);
414  auto num_depts = parent_itr->second->dependents.size();
    // Track the largest dependents list seen so far.
415  maxDependents = std::max<double>(num_depts, maxDependents.value());
416  } else {
417  // The dependency is not found in the graph. So consider
418  // the execution of the parent is complete, i.e. remove this
419  // dependency.
420  a_dep = 0;
421  num_dep--;
422  }
423  }
424 }
425 
426 void
428 {
429  DPRINTF(TraceCPUData, "Execute start occupancy:\n");
430  DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
431  "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
432  depFreeQueue.size());
433  hwResource.printOccupancy();
434 
435  // Read next window to make sure that dependents of all dep-free nodes
436  // are in the depGraph
437  if (nextRead) {
438  readNextWindow();
439  nextRead = false;
440  }
441 
442  // First attempt to issue the pending dependency-free nodes held
443  // in depFreeQueue. If resources have become available for a node,
444  // then issue it, i.e. add the node to readyList.
445  while (!depFreeQueue.empty()) {
446  if (checkAndIssue(depFreeQueue.front(), false)) {
447  DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
448  "%lli.\n", (depFreeQueue.front())->seqNum);
449  depFreeQueue.pop();
450  } else {
451  break;
452  }
453  }
454  // Proceed to execute from readyList
455  auto graph_itr = depGraph.begin();
456  auto free_itr = readyList.begin();
457  // Iterate through readyList until the next free node has its execute
458  // tick later than curTick or the end of readyList is reached
459  while (free_itr->execTick <= curTick() && free_itr != readyList.end()) {
460 
461  // Get pointer to the node to be executed
462  graph_itr = depGraph.find(free_itr->seqNum);
463  assert(graph_itr != depGraph.end());
464  GraphNode* node_ptr = graph_itr->second;
465 
466  // If there is a retryPkt send that else execute the load
467  if (retryPkt) {
468  // The retryPkt must be the request that was created by the
469  // first node in the readyList.
470  if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
471  panic("Retry packet's seqence number does not match "
472  "the first node in the readyList.\n");
473  }
474  if (port.sendTimingReq(retryPkt)) {
475  ++numRetrySucceeded;
476  retryPkt = nullptr;
477  }
478  } else if (node_ptr->isLoad() || node_ptr->isStore()) {
479  // If there is no retryPkt, attempt to send a memory request in
480  // case of a load or store node. If the send fails, executeMemReq()
481  // returns a packet pointer, which we save in retryPkt. In case of
482  // a comp node we don't do anything and simply continue as if the
483  // execution of the comp node succedded.
484  retryPkt = executeMemReq(node_ptr);
485  }
486  // If the retryPkt or a new load/store node failed, we exit from here
487  // as a retry from cache will bring the control to execute(). The
488  // first node in readyList then, will be the failed node.
489  if (retryPkt) {
490  break;
491  }
492 
493  // Proceed to remove dependencies for the successfully executed node.
494  // If it is a load which is not strictly ordered and we sent a
495  // request for it successfully, we do not yet mark any register
496  // dependencies complete. But as per dependency modelling we need
497  // to mark ROB dependencies of load and non load/store nodes which
498  // are based on successful sending of the load as complete.
499  if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
500  // If execute succeeded mark its dependents as complete
501  DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
502  "dependents..\n", node_ptr->seqNum);
503 
504  auto child_itr = (node_ptr->dependents).begin();
505  while (child_itr != (node_ptr->dependents).end()) {
506  // ROB dependency of a store on a load must not be removed
507  // after load is sent but after response is received
508  if (!(*child_itr)->isStore() &&
509  (*child_itr)->removeRobDep(node_ptr->seqNum)) {
510 
511  // Check if the child node has become dependency free
512  if ((*child_itr)->numRobDep == 0 &&
513  (*child_itr)->numRegDep == 0) {
514 
515  // Source dependencies are complete, check if
516  // resources are available and issue
517  checkAndIssue(*child_itr);
518  }
519  // Remove this child for the sent load and point to new
520  // location of the element following the erased element
521  child_itr = node_ptr->dependents.erase(child_itr);
522  } else {
523  // This child is not dependency-free, point to the next
524  // child
525  child_itr++;
526  }
527  }
528  } else {
529  // If it is a strictly ordered load mark its dependents as complete
530  // as we do not send a request for this case. If it is a store or a
531  // comp node we also mark all its dependents complete.
532  DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
533  " up dependents..\n", node_ptr->seqNum);
534 
535  for (auto child : node_ptr->dependents) {
536  // If the child node is dependency free removeDepOnInst()
537  // returns true.
538  if (child->removeDepOnInst(node_ptr->seqNum)) {
539  // Source dependencies are complete, check if resources
540  // are available and issue
541  checkAndIssue(child);
542  }
543  }
544  }
545 
546  // After executing the node, remove from readyList and delete node.
547  readyList.erase(free_itr);
548  // If it is a cacheable load which was sent, don't delete
549  // just yet. Delete it in completeMemAccess() after the
550  // response is received. If it is an strictly ordered
551  // load, it was not sent and all dependencies were simply
552  // marked complete. Thus it is safe to delete it. For
553  // stores and non load/store nodes all dependencies were
554  // marked complete so it is safe to delete it.
555  if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
556  // Release all resources occupied by the completed node
557  hwResource.release(node_ptr);
558  // clear the dynamically allocated set of dependents
559  (node_ptr->dependents).clear();
560  // Update the stat for numOps simulated
561  owner.updateNumOps(node_ptr->robNum);
562  // delete node
563  delete node_ptr;
564  // remove from graph
565  depGraph.erase(graph_itr);
566  }
567  // Point to first node to continue to next iteration of while loop
568  free_itr = readyList.begin();
569  } // end of while loop
570 
571  // Print readyList, sizes of queues and resource status after updating
572  if (DTRACE(TraceCPUData)) {
573  printReadyList();
574  DPRINTF(TraceCPUData, "Execute end occupancy:\n");
575  DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
576  "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
577  depFreeQueue.size());
578  hwResource.printOccupancy();
579  }
580 
581  if (retryPkt) {
582  DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
583  "event from the cache for seq. num %lli.\n",
584  retryPkt->req->getReqInstSeqNum());
585  return;
586  }
587  // If the size of the dependency graph is less than the dependency window
588  // then read from the trace file to populate the graph next time we are in
589  // execute.
590  if (depGraph.size() < windowSize && !traceComplete)
591  nextRead = true;
592 
593  // If cache is not blocked, schedule an event for the first execTick in
594  // readyList else retry from cache will schedule the event. If the ready
595  // list is empty then check if the next pending node has resources
596  // available to issue. If yes, then schedule an event for the next cycle.
597  if (!readyList.empty()) {
598  Tick next_event_tick = std::max(readyList.begin()->execTick,
599  curTick());
600  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
601  next_event_tick);
602  owner.schedDcacheNextEvent(next_event_tick);
603  } else if (readyList.empty() && !depFreeQueue.empty() &&
604  hwResource.isAvailable(depFreeQueue.front())) {
605  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
606  owner.clockEdge(Cycles(1)));
607  owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
608  }
609 
610  // If trace is completely read, readyList is empty and depGraph is empty,
611  // set execComplete to true
612  if (depGraph.empty() && readyList.empty() && traceComplete &&
613  !hwResource.awaitingResponse()) {
614  DPRINTF(TraceCPUData, "\tExecution Complete!\n");
615  execComplete = true;
616  dataLastTick = curTick();
617  }
618 }
619 
// Build and send the memory request for a load/store node.
// Returns nullptr when the node is strictly ordered (nothing is sent) or when
// the send succeeds; returns the packet when the send fails so that the
// caller can keep it for a retry.
// NOTE(review): the signature line (listing line 621) is missing from this
// listing; node_ptr is presumably the function's parameter.
620 PacketPtr
622 {
623 
624  DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
625  "virt addr %d, pc %#x, size %d, flags %d).\n",
626  node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
627  node_ptr->pc, node_ptr->size, node_ptr->flags);
628 
629  // If the request is strictly ordered, do not send it. Just return nullptr
630  // as if it was successfully sent.
631  if (node_ptr->isStrictlyOrdered()) {
632  node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
633  DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
634  node_ptr->seqNum);
635  return nullptr;
636  }
637 
638  // Check if the request spans two cache lines as this condition triggers
639  // an assert fail in the L1 cache. If it does then truncate the size to
640  // access only until the end of that line and ignore the remainder. The
641  // stat counting this is useful to keep a check on how frequently this
642  // happens. If required the code could be revised to mimic splitting such
643  // a request into two.
644  unsigned blk_size = owner.cacheLineSize();
    // Offset of the access within its cache line (the mask assumes blk_size
    // is a power of two).
645  Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
646  if (!(blk_offset + node_ptr->size <= blk_size)) {
647  node_ptr->size = blk_size - blk_offset;
648  ++numSplitReqs;
649  }
650 
651  // Create a request and the packet containing request
652  auto req = std::make_shared<Request>(
653  node_ptr->physAddr, node_ptr->size, node_ptr->flags, masterID);
654  req->setReqInstSeqNum(node_ptr->seqNum);
655 
656  req->setPC(node_ptr->pc);
657  // If virtual address is valid, set the virtual address field
658  // of the request.
659  if (node_ptr->virtAddr != 0) {
660  req->setVirt(node_ptr->virtAddr, node_ptr->size,
661  node_ptr->flags, masterID, node_ptr->pc);
662  req->setPaddr(node_ptr->physAddr);
663  req->setReqInstSeqNum(node_ptr->seqNum);
664  }
665 
666  PacketPtr pkt;
    // Data buffer for the packet; it is passed to the packet through
    // dataDynamic() below.
667  uint8_t* pkt_data = new uint8_t[req->getSize()];
668  if (node_ptr->isLoad()) {
669  pkt = Packet::createRead(req);
670  } else {
671  pkt = Packet::createWrite(req);
    // Writes carry a fixed filler pattern.
672  memset(pkt_data, 0xA, req->getSize());
673  }
674  pkt->dataDynamic(pkt_data);
675 
676  // Call MasterPort method to send a timing request for this packet
677  bool success = port.sendTimingReq(pkt);
678  ++numSendAttempted;
679 
680  if (!success) {
681  // If it fails, return the packet to retry when a retry is signalled by
682  // the cache
683  ++numSendFailed;
684  DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
685  return pkt;
686  } else {
687  // If it succeeds, return nullptr
688  ++numSendSucceeded;
689  return nullptr;
690  }
691 }
692 
// Try to issue a dependency-free node. If hwResource reports room for it, the
// node is inserted into readyList with an execute tick of
// owner.clockEdge() + compDelay, its resources are occupied and true is
// returned. Otherwise false is returned, and on the first attempt the node is
// also pushed onto depFreeQueue.
// NOTE(review): the signature line (listing line 694) is missing from this
// listing; node_ptr and first are presumably its parameters, with first
// defaulting to true given the single-argument calls elsewhere - confirm.
693 bool
695 {
696  // Assert the node is dependency-free
697  assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
698 
699  // If this is the first attempt, print a debug message to indicate this.
700  if (first) {
701  DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
702  " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
703  node_ptr->robNum);
704  }
705 
706  // Check if resources are available to issue the specific node
707  if (hwResource.isAvailable(node_ptr)) {
708  // If resources are free only then add to readyList
709  DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
710  " to readyList, occupying resources.\n", node_ptr->seqNum);
711  // Compute the execute tick by adding the compute delay for the node
712  // and add the ready node to the ready list
713  addToSortedReadyList(node_ptr->seqNum,
714  owner.clockEdge() + node_ptr->compDelay);
715  // Account for the resources taken up by this issued node.
716  hwResource.occupy(node_ptr);
717  return true;
718 
719  } else {
720  if (first) {
721  // Although dependencies are complete, resources are not available.
722  DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
723  " Adding to depFreeQueue.\n", node_ptr->seqNum);
724  depFreeQueue.push(node_ptr);
725  } else {
    // Not the first attempt: nothing is queued here, only a debug message
    // is printed.
726  DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
727  "Still pending issue.\n", node_ptr->seqNum);
728  }
729  return false;
730  }
731 }
732 
// Handle a memory response. A write response only frees a store buffer
// entry. A read response releases the load's resources, wakes up its
// dependents, updates numOps and deletes the node from depGraph. Afterwards
// the next window read is requested if depGraph is below windowSize and,
// unless a retry is pending, the next dcache event is scheduled.
// NOTE(review): the signature line (listing line 734) is missing from this
// listing; pkt is presumably the function's parameter.
733 void
735 {
736  // Release the resources for this completed node.
737  if (pkt->isWrite()) {
738  // Consider store complete.
739  hwResource.releaseStoreBuffer();
740  // If it is a store response then do nothing since we do not model
741  // dependencies on store completion in the trace. But if we were
742  // blocking execution due to store buffer fullness, we need to schedule
743  // an event and attempt to progress.
744  } else {
745  // If it is a load response then release the dependents waiting on it.
746  // Get pointer to the completed load
747  auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
748  assert(graph_itr != depGraph.end());
749  GraphNode* node_ptr = graph_itr->second;
750 
751  // Release resources occupied by the load
752  hwResource.release(node_ptr);
753 
754  DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
755  " dependents..\n", node_ptr->seqNum);
756 
757  for (auto child : node_ptr->dependents) {
    // removeDepOnInst() returning true is treated as "child is ready to
    // be issued".
758  if (child->removeDepOnInst(node_ptr->seqNum)) {
759  checkAndIssue(child);
760  }
761  }
762 
763  // clear the dynamically allocated set of dependents
764  (node_ptr->dependents).clear();
765  // Update the stat for numOps completed
766  owner.updateNumOps(node_ptr->robNum);
767  // delete node
768  delete node_ptr;
769  // remove from graph
770  depGraph.erase(graph_itr);
771  }
772 
773  if (DTRACE(TraceCPUData)) {
774  printReadyList();
775  }
776 
777  // If the size of the dependency graph is less than the dependency window
778  // then read from the trace file to populate the graph next time we are in
779  // execute.
780  if (depGraph.size() < windowSize && !traceComplete)
781  nextRead = true;
782 
783  // If not waiting for retry, attempt to schedule next event
784  if (!retryPkt) {
785  // We might have new dep-free nodes in the list which will have execute
786  // tick greater than or equal to curTick. But a new dep-free node might
787  // have its execute tick earlier. Therefore, attempt to reschedule. It
788  // could happen that the readyList is empty and we got here via a
789  // last remaining response. So, either the trace is complete or there
790  // are pending nodes in the depFreeQueue. The checking is done in the
791  // execute() control flow, so schedule an event to go via that flow.
792  Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
793  std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
794  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
795  next_event_tick);
796  owner.schedDcacheNextEvent(next_event_tick);
797  }
798 }
799 
// Insert a (seq_num, exec_tick) entry into readyList, keeping the list
// ordered by ascending execute tick and, for equal ticks, ascending sequence
// number. If a retry packet is pending for the node at the head of the list,
// that head entry is skipped so it stays first. Also updates the
// maxReadyListSize statistic.
// NOTE(review): the first line of the signature (listing line 801, carrying
// the function name and the seq_num parameter) is missing from this listing.
800 void
802  Tick exec_tick)
803 {
804  ReadyNode ready_node;
805  ready_node.seqNum = seq_num;
806  ready_node.execTick = exec_tick;
807 
808  // Iterator to readyList
809  auto itr = readyList.begin();
810 
811  // If the readyList is empty, simply insert the new node at the beginning
812  // and return
813  if (itr == readyList.end()) {
814  readyList.insert(itr, ready_node);
815  maxReadyListSize = std::max<double>(readyList.size(),
816  maxReadyListSize.value());
817  return;
818  }
819 
820  // If a retry is pending for the first node in the list, i.e. that node
821  // failed to execute, step past it so that its position as the first
822  // entry is maintained regardless of the new node's execution tick.
823  if (retryPkt)
824  if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
825  itr++;
826 
827  // Increment the iterator and compare the node pointed to by it to the new
828  // node till the position to insert the new node is found.
829  bool found = false;
830  while (!found && itr != readyList.end()) {
831  // If the execution tick of the new node is less than the node then
832  // this is the position to insert
833  if (exec_tick < itr->execTick)
834  found = true;
835  // If the execution tick of the new node is equal to the node then
836  // sort in ascending order of sequence numbers
837  else if (exec_tick == itr->execTick) {
838  // If the sequence number of the new node is less than the node
839  // then this is the position to insert
840  if (seq_num < itr->seqNum)
841  found = true;
842  // Else go to next node
843  else
844  itr++;
845  }
846  // If the execution tick of the new node is greater than the node then
847  // go to the next node
848  else
849  itr++;
850  }
851  readyList.insert(itr, ready_node);
852  // Update the stat for max size reached of the readyList
853  maxReadyListSize = std::max<double>(readyList.size(),
854  maxReadyListSize.value());
855 }
856 
// Debug helper: prints every readyList entry (sequence number, node type and
// execute tick), or a single message when the list is empty.
// NOTE(review): the signature line, including the opening brace (listing
// line 858), is missing from this listing.
857 void
859 
860  auto itr = readyList.begin();
861  if (itr == readyList.end()) {
862  DPRINTF(TraceCPUData, "readyList is empty.\n");
863  return;
864  }
865  DPRINTF(TraceCPUData, "Printing readyList:\n");
866  while (itr != readyList.end()) {
    // The lookup result is used without an end() check; presumably every
    // readyList entry still has its node in depGraph - confirm.
867  auto graph_itr = depGraph.find(itr->seqNum);
868  GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
869  DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
870  node_ptr->typeToStr(), itr->execTick);
871  itr++;
872  }
873 }
874 
// Constructor tail: stores the three capacity limits, sets
// oldestInFlightRobNum to UINT64_MAX (the same value release() restores when
// nothing is in flight) and zeroes both in-flight counters.
// NOTE(review): the first line of this definition (listing line 875, carrying
// the qualified constructor name) is missing from this listing.
876  uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
877  : sizeROB(max_rob),
878  sizeStoreBuffer(max_stores),
879  sizeLoadBuffer(max_loads),
880  oldestInFlightRobNum(UINT64_MAX),
881  numInFlightLoads(0),
882  numInFlightStores(0)
883 {}
884 
// Account for a newly issued node: record its ROB number in inFlightNodes
// (keyed by sequence number), refresh oldestInFlightRobNum from the first
// map entry, and print the occupancy.
// NOTE(review): the signature line (listing line 886) and the statements
// inside the load and store branches (listing lines 896 and 898) are missing
// from this listing; new_node is presumably the function's parameter.
885 void
887 {
888  // Occupy ROB entry for the issued node
889  // Merely maintain the oldest node, i.e. numerically least robNum by saving
890  // it in the variable oldestInFlightRobNum.
891  inFlightNodes[new_node->seqNum] = new_node->robNum;
892  oldestInFlightRobNum = inFlightNodes.begin()->second;
893 
894  // Occupy Load/Store Buffer entry for the issued node if applicable
895  if (new_node->isLoad()) {
897  } else if (new_node->isStore()) {
899  } // else if it is a non load/store node, no buffer entry is occupied
900 
901  printOccupancy();
902 }
903 
// Release the resources of a completed node: remove it from inFlightNodes and
// recompute oldestInFlightRobNum (UINT64_MAX when nothing is left in flight).
// NOTE(review): the signature line (listing line 905) is missing from this
// listing, as are listing lines 926, 934 and 941. The second DPRINTFR call
// below is therefore cut off before its last argument, and the statements
// inside the load branch and the strictly-ordered-store branch are not
// visible; done_node is presumably the function's parameter.
904 void
906 {
907  assert(!inFlightNodes.empty());
908  DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
909  done_node->seqNum);
910 
911  assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
912  inFlightNodes.erase(done_node->seqNum);
913 
914  if (inFlightNodes.empty()) {
915  // If we deleted the only in-flight node then the
916  // oldestInFlightRobNum is set to its initialized (max) value.
917  oldestInFlightRobNum = UINT64_MAX;
918  } else {
919  // Set the oldest in-flight node rob number equal to the first node in
920  // the inFlightNodes since that will have the numerically least value.
921  oldestInFlightRobNum = inFlightNodes.begin()->second;
922  }
923 
924  DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
925  "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
927 
928  // A store is considered complete when a request is sent, thus ROB entry is
929  // freed. But it occupies an entry in the Store Buffer until its response
930  // is received. A load is considered complete when a response is received,
931  // thus both ROB and Load Buffer entries can be released.
932  if (done_node->isLoad()) {
933  assert(numInFlightLoads != 0);
935  }
936  // For normal writes, we send the requests out and clear a store buffer
937  // entry on response. For writes which are strictly ordered, for e.g.
938  // writes to device registers, we do that within release() which is called
939  // when node is executed and taken off from readyList.
940  if (done_node->isStore() && done_node->isStrictlyOrdered()) {
942  }
943 }
944 
// Release a store buffer entry for a completed store. Asserts that at least
// one store is in flight.
// NOTE(review): the signature line (946) and the statement after the assert
// (949) are missing from this listing.
945 void
947 {
948  assert(numInFlightStores != 0);
950 }
951 
// Check whether the ROB, load buffer and store buffer have room to issue
// new_node. Returns true if the node may issue, false otherwise.
//
// The ROB occupancy is estimated as the distance between the new node's ROB
// number and the oldest in-flight ROB number. The estimate is held in a 64-bit
// variable so that a distance of 65536 or more cannot wrap around and slip
// under sizeROB.
// NOTE(review): the first signature line (953) and the argument lines of the
// final DPRINTFR (981, 982) are missing from this listing.
952 bool
954  const GraphNode* new_node) const
955 {
956  uint64_t num_in_flight_nodes;
957  if (inFlightNodes.empty()) {
958  num_in_flight_nodes = 0;
959  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
960  " #in-flight nodes = 0", new_node->seqNum);
961  } else if (new_node->robNum > oldestInFlightRobNum) {
962  // This is the intuitive case where new dep-free node is younger
963  // instruction than the oldest instruction in-flight. Thus we make sure
964  // in_flight_nodes does not overflow.
965  num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
966  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
967  " #in-flight nodes = %d - %d = %d", new_node->seqNum,
968  new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
969  } else {
970  // This is the case where an instruction older than the oldest in-
971  // flight instruction becomes dep-free. Thus we must have already
972  // accounted for the entry in ROB for this new dep-free node.
973  // Immediately after this check returns true, oldestInFlightRobNum will
974  // be updated in occupy(). We simply let this node issue now.
975  num_in_flight_nodes = 0;
976  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
977  " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
978  new_node->seqNum, new_node->robNum);
979  }
980  DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
983  // Check if resources are available to issue the specific node
984  if (num_in_flight_nodes >= sizeROB) {
985  return false;
986  }
987  if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
988  return false;
989  }
990  if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
991  return false;
992  }
993  return true;
994 }
995 
// Report whether any load or store is still counted as in flight.
// NOTE(review): the signature line (997) is missing from this listing.
996 bool
998  // Return true if there is at least one read or write request in flight
999  return (numInFlightStores != 0 || numInFlightLoads != 0);
1000 }
1001 
// Debug print of the oldest in-flight ROB number and the load/store queue
// occupancy.
// NOTE(review): the signature line (1003) and the DPRINTFR argument lines
// (1006-1008) are missing from this listing.
1002 void
1004  DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1005  "LQ = %d/%d, SQ = %d/%d.\n",
1009 }
1010 
// Statistics registration: gives each statistic a name prefixed with name()
// and a description.
// NOTE(review): the signature line (1012) and the lines naming the first four
// stat objects (1016, 1021, 1026, 1031) are missing from this listing; only
// instLastTick is visible. Judging by the name strings they are presumably
// numSendAttempted, numSendSucceeded, numSendFailed and numRetrySucceeded.
1011 void
1013 {
1014  using namespace Stats;
1015 
1017  .name(name() + ".numSendAttempted")
1018  .desc("Number of first attempts to send a request")
1019  ;
1020 
1022  .name(name() + ".numSendSucceeded")
1023  .desc("Number of successful first attempts")
1024  ;
1025 
1027  .name(name() + ".numSendFailed")
1028  .desc("Number of failed first attempts")
1029  ;
1030 
1032  .name(name() + ".numRetrySucceeded")
1033  .desc("Number of successful retries")
1034  ;
1035 
1036  instLastTick
1037  .name(name() + ".instLastTick")
1038  .desc("Last tick simulated from the fixed inst trace")
1039  ;
1040 }
1041 
// Initialise the instruction fetch generator by reading the first trace
// element. Returns the tick of that element; panics if the read fails.
// NOTE(review): the signature line (1043) is missing from this listing.
1042 Tick
1044 {
1045  DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1046  " IcacheGen: fixed issue with retry.\n");
1047 
1048  if (nextExecute()) {
1049  DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1050  return currElement.tick;
1051  } else {
1052  panic("Read of first message in the trace failed.\n");
     // Follows the panic(); presumably only present so that every path
     // returns a value.
1053  return MaxTick;
1054  }
1055 }
1056 
// Try to send the pending retry packet if there is one, otherwise the current
// trace element. On success, read the next element and compute delta, the
// tick distance between the element just sent and the next one.
// Returns false if the send failed or the trace is complete, true otherwise.
// NOTE(review): the signature line (1058) and the statement at 1070 (after a
// successful retry) are missing from this listing.
1057 bool
1059 {
1060  // If there is a retry packet, try to send it
1061  if (retryPkt) {
1062 
1063  DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1064 
1065  if (!port.sendTimingReq(retryPkt)) {
1066  // Still blocked! This should never occur.
1067  DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1068  return false;
1069  }
1071  } else {
1072 
1073  DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1074 
1075  // try sending current element
1076  assert(currElement.isValid());
1077 
1078  ++numSendAttempted;
1079 
     // On failure send() keeps the packet in retryPkt, so the next call takes
     // the retry branch above.
1080  if (!send(currElement.addr, currElement.blocksize,
1081  currElement.cmd, currElement.flags, currElement.pc)) {
1082  DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1083  ++numSendFailed;
1084  // return false to indicate not to schedule next event
1085  return false;
1086  } else {
1087  ++numSendSucceeded;
1088  }
1089  }
1090  // If packet was sent successfully, either retryPkt or currElement, return
1091  // true to indicate to schedule event at current Tick plus delta. If packet
1092  // was sent successfully and there is no next packet to send, return false.
1093  DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1094  "element.\n");
1095  retryPkt = nullptr;
1096  // Read next element into currElement, currElement gets cleared so save the
1097  // tick to calculate delta
1098  Tick last_tick = currElement.tick;
1099  if (nextExecute()) {
     // Trace ticks are expected to be non-decreasing.
1100  assert(currElement.tick >= last_tick);
1101  delta = currElement.tick - last_tick;
1102  }
1103  return !traceComplete;
1104 }
1105 
// Exit the generator by resetting the trace input stream.
// NOTE(review): the signature line (1107) is missing from this listing.
1106 void
1108 {
1109  trace.reset();
1110 }
1111 
// Read the next trace element into currElement.
// Returns true if an element was read. Returns false if the trace is already
// complete or the read fails; on a failed read it also sets traceComplete and
// records the current tick in instLastTick.
// NOTE(review): the signature line (1113) is missing from this listing.
1112 bool
1114 {
1115  if (traceComplete)
1116  // We are at the end of the file, thus we have no more messages.
1117  // Return false.
1118  return false;
1119 
1120 
1121  //Reset the currElement to the default values
1122  currElement.clear();
1123 
1124  // Read the next line to get the next message. If that fails then end of
1125  // trace has been reached and traceComplete needs to be set in addition
1126  // to returning false. If successful then next message is in currElement.
1127  if (!trace.read(&currElement)) {
1128  traceComplete = true;
1129  instLastTick = curTick();
1130  return false;
1131  }
1132 
1133  DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1134  currElement.cmd.isRead() ? 'r' : 'w',
1135  currElement.addr,
1136  currElement.pc,
1137  currElement.blocksize,
1138  currElement.tick);
1139 
1140  return true;
1141 }
1142 
// Build a request and packet from the given parameters and try to send it as
// a timing request on the port.
//
// addr, size, flags and pc populate the Request; cmd selects the packet
// command. Returns true if the port accepted the packet. On failure the packet
// is kept in retryPkt so it can be re-sent later, and false is returned.
1143 bool
1144 TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1145  Request::FlagsType flags, Addr pc)
1146 {
1147 
1148  // Create new request
1149  auto req = std::make_shared<Request>(addr, size, flags, masterID);
1150  req->setPC(pc);
1151 
1152  // If this is not done it triggers assert in L1 cache for invalid contextId
1153  req->setContext(ContextID(0));
1154 
1155  // Embed it in a packet
1156  PacketPtr pkt = new Packet(req, cmd);
1157 
     // The data buffer is handed to the packet through dataDynamic(). It is
     // not initialised here unless the command is a write.
1158  uint8_t* pkt_data = new uint8_t[req->getSize()];
1159  pkt->dataDynamic(pkt_data);
1160 
1161  if (cmd.isWrite()) {
     // Writes carry a fixed fill pattern (0xA in every byte).
1162  memset(pkt_data, 0xA, req->getSize());
1163  }
1164 
1165  // Call MasterPort method to send a timing request for this packet
1166  bool success = port.sendTimingReq(pkt);
1167  if (!success) {
1168  // If it fails, save the packet to retry when a retry is signalled by
1169  // the cache
1170  retryPkt = pkt;
1171  }
1172  return success;
1173 }
1174 
// Called when the instruction cache port receives a retry.
// NOTE(review): the signature line (1176) and the scheduling statement (1182)
// are missing from this listing; only the debug print is visible.
1175 void
1177 {
1178  // Schedule an event to go through the control flow in the same tick as
1179  // retry is received
1180  DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1181  " event @%lli.\n", curTick());
1183 }
1184 
// Called when the data cache port receives a retry.
// NOTE(review): the signature line (1186) and the scheduling statement (1192)
// are missing from this listing; only the debug print is visible.
1185 void
1187 {
1188  // Schedule an event to go through the execute flow in the same tick as
1189  // retry is received
1190  DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1191  " event @%lli.\n", curTick());
1193 }
1194 
// Schedule dcacheNextEvent at tick `when`. If the event is already scheduled,
// it is moved only when `when` is earlier than its current time; a later or
// equal request leaves the existing schedule untouched.
// NOTE(review): the signature line (1196) and the statement at 1202 are
// missing from this listing.
1195 void
1197 {
1198  if (!dcacheNextEvent.scheduled()) {
1199  DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1200  when);
1201  schedule(dcacheNextEvent, when);
1203  } else if (when < dcacheNextEvent.when()) {
1204  DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1205  " to %lli.\n", dcacheNextEvent.when(), when);
1206  reschedule(dcacheNextEvent, when);
1207  }
1208 
1209 }
1210 
// Instruction-side timing response handler: the response content is not
// used, the packet is deleted and true is always returned.
// NOTE(review): the signature line (1212) is missing from this listing.
1211 bool
1213 {
1214  // All responses on the instruction fetch side are ignored. Simply delete
1215  // the packet to free allocated memory
1216  delete pkt;
1217 
1218  return true;
1219 }
1220 
// Retry handler on the instruction fetch side.
// NOTE(review): the signature line (1222) and the single body statement
// (1224) are missing from this listing; presumably it forwards the retry to
// the owning CPU — confirm against the original file.
1221 void
1223 {
1225 }
1226 
// Called when the data cache port receives a timing response.
// NOTE(review): the signature line (1228) and the statement after the debug
// print (1231) are missing from this listing.
1227 void
1229 {
1230  DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1232 }
1233 
// Data-side timing response handler: after the response has been processed
// the packet is deleted and true is always returned.
// NOTE(review): the signature line (1235) and the statement that processes the
// response (1239) are missing from this listing.
1234 bool
1236 {
1237  // Handle the responses for data memory requests which is done inside the
1238  // elastic data generator
1240  // After processing the response delete the packet to free
1241  // memory
1242  delete pkt;
1243 
1244  return true;
1245 }
1246 
// Retry handler on the data access side.
// NOTE(review): the signature line (1248) and the single body statement
// (1250) are missing from this listing; presumably it forwards the retry to
// the owning CPU — confirm against the original file.
1247 void
1249 {
1251 }
1252 
// Constructor of the data dependency trace input stream: opens the trace
// named by filename, stores the compute-delay multiplier and reads the
// protobuf header.
//
// Panics if the header cannot be read or if the trace was recorded with a
// tick frequency different from SimClock::Frequency. The frequency check used
// to sit after the panic() inside the failed-read branch, where it could never
// run, and its message lacked the filename argument.
// NOTE(review): the first signature line (1253) is missing from this listing.
1254  const std::string& filename,
1255  const double time_multiplier)
1256  : trace(filename),
1257  timeMultiplier(time_multiplier),
1258  microOpCount(0)
1259 {
1260  // Create a protobuf message for the header and read it from the stream
1261  ProtoMessage::InstDepRecordHeader header_msg;
1262  if (!trace.read(header_msg)) {
1263  panic("Failed to read packet header from %s\n", filename);
1264  }
1265  // The header was read; make sure the trace matches our tick frequency
1266  if (header_msg.tick_freq() != SimClock::Frequency) {
1267  panic("Trace %s was recorded with a different tick frequency %d\n",
1268  filename, header_msg.tick_freq());
1269  }
1270 
1271  // Assign window size equal to the field in the trace that was recorded
1272  // when the data dependency trace was captured in the o3cpu model
1273  windowSize = header_msg.window_size();
1274 }
1275 
// Reset the stream such that it can be played once again; delegates to the
// underlying protobuf input stream.
// NOTE(review): the signature line (1277) is missing from this listing.
1276 void
1278 {
1279  trace.reset();
1280 }
1281 
// Read the next dependency record from the trace and fill `element` from it.
// Returns true if a record was read, false at the end of the file.
//
// Required fields are copied directly; optional fields default to 0 when
// absent. compDelay is scaled by timeMultiplier. robNum is taken from the
// running microOpCount, which is incremented once per record plus the record's
// weight if it has one.
// NOTE(review): the signature line (1283) is missing from this listing.
1282 bool
1284 {
1285  ProtoMessage::InstDepRecord pkt_msg;
1286  if (trace.read(pkt_msg)) {
1287  // Required fields
1288  element->seqNum = pkt_msg.seq_num();
1289  element->type = pkt_msg.type();
1290  // Scale the compute delay to effectively scale the Trace CPU frequency
1291  element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1292 
1293  // Repeated field robDepList
1294  element->clearRobDep();
     // The array bound is enforced only by this assert.
1295  assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1296  for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1297  element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1298  element->numRobDep += 1;
1299  }
1300 
1301  // Repeated field
1302  element->clearRegDep();
     // The array bound is enforced only by this assert.
1303  assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1304  for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1305  // There is a possibility that an instruction has both, a register
1306  // and order dependency on an instruction. In such a case, the
1307  // register dependency is omitted
1308  bool duplicate = false;
1309  for (int j = 0; j < element->numRobDep; j++) {
1310  duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1311  }
1312  if (!duplicate) {
1313  element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1314  element->numRegDep += 1;
1315  }
1316  }
1317 
1318  // Optional fields
1319  if (pkt_msg.has_p_addr())
1320  element->physAddr = pkt_msg.p_addr();
1321  else
1322  element->physAddr = 0;
1323 
1324  if (pkt_msg.has_v_addr())
1325  element->virtAddr = pkt_msg.v_addr();
1326  else
1327  element->virtAddr = 0;
1328 
1329  if (pkt_msg.has_size())
1330  element->size = pkt_msg.size();
1331  else
1332  element->size = 0;
1333 
1334  if (pkt_msg.has_flags())
1335  element->flags = pkt_msg.flags();
1336  else
1337  element->flags = 0;
1338 
1339  if (pkt_msg.has_pc())
1340  element->pc = pkt_msg.pc();
1341  else
1342  element->pc = 0;
1343 
1344  // ROB occupancy number
1345  ++microOpCount;
1346  if (pkt_msg.has_weight()) {
1347  microOpCount += pkt_msg.weight();
1348  }
1349  element->robNum = microOpCount;
1350  return true;
1351  }
1352 
1353  // We have reached the end of the file
1354  return false;
1355 }
1356 
// Clear the first register dependency slot equal to reg_dep and decrement
// numRegDep. Returns true if a matching slot was found, false otherwise.
// Cleared slots are set to 0, so a reg_dep of 0 would match an empty slot.
// NOTE(review): the signature line (1358) is missing from this listing.
1357 bool
1359 {
1360  for (auto& own_reg_dep : regDep) {
1361  if (own_reg_dep == reg_dep) {
1362  // If register dependency is found, make it zero and return true
1363  own_reg_dep = 0;
1364  assert(numRegDep > 0);
1365  --numRegDep;
1366  DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1367  "done.\n", seqNum, reg_dep);
1368  return true;
1369  }
1370  }
1371 
1372  // Return false if the dependency is not found
1373  return false;
1374 }
1375 
// Clear the first ROB (order) dependency slot equal to rob_dep and decrement
// numRobDep. Returns true if a matching slot was found, false otherwise.
// NOTE(review): the signature line (1377) is missing from this listing.
1376 bool
1378 {
1379  for (auto& own_rob_dep : robDep) {
1380  if (own_rob_dep == rob_dep) {
1381  // If the rob dependency is found, make it zero and return true
1382  own_rob_dep = 0;
1383  assert(numRobDep > 0);
1384  --numRobDep;
1385  DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1386  "done.\n", seqNum, rob_dep);
1387  return true;
1388  }
1389  }
     // The dependency was not found
1390  return false;
1391 }
1392 
// Set every register dependency slot to zero and reset numRegDep to 0.
// NOTE(review): the signature line (1394) is missing from this listing.
1393 void
1395  for (auto& own_reg_dep : regDep) {
1396  own_reg_dep = 0;
1397  }
1398  numRegDep = 0;
1399 }
1400 
// Set every ROB (order) dependency slot to zero and reset numRobDep to 0.
// NOTE(review): the signature line (1402) is missing from this listing.
1401 void
1403  for (auto& own_rob_dep : robDep) {
1404  own_rob_dep = 0;
1405  }
1406  numRobDep = 0;
1407 }
1408 
// Remove this node's dependency on the completed instruction done_seq_num.
// The ROB dependencies are tried first and the register dependencies second.
// Returns true if the node has no dependencies left afterwards.
// NOTE(review): the signature line (1410) is missing from this listing.
1409 bool
1411 {
1412  // If it is an rob dependency then remove it
1413  if (!removeRobDep(done_seq_num)) {
1414  // If it is not an rob dependency then it must be a register dependency
1415  // If the register dependency is not found, it violates an assumption
1416  // and must be caught by assert.
1417  bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1418  assert(regdep_found);
1419  }
1420  // Return true if the node is dependency free
1421  return (numRobDep == 0 && numRegDep == 0);
1422 }
1423 
// Write out this element in a trace-like, comma-separated form through the
// TraceCPUData debug flag: sequence number, type, address/size/flags for
// loads and stores, compute delay, then the ROB dependencies, the register
// dependencies and the dependents.
//
// Each dependency list is printed up to its first zero entry. The walk uses a
// range-for so it stays inside the array even when every slot is non-zero; the
// earlier index loops had no upper bound.
// NOTE(review): the signature line (1425) is missing from this listing.
1424 void
1426 {
1427  DPRINTFR(TraceCPUData, "%lli", seqNum);
1428  DPRINTFR(TraceCPUData, ",%s", typeToStr());
1429  if (isLoad() || isStore()) {
1430  DPRINTFR(TraceCPUData, ",%i", physAddr);
1431  DPRINTFR(TraceCPUData, ",%i", size);
1432  DPRINTFR(TraceCPUData, ",%i", flags);
1433  }
1434  DPRINTFR(TraceCPUData, ",%lli", compDelay);
1435  DPRINTFR(TraceCPUData, "robDep:");
1436  for (const auto& own_rob_dep : robDep) {
1437  if (own_rob_dep == 0)
1438  break;
1439  DPRINTFR(TraceCPUData, ",%lli", own_rob_dep);
1440  }
1441  DPRINTFR(TraceCPUData, "regDep:");
1442  for (const auto& own_reg_dep : regDep) {
1443  if (own_reg_dep == 0)
1444  break;
1445  DPRINTFR(TraceCPUData, ",%lli", own_reg_dep);
1446  }
1447  auto child_itr = dependents.begin();
1448  DPRINTFR(TraceCPUData, "dependents:");
1449  while (child_itr != dependents.end()) {
1450  DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1451  child_itr++;
1452  }
1453 
1454  DPRINTFR(TraceCPUData, "\n");
1455 }
1456 
// Return the name of this node's record type as produced by
// Record::RecordType_Name().
// NOTE(review): the signature line (1458) is missing from this listing.
1457 std::string
1459 {
1460  return Record::RecordType_Name(type);
1461 }
1462 
// Constructor of the fixed (instruction fetch) trace input stream: opens the
// trace named by filename and reads the protobuf header.
//
// Panics if the header cannot be read or if the trace was recorded with a
// tick frequency different from SimClock::Frequency. The frequency check used
// to sit after the panic() inside the failed-read branch, where it could never
// run, and its message lacked the filename argument.
// NOTE(review): the signature line (1463) is missing from this listing.
1464  : trace(filename)
1465 {
1466  // Create a protobuf message for the header and read it from the stream
1467  ProtoMessage::PacketHeader header_msg;
1468  if (!trace.read(header_msg)) {
1469  panic("Failed to read packet header from %s\n", filename);
1470  }
1471  // The header was read; make sure the trace matches our tick frequency
1472  if (header_msg.tick_freq() != SimClock::Frequency) {
1473  panic("Trace %s was recorded with a different tick frequency %d\n",
1474  filename, header_msg.tick_freq());
1475  }
1476 }
1477 
// Reset the stream such that it can be played once again; delegates to the
// underlying protobuf input stream.
// NOTE(review): the signature line (1479) is missing from this listing.
1478 void
1480 {
1481  trace.reset();
1482 }
1483 
// Read the next packet record from the trace and fill `element` from it.
// Returns true if a record was read, false at the end of the file.
// flags and pc are optional in the record and default to 0 when absent.
// NOTE(review): the signature line (1485) is missing from this listing.
1484 bool
1486 {
1487  ProtoMessage::Packet pkt_msg;
1488  if (trace.read(pkt_msg)) {
1489  element->cmd = pkt_msg.cmd();
1490  element->addr = pkt_msg.addr();
1491  element->blocksize = pkt_msg.size();
1492  element->tick = pkt_msg.tick();
1493  element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1494  element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1495  return true;
1496  }
1497 
1498  // We have reached the end of the file
1499  return false;
1500 }
InputStream trace
Input stream used for reading the input trace file.
Definition: trace_cpu.hh:993
void execute()
This is the main execute function which consumes nodes from the sorted readyList. ...
Definition: trace_cpu.cc:427
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
Struct to store a ready-to-execute node and its execution tick.
Definition: trace_cpu.hh:678
void schedDcacheNext()
This is the control flow that uses the functionality of the dcacheGen to replay the trace...
Definition: trace_cpu.cc:178
#define DPRINTF(x,...)
Definition: trace.hh:225
const uint16_t sizeStoreBuffer
The size of store buffer.
Definition: trace_cpu.hh:753
const uint64_t progressMsgInterval
Interval of committed instructions specified by the user at which a progress info message is printed...
Definition: trace_cpu.hh:1122
bool send(Addr addr, unsigned size, const MemCmd &cmd, Request::FlagsType flags, Addr pc)
Creates a new request assigning the request parameters passed by the arguments.
Definition: trace_cpu.cc:1144
bool isStore() const
Is the node a store.
Definition: trace_cpu.hh:643
Addr blocksize
The size of the access for the request.
Definition: trace_cpu.hh:356
TraceCPU & owner
Reference of the TraceCPU.
Definition: trace_cpu.hh:984
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:81
NodeSeqNum seqNum
The sequence number of the ready node.
Definition: trace_cpu.hh:681
void exit()
Exit the FixedRetryGen.
Definition: trace_cpu.cc:1107
Definition: packet.hh:70
void recvReqRetry()
Handle a retry signalled by the cache if data access failed in the first attempt. ...
Definition: trace_cpu.cc:1248
uint32_t windowSize
The window size that is read from the header of the protobuf trace and used to process the dependency...
Definition: trace_cpu.hh:811
bool isTraceComplete()
Returns the traceComplete variable which is set when end of the input trace file is reached...
Definition: trace_cpu.hh:492
EventFunctionWrapper dcacheNextEvent
Event for the control flow method schedDcacheNext()
Definition: trace_cpu.hh:1081
Bitfield< 7 > i
bool removeRegDep(NodeSeqNum reg_dep)
Remove completed instruction from register dependency array.
Definition: trace_cpu.cc:1358
uint16_t numInFlightStores
Number of ready stores for which request may or may not be sent.
Definition: trace_cpu.hh:780
Request::Flags flags
Request flags if any.
Definition: trace_cpu.hh:609
RecordType type
Type of the node corresponding to the instruction modelled by it.
Definition: trace_cpu.hh:597
PacketPtr retryPkt
PacketPtr used to store the packet to retry.
Definition: trace_cpu.hh:999
Stats::Scalar numSchedIcacheEvent
Definition: trace_cpu.hh:1132
bool nextExecute()
Reads a line of the trace file.
Definition: trace_cpu.cc:1113
void updateNumOps(uint64_t rob_num)
Definition: trace_cpu.cc:92
~TraceCPU()
Definition: trace_cpu.cc:80
HardwareResource(uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
Constructor that initializes the sizes of the structures.
Definition: trace_cpu.cc:875
PacketPtr executeMemReq(GraphNode *node_ptr)
Creates a new request for a load or store assigning the request parameters.
Definition: trace_cpu.cc:621
Request::FlagsType flags
Potential request flags to use.
Definition: trace_cpu.hh:362
void completeMemAccess(PacketPtr pkt)
When a load writeback is received, that is when the load completes, release the dependents on it...
Definition: trace_cpu.cc:734
physAddr
Definition: misc.hh:831
ip6_addr_t addr
Definition: inet.hh:330
static PacketPtr createWrite(const RequestPtr &req)
Definition: packet.hh:913
std::map< NodeSeqNum, NodeRobNum > inFlightNodes
A map from the sequence number to the ROB number of the in-flight nodes.
Definition: trace_cpu.hh:771
void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: trace_cpu.cc:110
Tick traceOffset
This stores the time offset in the trace, which is taken away from the ready times of requests...
Definition: trace_cpu.hh:1095
bool isExecComplete() const
Returns the execComplete variable which is set when the last node is executed.
Definition: trace_cpu.hh:962
Port & getInstPort()
Used to get a reference to the icache port.
Definition: trace_cpu.hh:1142
std::string instTraceFile
File names for input instruction and data traces.
Definition: trace_cpu.hh:331
void exit()
Exit the ElasticDataGen.
Definition: trace_cpu.cc:338
void regStats()
Callback to set stat parameters.
Definition: trace_cpu.cc:213
static const uint8_t maxRobDep
The maximum no.
Definition: trace_cpu.hh:582
bool recvTimingResp(PacketPtr pkt)
Receive the timing response and simply delete the packet since instruction fetch requests are issued a...
Definition: trace_cpu.cc:1212
const uint16_t sizeROB
The size of the ROB used to throttle the max.
Definition: trace_cpu.hh:747
void checkAndSchedExitEvent()
This is called when either generator finishes executing from the trace.
Definition: trace_cpu.cc:192
bool read(google::protobuf::Message &msg)
Read a message from the stream.
Definition: protoio.cc:180
bool oneTraceComplete
Set to true when one of the generators finishes replaying its trace.
Definition: trace_cpu.hh:1087
void schedIcacheNext()
This is the control flow that uses the functionality of the icacheGen to replay the trace...
Definition: trace_cpu.cc:153
Stats::Scalar numSchedDcacheEvent
Definition: trace_cpu.hh:1131
MemCmd cmd
Specifies if the request is to be a read or a write.
Definition: trace_cpu.hh:350
Bitfield< 23, 0 > offset
Definition: types.hh:152
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
Definition: port.hh:441
void clearRobDep()
Initialize ROB (order) dependency array to all zeroes.
Definition: trace_cpu.cc:1402
uint64_t NodeSeqNum
Node sequence number type.
Definition: trace_cpu.hh:560
bool readNextWindow()
Reads a line of the trace file.
Definition: trace_cpu.cc:344
void reset()
Reset the stream such that it can be played once again.
Definition: trace_cpu.cc:1277
const int MaxInstSrcRegs
Definition: registers.hh:57
Tick clockPeriod() const
uint8_t numRegDep
Number of register dependencies.
Definition: trace_cpu.hh:630
bool isWrite() const
Definition: packet.hh:523
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition: core.cc:46
void addDepsOnParent(GraphNode *new_node, T &dep_array, uint8_t &num_dep)
Iterate over the dependencies of a new node and add the new node to the list of dependents of the par...
Definition: trace_cpu.cc:397
Stats::Scalar numSendSucceeded
Definition: trace_cpu.hh:1046
void reset()
Reset the stream such that it can be played once again.
Definition: trace_cpu.cc:1479
bool traceComplete
Set to true when end of trace is reached.
Definition: trace_cpu.hh:1002
Tick init()
Called from TraceCPU init().
Definition: trace_cpu.cc:301
void clearRegDep()
Initialize register dependency array to all zeroes.
Definition: trace_cpu.cc:1394
RequestPtr req
A pointer to the original request.
Definition: packet.hh:321
void regStats() override
Callback to set stat parameters.
Definition: base.cc:384
ProtoInputStream trace
Input file stream for the protobuf trace.
Definition: trace_cpu.hh:794
CountedExitEvent * execCompleteEvent
A CountedExitEvent which when serviced decrements the counter.
Definition: trace_cpu.hh:1110
uint8_t type
Definition: inet.hh:328
#define inform(...)
Definition: logging.hh:209
const Tick MaxTick
Definition: types.hh:63
const double timeMultiplier
A multiplier for the compute delays in the trace to modulate the Trace CPU frequency either up or dow...
Definition: trace_cpu.hh:802
Tick curTick()
The current simulated tick.
Definition: core.hh:44
bool awaitingResponse() const
Check if there are any outstanding requests, i.e.
Definition: trace_cpu.cc:997
Bitfield< 4 > pc
virtual Port & getInstPort()=0
Purely virtual method that returns a reference to the instruction port.
bool isStrictlyOrdered() const
Return true if node has a request which is strictly ordered.
Definition: trace_cpu.hh:664
#define DTRACE(x)
Definition: trace.hh:223
The trace cpu replays traces generated using the elastic trace probe attached to the O3 CPU model...
Definition: trace_cpu.hh:140
uint64_t progressMsgThreshold
Definition: trace_cpu.hh:1129
NodeRobNum robNum
ROB occupancy number.
Definition: trace_cpu.hh:594
TraceCPU(TraceCPUParams *params)
Definition: trace_cpu.cc:45
uint64_t Tick
Tick count type.
Definition: types.hh:61
const bool enableEarlyExit
Exit when any one Trace CPU completes its execution.
Definition: trace_cpu.hh:1116
EventFunctionWrapper icacheNextEvent
Event for the control flow method schedIcacheNext()
Definition: trace_cpu.hh:1078
The struct GraphNode stores an instruction in the trace file.
Definition: trace_cpu.hh:574
This struct stores a line in the trace file.
Definition: trace_cpu.hh:347
void dcacheRetryRecvd()
When data cache port receives a retry, schedule event dcacheNextEvent.
Definition: trace_cpu.cc:1186
void reset()
Reset the input stream and seek to the beginning of the file.
Definition: protoio.cc:170
FixedRetryGen icacheGen
Instance of FixedRetryGen to replay instruction read requests.
Definition: trace_cpu.hh:1057
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:199
void recvReqRetry()
Handle a retry signalled by the cache if instruction read failed in the first attempt.
Definition: trace_cpu.cc:1222
void schedule(Event &event, Tick when)
Definition: eventq.hh:934
bool isAvailable(const GraphNode *new_node) const
Check if structures required to issue a node are free.
Definition: trace_cpu.cc:953
void reschedule(Event &event, Tick when, bool always=false)
Definition: eventq.hh:952
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
void release(const GraphNode *done_node)
Release appropriate structures for a completed node.
Definition: trace_cpu.cc:905
Derived & precision(int _precision)
Set the precision and marks this stat to print at the end of simulation.
Definition: statistics.hh:321
uint16_t numInFlightLoads
Number of ready loads for which request may or may not be sent.
Definition: trace_cpu.hh:777
uint64_t compDelay
Computational delay.
Definition: trace_cpu.hh:621
MasterPort & port
Reference of the port to be used to issue memory requests.
Definition: trace_cpu.hh:987
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:907
static int numTraceCPUs
Number of Trace CPUs in the system used as a shared variable and passed to the CountedExitEvent event...
Definition: trace_cpu.hh:1103
Stats::Formula cpi
Stat for the CPI.
Definition: trace_cpu.hh:1137
void dcacheRecvTimingResp(PacketPtr pkt)
When data cache port receives a response, this calls the dcache generator method handle to complete t...
Definition: trace_cpu.cc:1228
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Tick execTick
The tick at which the ready node must be executed.
Definition: trace_cpu.hh:684
NodeRobNum oldestInFlightRobNum
The ROB number of the oldest in-flight node.
Definition: trace_cpu.hh:774
Bitfield< 15 > system
Definition: misc.hh:997
const uint16_t sizeLoadBuffer
The size of load buffer.
Definition: trace_cpu.hh:759
Bitfield< 24 > j
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:459
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:276
Stats::Scalar numOps
Stat for number of simulated micro-ops.
Definition: trace_cpu.hh:1135
bool checkAndIssue(const GraphNode *node_ptr, bool first=true)
Attempts to issue a node once the node's source dependencies are complete.
Definition: trace_cpu.cc:694
const MasterID masterID
MasterID used for the requests being sent.
Definition: trace_cpu.hh:990
virtual const std::string name() const
Definition: sim_object.hh:129
Tick tick
The time at which the request should be sent.
Definition: trace_cpu.hh:359
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition: sim_events.cc:88
bool recvTimingResp(PacketPtr pkt)
Receive the timing response and call dcacheRecvTimingResp() method of the dcacheGen to handle completi...
Definition: trace_cpu.cc:1235
uint8_t numRobDep
Number of order dependencies.
Definition: trace_cpu.hh:618
Stats::Scalar numSendFailed
Definition: trace_cpu.hh:1047
bool isWrite() const
Definition: packet.hh:192
void schedDcacheNextEvent(Tick when)
Schedule event dcacheNextEvent at the given tick.
Definition: trace_cpu.cc:1196
std::string dataTraceFile
Definition: trace_cpu.hh:331
Stats::Scalar numSendAttempted
Definition: trace_cpu.hh:1045
ElasticDataGen dcacheGen
Instance of ElasticDataGen to replay data read and write requests.
Definition: trace_cpu.hh:1060
Addr virtAddr
The virtual address for the request if any.
Definition: trace_cpu.hh:603
uint32_t size
Size of request if any.
Definition: trace_cpu.hh:606
void takeOverFrom(Port *old)
A utility function to make it easier to swap out ports.
Definition: port.hh:128
void printReadyList()
Print readyList for debugging using debug flag TraceCPUData.
Definition: trace_cpu.cc:858
InputStream(const std::string &filename, const double time_multiplier)
Create a trace input stream for a given file name.
Definition: trace_cpu.cc:1253
Stats::Scalar numCycles
Definition: base.hh:599
bool isLoad() const
Is the node a load.
Definition: trace_cpu.hh:640
void releaseStoreBuffer()
Release store buffer entry for a completed store.
Definition: trace_cpu.cc:946
void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: base.cc:277
void adjustInitTraceOffset(Tick &offset)
Adjust traceOffset based on what TraceCPU init() determines on comparing the offsets in the fetch req...
Definition: trace_cpu.cc:331
void occupy(const GraphNode *new_node)
Occupy appropriate structures for an issued node.
Definition: trace_cpu.cc:886
virtual Port & getDataPort()=0
Purely virtual method that returns a reference to the data port.
RegDepArray regDep
Array of register dependencies (incoming) if any.
Definition: trace_cpu.hh:627
Stats::Scalar numRetrySucceeded
Definition: trace_cpu.hh:1048
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1072
Addr addr
The address for the request.
Definition: trace_cpu.hh:353
Tick init()
Called from TraceCPU init().
Definition: trace_cpu.cc:1043
Port & getDataPort()
Used to get a reference to the dcache port.
Definition: trace_cpu.hh:1145
void icacheRetryRecvd()
When instruction cache port receives a retry, schedule event icacheNextEvent.
Definition: trace_cpu.cc:1176
Addr physAddr
The address for the request if any.
Definition: trace_cpu.hh:600
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:309
void printOccupancy()
Print resource occupancy for debugging.
Definition: trace_cpu.cc:1003
void addToSortedReadyList(NodeSeqNum seq_num, Tick exec_tick)
Add a ready node to the readyList.
Definition: trace_cpu.cc:801
uint64_t FlagsType
Definition: request.hh:89
bool read(TraceElement *element)
Attempt to read a trace element from the stream, and also notify the caller if the end of the file wa...
Definition: trace_cpu.cc:1485
void writeElementAsTrace() const
Write out element in trace-compatible format using debug flag TraceCPUData.
Definition: trace_cpu.cc:1425
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:499
RobDepArray robDep
Array of order dependencies.
Definition: trace_cpu.hh:615
const Params * params() const
Definition: base.hh:307
std::string typeToStr() const
Return string specifying the type of the node.
Definition: trace_cpu.cc:1458
void takeOverFrom(BaseCPU *oldCPU)
Load the state of a CPU from the previous CPU object, invoked on all new CPUs that are about to be sw...
Definition: trace_cpu.cc:102
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:700
int ContextID
Globally unique thread context ID.
Definition: types.hh:229
const std::string & name() const
Returns name of the ElasticDataGen instance.
Definition: trace_cpu.hh:886
Command cmd
Definition: packet.hh:181
bool read(GraphNode *element)
Attempt to read a trace element from the stream, and also notify the caller if the end of the file wa...
Definition: trace_cpu.cc:1283
uint64_t microOpCount
Count of committed ops read from trace plus the filtered ops.
Definition: trace_cpu.hh:805
NodeSeqNum seqNum
Instruction sequence number.
Definition: trace_cpu.hh:591
bool removeRobDep(NodeSeqNum rob_dep)
Remove completed instruction from order dependency array.
Definition: trace_cpu.cc:1377
bool removeDepOnInst(NodeSeqNum done_seq_num)
Check for all dependencies on completed inst.
Definition: trace_cpu.cc:1410
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:103
InputStream(const std::string &filename)
Create a trace input stream for a given file name.
Definition: trace_cpu.cc:1463
bool tryNext()
This tries to send current or retry packet and returns true if successful.
Definition: trace_cpu.cc:1058
#define DPRINTFR(...)
Definition: trace.hh:227

Generated on Fri Jul 3 2020 15:53:01 for gem5 by doxygen 1.8.13