gem5 v22.1.0.0
trace_cpu.cc
1 /*
2  * Copyright (c) 2013 - 2016 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include "cpu/trace/trace_cpu.hh"
39 
40 #include "base/compiler.hh"
41 #include "sim/sim_exit.hh"
42 
43 namespace gem5
44 {
45 
46 // Declare and initialize the static counter for number of trace CPUs.
47 int TraceCPU::numTraceCPUs = 0;
48 
49 TraceCPU::TraceCPU(const TraceCPUParams &params)
50  : BaseCPU(params),
51  icachePort(this),
52  dcachePort(this),
53  instRequestorID(params.system->getRequestorId(this, "inst")),
54  dataRequestorID(params.system->getRequestorId(this, "data")),
55  instTraceFile(params.instTraceFile),
56  dataTraceFile(params.dataTraceFile),
57  icacheGen(*this, ".iside", icachePort, instRequestorID, instTraceFile),
58  dcacheGen(*this, ".dside", dcachePort, dataRequestorID, dataTraceFile,
59  params),
60  icacheNextEvent([this]{ schedIcacheNext(); }, name()),
61  dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
62  oneTraceComplete(false),
63  traceOffset(0),
64  execCompleteEvent(nullptr),
65  enableEarlyExit(params.enableEarlyExit),
66  progressMsgInterval(params.progressMsgInterval),
67  progressMsgThreshold(params.progressMsgInterval), traceStats(this)
68 {
69  // Increment static counter for number of Trace CPUs.
70  numTraceCPUs++;
71 
72  // Check that the python parameters for sizes of ROB, store buffer and
73  // load buffer do not overflow the corresponding C++ variables.
74  fatal_if(params.sizeROB > UINT16_MAX,
75  "ROB size set to %d exceeds the max. value of %d.",
76  params.sizeROB, UINT16_MAX);
77  fatal_if(params.sizeStoreBuffer > UINT16_MAX,
78  "Store buffer size set to %d exceeds the max. value of %d.",
79  params.sizeStoreBuffer, UINT16_MAX);
80  fatal_if(params.sizeLoadBuffer > UINT16_MAX,
81  "Load buffer size set to %d exceeds the max. value of %d.",
82  params.sizeLoadBuffer, UINT16_MAX);
83 }
84 
85 void
86 TraceCPU::updateNumOps(uint64_t rob_num)
87 {
88  traceStats.numOps = rob_num;
89  if (progressMsgInterval != 0 &&
90  static_cast<uint64_t>(traceStats.numOps.value()) >= progressMsgThreshold) {
91  inform("%s: %i insts committed\n", name(), progressMsgThreshold);
92  progressMsgThreshold += progressMsgInterval;
93  }
94 }
95 
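Editorial aside: the progress-reporting arithmetic in updateNumOps() above is a small reusable pattern. A minimal standalone C++ sketch of it, with illustrative names that are not part of the gem5 API:

    #include <cstdint>
    #include <cstdio>

    // Report once per crossed interval, then advance the threshold.
    struct ProgressReporter
    {
        uint64_t interval;   // 0 disables reporting
        uint64_t threshold;  // next op count at which to report

        void update(uint64_t num_ops)
        {
            if (interval != 0 && num_ops >= threshold) {
                std::printf("%llu insts committed\n",
                            (unsigned long long)threshold);
                threshold += interval;
            }
        }
    };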
96 void
97 TraceCPU::takeOverFrom(BaseCPU *oldCPU)
98 {
99  // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
100  getInstPort().takeOverFrom(&oldCPU->getInstPort());
101  getDataPort().takeOverFrom(&oldCPU->getDataPort());
102 }
103 
104 void
105 TraceCPU::init()
106 {
107  DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\".\n",
108  instTraceFile);
109  DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
110  dataTraceFile);
111 
112  BaseCPU::init();
113 
114  // Get the send tick of the first instruction read request
115  Tick first_icache_tick = icacheGen.init();
116 
117  // Get the send tick of the first data read/write request
118  Tick first_dcache_tick = dcacheGen.init();
119 
120  // Set the trace offset as the minimum of that in both traces
121  traceOffset = std::min(first_icache_tick, first_dcache_tick);
122  inform("%s: Time offset (tick) found as min of both traces is %lli.",
123  name(), traceOffset);
124 
125  // Schedule next icache and dcache event by subtracting the offset
126  schedule(icacheNextEvent, first_icache_tick - traceOffset);
127  schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
128 
129  // Adjust the trace offset for the dcache generator's ready nodes
130  // We don't need to do this for the icache generator as it will
131  // send its first request at the first event and schedule subsequent
132  // events using a relative tick delta
133  dcacheGen.adjustInitTraceOffset(traceOffset);
134 
135  // If the Trace CPU simulation is configured to exit on any one trace
136  // completion then we don't need a counted event to count down all Trace
137  // CPUs in the system. If not then instantiate a counted event.
138  if (!enableEarlyExit) {
139  // The static counter for number of Trace CPUs is correctly set at
140  // this point so create an event and pass it.
141  execCompleteEvent = new CountedExitEvent("end of all traces reached.",
142  numTraceCPUs);
143  }
144 
145 }
146 
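The offset handling in init() above is a small normalization step: both traces are shifted so the earlier of the two first send ticks becomes tick zero while their relative spacing is preserved. A hedged standalone sketch (illustrative names, not gem5 API):

    #include <algorithm>
    #include <cstdint>

    // Shift both start ticks by their minimum; one of them becomes 0.
    inline void
    normalizeStartTicks(uint64_t& first_icache_tick,
                        uint64_t& first_dcache_tick)
    {
        uint64_t offset = std::min(first_icache_tick, first_dcache_tick);
        first_icache_tick -= offset;
        first_dcache_tick -= offset;
    }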
147 void
148 TraceCPU::schedIcacheNext()
149 {
150  DPRINTF(TraceCPUInst, "IcacheGen event.\n");
151 
152  // Try to send the current packet or a retry packet if there is one
153  bool sched_next = icacheGen.tryNext();
154  // If packet sent successfully, schedule next event
155  if (sched_next) {
156  DPRINTF(TraceCPUInst,
157  "Scheduling next icacheGen event at %d.\n",
158  curTick() + icacheGen.tickDelta());
159  schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
160  ++traceStats.numSchedIcacheEvent;
161  } else {
162  // check if traceComplete. If not, do nothing because sending failed
163  // and next event will be scheduled via RecvRetry()
164  if (icacheGen.isTraceComplete()) {
165  // If this is the first trace to complete, set the variable. If it
166  // is already set then both traces are complete to exit sim.
167  checkAndSchedExitEvent();
168  }
169  }
170  return;
171 }
172 
173 void
174 TraceCPU::schedDcacheNext()
175 {
176  DPRINTF(TraceCPUData, "DcacheGen event.\n");
177 
178  // Update stat for numCycles
179  baseStats.numCycles = clockEdge() / clockPeriod();
180 
181  dcacheGen.execute();
182  if (dcacheGen.isExecComplete()) {
183  checkAndSchedExitEvent();
184  }
185 }
186 
187 void
188 TraceCPU::checkAndSchedExitEvent()
189 {
190  if (!oneTraceComplete) {
191  oneTraceComplete = true;
192  } else {
193  // Schedule event to indicate execution is complete as both
194  // instruction and data access traces have been played back.
195  inform("%s: Execution complete.", name());
196  // If the replay is configured to exit early, that is when any one
197  // execution is complete then exit immediately and return. Otherwise,
198  // schedule the counted exit that counts down completion of each Trace
199  // CPU.
200  if (enableEarlyExit) {
201  exitSimLoop("End of trace reached");
202  } else {
203  schedule(*execCompleteEvent, curTick());
204  }
205  }
206 }
207 TraceCPU::TraceStats::TraceStats(TraceCPU *trace) :
208  statistics::Group(trace),
209  ADD_STAT(numSchedDcacheEvent, statistics::units::Count::get(),
210  "Number of events scheduled to trigger data request generator"),
211  ADD_STAT(numSchedIcacheEvent, statistics::units::Count::get(),
212  "Number of events scheduled to trigger instruction request "
213  "generator"),
214  ADD_STAT(numOps, statistics::units::Count::get(),
215  "Number of micro-ops simulated by the Trace CPU"),
216  ADD_STAT(cpi, statistics::units::Rate<
217  statistics::units::Cycle, statistics::units::Count>::get(),
218  "Cycles per micro-op used as a proxy for CPI",
219  trace->baseStats.numCycles / numOps)
220 {
221  cpi.precision(6);
222 }
223 
224 TraceCPU::ElasticDataGen::
225 ElasticDataGenStatGroup::ElasticDataGenStatGroup(statistics::Group *parent,
226  const std::string& _name) :
227  statistics::Group(parent, _name.c_str()),
228  ADD_STAT(maxDependents, statistics::units::Count::get(),
229  "Max number of dependents observed on a node"),
230  ADD_STAT(maxReadyListSize, statistics::units::Count::get(),
231  "Max size of the ready list observed"),
232  ADD_STAT(numSendAttempted, statistics::units::Count::get(),
233  "Number of first attempts to send a request"),
234  ADD_STAT(numSendSucceeded, statistics::units::Count::get(),
235  "Number of successful first attempts"),
236  ADD_STAT(numSendFailed, statistics::units::Count::get(),
237  "Number of failed first attempts"),
238  ADD_STAT(numRetrySucceeded, statistics::units::Count::get(),
239  "Number of successful retries"),
240  ADD_STAT(numSplitReqs, statistics::units::Count::get(),
241  "Number of split requests"),
242  ADD_STAT(numSOLoads, statistics::units::Count::get(),
243  "Number of strictly ordered loads"),
244  ADD_STAT(numSOStores, statistics::units::Count::get(),
245  "Number of strictly ordered stores"),
246  ADD_STAT(dataLastTick, statistics::units::Tick::get(),
247  "Last tick simulated from the elastic data trace")
248 {
249 }
250 
251 Tick
252 TraceCPU::ElasticDataGen::init()
253 {
254  DPRINTF(TraceCPUData, "Initializing data memory request generator "
255  "DcacheGen: elastic issue with retry.\n");
256 
257  panic_if(!readNextWindow(),
258  "Trace has %d elements. It must have at least %d elements.",
259  depGraph.size(), 2 * windowSize);
260  DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
261  depGraph.size());
262 
263  panic_if(!readNextWindow(),
264  "Trace has %d elements. It must have at least %d elements.",
265  depGraph.size(), 2 * windowSize);
266  DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
267  depGraph.size());
268 
269  // Print readyList
270  if (debug::TraceCPUData) {
271  printReadyList();
272  }
273  auto free_itr = readyList.begin();
274  DPRINTF(TraceCPUData,
275  "Execute tick of the first dependency free node %lli is %d.\n",
276  free_itr->seqNum, free_itr->execTick);
277  // Return the execute tick of the earliest ready node so that an event
278  // can be scheduled to call execute()
279  return (free_itr->execTick);
280 }
281 
282 void
283 TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
284 {
285  for (auto& free_node : readyList) {
286  free_node.execTick -= offset;
287  }
288 }
289 
290 void
291 TraceCPU::ElasticDataGen::exit()
292 {
293  trace.reset();
294 }
295 
296 bool
297 TraceCPU::ElasticDataGen::readNextWindow()
298 {
299  // Read and add next window
300  DPRINTF(TraceCPUData, "Reading next window from file.\n");
301 
302  if (traceComplete) {
303  // We are at the end of the file, thus we have no more records.
304  // Return false.
305  return false;
306  }
307 
308  DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
309  depGraph.size());
310 
311  uint32_t num_read = 0;
312  while (num_read != windowSize) {
313 
314  // Create a new graph node
315  GraphNode* new_node = new GraphNode;
316 
317  // Read the next line to get the next record. If that fails then end of
318  // trace has been reached and traceComplete needs to be set in addition
319  // to returning false.
320  if (!trace.read(new_node)) {
321  DPRINTF(TraceCPUData, "\tTrace complete!\n");
322  traceComplete = true;
323  return false;
324  }
325 
326  // Annotate the ROB dependencies of the new node onto the parent nodes.
327  addDepsOnParent(new_node, new_node->robDep);
328  // Annotate the register dependencies of the new node onto the parent
329  // nodes.
330  addDepsOnParent(new_node, new_node->regDep);
331 
332  num_read++;
333  // Add to map
334  depGraph[new_node->seqNum] = new_node;
335  if (new_node->robDep.empty() && new_node->regDep.empty()) {
336  // Source dependencies are already complete, check if resources
337  // are available and issue. The execution time is approximated
338  // to current time plus the computational delay.
339  checkAndIssue(new_node);
340  }
341  }
342 
343  DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
344  depGraph.size());
345  return true;
346 }
347 
348 template<typename T>
349 void
350 TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, T& dep_list)
351 {
352  auto dep_it = dep_list.begin();
353  while (dep_it != dep_list.end()) {
354  // We look up the valid dependency, i.e. the parent of this node
355  auto parent_itr = depGraph.find(*dep_it);
356  if (parent_itr != depGraph.end()) {
357  // If the parent is found, it is yet to be executed. Append a
358  // pointer to the new node to the dependents list of the parent
359  // node.
360  parent_itr->second->dependents.push_back(new_node);
361  auto num_depts = parent_itr->second->dependents.size();
362  elasticStats.maxDependents = std::max<double>(num_depts,
363  elasticStats.maxDependents.value());
364  dep_it++;
365  } else {
366  // The dependency is not found in the graph. So consider
367  // the execution of the parent is complete, i.e. remove this
368  // dependency.
369  dep_it = dep_list.erase(dep_it);
370  }
371  }
372 }
373 
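The loop in addDepsOnParent() above implements a simple rule: a dependency either points at a parent still in the graph (register the new node as its dependent) or at one that has already retired (drop the dependency). A standalone sketch of that rule with simplified stand-in types, not the actual gem5 structures:

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct Node
    {
        uint64_t seqNum;
        std::vector<uint64_t> deps;     // incoming dependencies
        std::vector<Node*> dependents;  // outgoing edges
    };

    inline void
    annotateParents(std::unordered_map<uint64_t, Node*>& graph, Node* new_node)
    {
        auto dep_it = new_node->deps.begin();
        while (dep_it != new_node->deps.end()) {
            auto parent = graph.find(*dep_it);
            if (parent != graph.end()) {
                // Parent still pending: record the edge.
                parent->second->dependents.push_back(new_node);
                ++dep_it;
            } else {
                // Parent already executed: the dependency is complete.
                dep_it = new_node->deps.erase(dep_it);
            }
        }
    }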
374 void
375 TraceCPU::ElasticDataGen::execute()
376 {
377  DPRINTF(TraceCPUData, "Execute start occupancy:\n");
378  DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
379  "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
380  depFreeQueue.size());
381  hwResource.printOccupancy();
382 
383  // Read next window to make sure that dependents of all dep-free nodes
384  // are in the depGraph
385  if (nextRead) {
386  readNextWindow();
387  nextRead = false;
388  }
389 
390  // First attempt to issue the pending dependency-free nodes held
391  // in depFreeQueue. If resources have become available for a node,
392  // then issue it, i.e. add the node to readyList.
393  while (!depFreeQueue.empty()) {
394  if (checkAndIssue(depFreeQueue.front(), false)) {
395  DPRINTF(TraceCPUData,
396  "Removing from depFreeQueue: seq. num %lli.\n",
397  (depFreeQueue.front())->seqNum);
398  depFreeQueue.pop();
399  } else {
400  break;
401  }
402  }
403  // Proceed to execute from readyList
404  auto graph_itr = depGraph.begin();
405  auto free_itr = readyList.begin();
406  // Iterate through readyList until the next free node has its execute
407  // tick later than curTick or the end of readyList is reached
408  while (free_itr->execTick <= curTick() && free_itr != readyList.end()) {
409 
410  // Get pointer to the node to be executed
411  graph_itr = depGraph.find(free_itr->seqNum);
412  assert(graph_itr != depGraph.end());
413  GraphNode* node_ptr = graph_itr->second;
414 
415  // If there is a retryPkt send that else execute the load
416  if (retryPkt) {
417  // The retryPkt must be the request that was created by the
418  // first node in the readyList.
419  if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
420  panic("Retry packet's sequence number does not match "
421  "the first node in the readyList.\n");
422  }
423  if (port.sendTimingReq(retryPkt)) {
424  ++elasticStats.numRetrySucceeded;
425  retryPkt = nullptr;
426  }
427  } else if (node_ptr->isLoad() || node_ptr->isStore()) {
428  // If there is no retryPkt, attempt to send a memory request in
429  // case of a load or store node. If the send fails, executeMemReq()
430  // returns a packet pointer, which we save in retryPkt. In case of
431  // a comp node we don't do anything and simply continue as if the
432  // execution of the comp node succeeded.
433  retryPkt = executeMemReq(node_ptr);
434  }
435  // If the retryPkt or a new load/store node failed, we exit from here
436  // as a retry from cache will bring the control to execute(). The
437  // first node in readyList then, will be the failed node.
438  if (retryPkt) {
439  break;
440  }
441 
442  // Proceed to remove dependencies for the successfully executed node.
443  // If it is a load which is not strictly ordered and we sent a
444  // request for it successfully, we do not yet mark any register
445  // dependencies complete. But as per dependency modelling we need
446  // to mark ROB dependencies of load and non load/store nodes which
447  // are based on successful sending of the load as complete.
448  if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
449  // If execute succeeded mark its dependents as complete
450  DPRINTF(TraceCPUData,
451  "Node seq. num %lli sent. Waking up dependents..\n",
452  node_ptr->seqNum);
453 
454  auto child_itr = (node_ptr->dependents).begin();
455  while (child_itr != (node_ptr->dependents).end()) {
456  // ROB dependency of a store on a load must not be removed
457  // after load is sent but after response is received
458  if (!(*child_itr)->isStore() &&
459  (*child_itr)->removeRobDep(node_ptr->seqNum)) {
460 
461  // Check if the child node has become dependency free
462  if ((*child_itr)->robDep.empty() &&
463  (*child_itr)->regDep.empty()) {
464 
465  // Source dependencies are complete, check if
466  // resources are available and issue
467  checkAndIssue(*child_itr);
468  }
469  // Remove this child for the sent load and point to new
470  // location of the element following the erased element
471  child_itr = node_ptr->dependents.erase(child_itr);
472  } else {
473  // This child is not dependency-free, point to the next
474  // child
475  child_itr++;
476  }
477  }
478  } else {
479  // If it is a strictly ordered load mark its dependents as complete
480  // as we do not send a request for this case. If it is a store or a
481  // comp node we also mark all its dependents complete.
482  DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
483  " up dependents..\n", node_ptr->seqNum);
484 
485  for (auto child : node_ptr->dependents) {
486  // If the child node is dependency free removeDepOnInst()
487  // returns true.
488  if (child->removeDepOnInst(node_ptr->seqNum)) {
489  // Source dependencies are complete, check if resources
490  // are available and issue
491  checkAndIssue(child);
492  }
493  }
494  }
495 
496  // After executing the node, remove from readyList and delete node.
497  readyList.erase(free_itr);
498  // If it is a cacheable load which was sent, don't delete
499  // just yet. Delete it in completeMemAccess() after the
500  // response is received. If it is an strictly ordered
501  // load, it was not sent and all dependencies were simply
502  // marked complete. Thus it is safe to delete it. For
503  // stores and non load/store nodes all dependencies were
504  // marked complete so it is safe to delete it.
505  if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
506  // Release all resources occupied by the completed node
507  hwResource.release(node_ptr);
508  // clear the dynamically allocated set of dependents
509  (node_ptr->dependents).clear();
510  // Update the stat for numOps simulated
511  owner.updateNumOps(node_ptr->robNum);
512  // delete node
513  delete node_ptr;
514  // remove from graph
515  depGraph.erase(graph_itr);
516  }
517  // Point to first node to continue to next iteration of while loop
518  free_itr = readyList.begin();
519  } // end of while loop
520 
521  // Print readyList, sizes of queues and resource status after updating
522  if (debug::TraceCPUData) {
523  printReadyList();
524  DPRINTF(TraceCPUData, "Execute end occupancy:\n");
525  DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
526  "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
527  depFreeQueue.size());
528  hwResource.printOccupancy();
529  }
530 
531  if (retryPkt) {
532  DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
533  "event from the cache for seq. num %lli.\n",
534  retryPkt->req->getReqInstSeqNum());
535  return;
536  }
537  // If the size of the dependency graph is less than the dependency window
538  // then read from the trace file to populate the graph next time we are in
539  // execute.
540  if (depGraph.size() < windowSize && !traceComplete)
541  nextRead = true;
542 
543  // If cache is not blocked, schedule an event for the first execTick in
544  // readyList else retry from cache will schedule the event. If the ready
545  // list is empty then check if the next pending node has resources
546  // available to issue. If yes, then schedule an event for the next cycle.
547  if (!readyList.empty()) {
548  Tick next_event_tick = std::max(readyList.begin()->execTick,
549  curTick());
550  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
551  next_event_tick);
552  owner.schedDcacheNextEvent(next_event_tick);
553  } else if (readyList.empty() && !depFreeQueue.empty() &&
554  hwResource.isAvailable(depFreeQueue.front())) {
555  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
556  owner.clockEdge(Cycles(1)));
557  owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
558  }
559 
560  // If trace is completely read, readyList is empty and depGraph is empty,
561  // set execComplete to true
562  if (depGraph.empty() && readyList.empty() && traceComplete &&
563  !hwResource.awaitingResponse()) {
564  DPRINTF(TraceCPUData, "\tExecution Complete!\n");
565  execComplete = true;
566  elasticStats.dataLastTick = curTick();
567  }
568 }
569 
570 PacketPtr
571 TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
572 {
573  DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
574  "virt addr %d, pc %#x, size %d, flags %d).\n",
575  node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
576  node_ptr->pc, node_ptr->size, node_ptr->flags);
577 
578  // If the request is strictly ordered, do not send it. Just return nullptr
579  // as if it was successfully sent.
580  if (node_ptr->isStrictlyOrdered()) {
581  node_ptr->isLoad() ? ++elasticStats.numSOLoads :
582  ++elasticStats.numSOStores;
583  DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
584  node_ptr->seqNum);
585  return nullptr;
586  }
587 
588  // Check if the request spans two cache lines as this condition triggers
589  // an assert fail in the L1 cache. If it does then truncate the size to
590  // access only until the end of that line and ignore the remainder. The
591  // stat counting this is useful to keep a check on how frequently this
592  // happens. If required the code could be revised to mimic splitting such
593  // a request into two.
594  unsigned blk_size = owner.cacheLineSize();
595  Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
596  if (!(blk_offset + node_ptr->size <= blk_size)) {
597  node_ptr->size = blk_size - blk_offset;
598  ++elasticStats.numSplitReqs;
599  }
600 
601  // Create a request and the packet containing request
602  auto req = std::make_shared<Request>(
603  node_ptr->physAddr, node_ptr->size, node_ptr->flags, requestorId);
604  req->setReqInstSeqNum(node_ptr->seqNum);
605 
606  // If this is not done it triggers assert in L1 cache for invalid contextId
607  req->setContext(ContextID(0));
608 
609  req->setPC(node_ptr->pc);
610  // If virtual address is valid, set the virtual address field
611  // of the request.
612  if (node_ptr->virtAddr != 0) {
613  req->setVirt(node_ptr->virtAddr, node_ptr->size,
614  node_ptr->flags, requestorId, node_ptr->pc);
615  req->setPaddr(node_ptr->physAddr);
616  req->setReqInstSeqNum(node_ptr->seqNum);
617  }
618 
619  PacketPtr pkt;
620  uint8_t* pkt_data = new uint8_t[req->getSize()];
621  if (node_ptr->isLoad()) {
622  pkt = Packet::createRead(req);
623  } else {
624  pkt = Packet::createWrite(req);
625  memset(pkt_data, 0xA, req->getSize());
626  }
627  pkt->dataDynamic(pkt_data);
628 
629  // Call RequestPort method to send a timing request for this packet
630  bool success = port.sendTimingReq(pkt);
631  ++elasticStats.numSendAttempted;
632 
633  if (!success) {
634  // If it fails, return the packet to retry when a retry is signalled by
635  // the cache
636  ++elasticStats.numSendFailed;
637  DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
638  return pkt;
639  } else {
640  // If it succeeds, return nullptr
641  ++elasticStats.numSendSucceeded;
642  return nullptr;
643  }
644 }
645 
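The cache-line check in executeMemReq() above clips a request so it never crosses a line boundary. A worked sketch of just that arithmetic, assuming a power-of-two line size (illustrative helper, not gem5 API):

    #include <cassert>

    // E.g. offset 60 in a 64-byte line with size 8 is clipped to 4.
    inline unsigned
    clipToCacheLine(unsigned long long addr, unsigned size, unsigned blk_size)
    {
        assert((blk_size & (blk_size - 1)) == 0);  // power of two
        unsigned blk_offset = addr & (blk_size - 1);
        if (blk_offset + size > blk_size)
            size = blk_size - blk_offset;
        return size;
    }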
646 bool
647 TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
648 {
649  // Assert the node is dependency-free
650  assert(node_ptr->robDep.empty() && node_ptr->regDep.empty());
651 
652  // If this is the first attempt, print a debug message to indicate this.
653  if (first) {
654  DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
655  " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
656  node_ptr->robNum);
657  }
658 
659  // Check if resources are available to issue the specific node
660  if (hwResource.isAvailable(node_ptr)) {
661  // If resources are free only then add to readyList
662  DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
663  "Adding to readyList, occupying resources.\n",
664  node_ptr->seqNum);
665  // Compute the execute tick by adding the compute delay for the node
666  // and add the ready node to the ready list
667  addToSortedReadyList(node_ptr->seqNum,
668  owner.clockEdge() + node_ptr->compDelay);
669  // Account for the resources taken up by this issued node.
670  hwResource.occupy(node_ptr);
671  return true;
672  } else {
673  if (first) {
674  // Although dependencies are complete, resources are not available.
675  DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
676  "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
677  depFreeQueue.push(node_ptr);
678  } else {
679  DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
680  "%lli. Still pending issue.\n", node_ptr->seqNum);
681  }
682  return false;
683  }
684 }
685 
686 void
687 TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
688 {
689  // Release the resources for this completed node.
690  if (pkt->isWrite()) {
691  // Consider store complete.
692  hwResource.releaseStoreBuffer();
693  // If it is a store response then do nothing since we do not model
694  // dependencies on store completion in the trace. But if we were
695  // blocking execution due to store buffer fullness, we need to schedule
696  // an event and attempt to progress.
697  } else {
698  // If it is a load response then release the dependents waiting on it.
699  // Get pointer to the completed load
700  auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
701  assert(graph_itr != depGraph.end());
702  GraphNode* node_ptr = graph_itr->second;
703 
704  // Release resources occupied by the load
705  hwResource.release(node_ptr);
706 
707  DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
708  " dependents..\n", node_ptr->seqNum);
709 
710  for (auto child : node_ptr->dependents) {
711  if (child->removeDepOnInst(node_ptr->seqNum)) {
712  checkAndIssue(child);
713  }
714  }
715 
716  // clear the dynamically allocated set of dependents
717  (node_ptr->dependents).clear();
718  // Update the stat for numOps completed
719  owner.updateNumOps(node_ptr->robNum);
720  // delete node
721  delete node_ptr;
722  // remove from graph
723  depGraph.erase(graph_itr);
724  }
725 
726  if (debug::TraceCPUData) {
727  printReadyList();
728  }
729 
730  // If the size of the dependency graph is less than the dependency window
731  // then read from the trace file to populate the graph next time we are in
732  // execute.
733  if (depGraph.size() < windowSize && !traceComplete)
734  nextRead = true;
735 
736  // If not waiting for retry, attempt to schedule next event
737  if (!retryPkt) {
738  // We might have new dep-free nodes in the list which will have execute
739  // tick greater than or equal to curTick. But a new dep-free node might
740  // have its execute tick earlier. Therefore, attempt to reschedule. It
741  // could happen that the readyList is empty and we got here via a
742  // last remaining response. So, either the trace is complete or there
743  // are pending nodes in the depFreeQueue. The checking is done in the
744  // execute() control flow, so schedule an event to go via that flow.
745  Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
746  std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
747  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
748  next_event_tick);
749  owner.schedDcacheNextEvent(next_event_tick);
750  }
751 }
752 
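The reschedule decision at the end of completeMemAccess() above reduces to: never schedule earlier than the next clock edge, and never earlier than the head of the ready list when one exists. A hedged one-function sketch (illustrative names, not gem5 API):

    #include <algorithm>
    #include <cstdint>

    inline uint64_t
    nextEventTick(bool ready_list_empty, uint64_t head_exec_tick,
                  uint64_t next_clock_edge)
    {
        return ready_list_empty
            ? next_clock_edge
            : std::max(head_exec_tick, next_clock_edge);
    }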
753 void
754 TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
755  Tick exec_tick)
756 {
757  ReadyNode ready_node;
758  ready_node.seqNum = seq_num;
759  ready_node.execTick = exec_tick;
760 
761  // Iterator to readyList
762  auto itr = readyList.begin();
763 
764  // If the readyList is empty, simply insert the new node at the beginning
765  // and return
766  if (itr == readyList.end()) {
767  readyList.insert(itr, ready_node);
768  elasticStats.maxReadyListSize =
769  std::max<double>(readyList.size(),
770  elasticStats.maxReadyListSize.value());
771  return;
772  }
773 
774  // If the new node has its execution tick equal to the first node in the
775  // list then go to the next node. If the first node in the list failed
776  // to execute, its position as the first is thus maintained.
777  if (retryPkt) {
778  if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
779  itr++;
780  }
781 
782  // Increment the iterator and compare the node pointed to by it to the new
783  // node till the position to insert the new node is found.
784  bool found = false;
785  while (!found && itr != readyList.end()) {
786  // If the execution tick of the new node is less than the node then
787  // this is the position to insert
788  if (exec_tick < itr->execTick) {
789  found = true;
790  // If the execution tick of the new node is equal to the node then
791  // sort in ascending order of sequence numbers
792  } else if (exec_tick == itr->execTick) {
793  // If the sequence number of the new node is less than the node
794  // then this is the position to insert
795  if (seq_num < itr->seqNum) {
796  found = true;
797  // Else go to next node
798  } else {
799  itr++;
800  }
801  } else {
802  // If the execution tick of the new node is greater than the node
803  // then go to the next node.
804  itr++;
805  }
806  }
807  readyList.insert(itr, ready_node);
808  // Update the stat for max size reached of the readyList
809  elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
810  elasticStats.maxReadyListSize.value());
811 }
812 
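The invariant maintained by addToSortedReadyList() above is a list ordered by execute tick, with the sequence number breaking ties; the real code additionally keeps a failed retry pinned at the head. A simplified sketch of the core ordering with stand-in types (not the gem5 structures):

    #include <cstdint>
    #include <list>

    struct Ready { uint64_t seqNum; uint64_t execTick; };

    // Insert before the first element that does not compare less than node.
    inline void
    sortedInsert(std::list<Ready>& ready_list, Ready node)
    {
        auto itr = ready_list.begin();
        while (itr != ready_list.end() &&
               (itr->execTick < node.execTick ||
                (itr->execTick == node.execTick &&
                 itr->seqNum < node.seqNum))) {
            ++itr;
        }
        ready_list.insert(itr, node);
    }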
813 void
814 TraceCPU::ElasticDataGen::printReadyList()
815 {
816  auto itr = readyList.begin();
817  if (itr == readyList.end()) {
818  DPRINTF(TraceCPUData, "readyList is empty.\n");
819  return;
820  }
821  DPRINTF(TraceCPUData, "Printing readyList:\n");
822  while (itr != readyList.end()) {
823  auto graph_itr = depGraph.find(itr->seqNum);
824  [[maybe_unused]] GraphNode* node_ptr = graph_itr->second;
825  DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
826  node_ptr->typeToStr(), itr->execTick);
827  itr++;
828  }
829 }
830 
831 TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
832  uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) :
833  sizeROB(max_rob),
834  sizeStoreBuffer(max_stores),
835  sizeLoadBuffer(max_loads),
836  oldestInFlightRobNum(UINT64_MAX),
837  numInFlightLoads(0),
838  numInFlightStores(0)
839 {}
840 
841 void
842 TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
843 {
844  // Occupy ROB entry for the issued node
845  // Merely maintain the oldest node, i.e. numerically least robNum by saving
846  // it in the variable oldestInFLightRobNum.
847  inFlightNodes[new_node->seqNum] = new_node->robNum;
848  oldestInFlightRobNum = inFlightNodes.begin()->second;
849 
850  // Occupy Load/Store Buffer entry for the issued node if applicable
851  if (new_node->isLoad()) {
852  ++numInFlightLoads;
853  } else if (new_node->isStore()) {
854  ++numInFlightStores;
855  } // else if it is a non load/store node, no buffer entry is occupied
856 
857  printOccupancy();
858 }
859 
860 void
861 TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
862 {
863  assert(!inFlightNodes.empty());
864  DPRINTFR(TraceCPUData,
865  "\tClearing done seq. num %d from inFlightNodes..\n",
866  done_node->seqNum);
867 
868  assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
869  inFlightNodes.erase(done_node->seqNum);
870 
871  if (inFlightNodes.empty()) {
872  // If we delete the only in-flight node, then the
873  // oldestInFlightRobNum is reset to its initialized (max) value.
874  oldestInFlightRobNum = UINT64_MAX;
875  } else {
876  // Set the oldest in-flight node rob number equal to the first node in
877  // the inFlightNodes since that will have the numerically least value.
878  oldestInFlightRobNum = inFlightNodes.begin()->second;
879  }
880 
881  DPRINTFR(TraceCPUData,
882  "\tCleared. inFlightNodes.size() = %d, "
883  "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
884  oldestInFlightRobNum);
885 
886  // A store is considered complete when a request is sent, thus ROB entry is
887  // freed. But it occupies an entry in the Store Buffer until its response
888  // is received. A load is considered complete when a response is received,
889  // thus both ROB and Load Buffer entries can be released.
890  if (done_node->isLoad()) {
891  assert(numInFlightLoads != 0);
892  --numInFlightLoads;
893  }
894  // For normal writes, we send the requests out and clear a store buffer
895  // entry on response. For writes which are strictly ordered, for e.g.
896  // writes to device registers, we do that within release() which is called
897  // when node is executed and taken off from readyList.
898  if (done_node->isStore() && done_node->isStrictlyOrdered()) {
899  releaseStoreBuffer();
900  }
901 }
902 
903 void
904 TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
905 {
906  assert(numInFlightStores != 0);
907  --numInFlightStores;
908 }
909 
910 bool
911 TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
912  const GraphNode* new_node) const
913 {
914  uint16_t num_in_flight_nodes;
915  if (inFlightNodes.empty()) {
916  num_in_flight_nodes = 0;
917  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
918  " #in-flight nodes = 0", new_node->seqNum);
919  } else if (new_node->robNum > oldestInFlightRobNum) {
920  // This is the intuitive case where new dep-free node is younger
921  // instruction than the oldest instruction in-flight. Thus we make sure
922  // in_flight_nodes does not overflow.
923  num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
924  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
925  " #in-flight nodes = %d - %d = %d", new_node->seqNum,
926  new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
927  } else {
928  // This is the case where an instruction older than the oldest in-
929  // flight instruction becomes dep-free. Thus we must have already
930  // accounted for the entry in ROB for this new dep-free node.
931  // Immediately after this check returns true, oldestInFlightRobNum will
932  // be updated in occupy(). We simply let this node issue now.
933  num_in_flight_nodes = 0;
934  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
935  " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
936  new_node->seqNum, new_node->robNum);
937  }
938  DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
939  numInFlightLoads, sizeLoadBuffer,
940  numInFlightStores, sizeStoreBuffer);
941  // Check if resources are available to issue the specific node
942  if (num_in_flight_nodes >= sizeROB) {
943  return false;
944  }
945  if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
946  return false;
947  }
948  if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
949  return false;
950  }
951  return true;
952 }
953 
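The availability test in isAvailable() above combines three limits: inferred ROB occupancy plus the load and store buffer counts. A standalone sketch of the same checks (illustrative types and names, not gem5 API):

    #include <cstdint>

    struct Limits { uint16_t rob, loads, stores; };

    inline bool
    canIssue(uint64_t rob_num, uint64_t oldest_in_flight,
             uint16_t in_flight_loads, uint16_t in_flight_stores,
             bool is_load, bool is_store, const Limits& lim)
    {
        // Nodes older than the oldest in-flight one count as already
        // accounted for, so their occupancy is taken as zero.
        uint64_t in_rob = rob_num > oldest_in_flight
            ? rob_num - oldest_in_flight : 0;
        if (in_rob >= lim.rob)
            return false;
        if (is_load && in_flight_loads >= lim.loads)
            return false;
        if (is_store && in_flight_stores >= lim.stores)
            return false;
        return true;
    }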
954 bool
955 TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
956 {
957  // Return true if there is at least one read or write request in flight
958  return (numInFlightStores != 0 || numInFlightLoads != 0);
959 }
960 
961 void
962 TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
963 {
964  DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
965  "LQ = %d/%d, SQ = %d/%d.\n",
966  oldestInFlightRobNum,
967  numInFlightLoads, sizeLoadBuffer,
968  numInFlightStores, sizeStoreBuffer);
969 }
970 
971 TraceCPU::FixedRetryGen::FixedRetryGenStatGroup::FixedRetryGenStatGroup(
972  statistics::Group *parent, const std::string& _name) :
973  statistics::Group(parent, _name.c_str()),
974  ADD_STAT(numSendAttempted, statistics::units::Count::get(),
975  "Number of first attempts to send a request"),
976  ADD_STAT(numSendSucceeded, statistics::units::Count::get(),
977  "Number of successful first attempts"),
978  ADD_STAT(numSendFailed, statistics::units::Count::get(),
979  "Number of failed first attempts"),
980  ADD_STAT(numRetrySucceeded, statistics::units::Count::get(),
981  "Number of successful retries"),
982  ADD_STAT(instLastTick, statistics::units::Tick::get(),
983  "Last tick simulated from the fixed inst trace")
984 {
985 
986 }
987 
988 Tick
989 TraceCPU::FixedRetryGen::init()
990 {
991  DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
992  " IcacheGen: fixed issue with retry.\n");
993 
994  if (nextExecute()) {
995  DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
996  return currElement.tick;
997  } else {
998  panic("Read of first message in the trace failed.\n");
999  return MaxTick;
1000  }
1001 }
1002 
1003 bool
1004 TraceCPU::FixedRetryGen::tryNext()
1005 {
1006  // If there is a retry packet, try to send it
1007  if (retryPkt) {
1008  DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1009 
1010  if (!port.sendTimingReq(retryPkt)) {
1011  // Still blocked! This should never occur.
1012  DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1013  return false;
1014  }
1015  ++fixedStats.numRetrySucceeded;
1016  } else {
1017  DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1018 
1019  // try sending current element
1020  assert(currElement.isValid());
1021 
1022  ++fixedStats.numSendAttempted;
1023 
1024  if (!send(currElement.addr, currElement.blocksize,
1025  currElement.cmd, currElement.flags, currElement.pc)) {
1026  DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1027  ++fixedStats.numSendFailed;
1028  // return false to indicate not to schedule next event
1029  return false;
1030  } else {
1031  ++fixedStats.numSendSucceeded;
1032  }
1033  }
1034  // If packet was sent successfully, either retryPkt or currElement, return
1035  // true to indicate to schedule event at current Tick plus delta. If packet
1036  // was sent successfully and there is no next packet to send, return false.
1037  DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1038  "element.\n");
1039  retryPkt = nullptr;
1040  // Read next element into currElement, currElement gets cleared so save the
1041  // tick to calculate delta
1042  Tick last_tick = currElement.tick;
1043  if (nextExecute()) {
1044  assert(currElement.tick >= last_tick);
1045  delta = currElement.tick - last_tick;
1046  }
1047  return !traceComplete;
1048 }
1049 
1050 void
1051 TraceCPU::FixedRetryGen::exit()
1052 {
1053  trace.reset();
1054 }
1055 
1056 bool
1057 TraceCPU::FixedRetryGen::nextExecute()
1058 {
1059  if (traceComplete)
1060  // We are at the end of the file, thus we have no more messages.
1061  // Return false.
1062  return false;
1063 
1064 
1065  // Reset the currElement to the default values
1066  currElement.clear();
1067 
1068  // Read the next line to get the next message. If that fails then end of
1069  // trace has been reached and traceComplete needs to be set in addition
1070  // to returning false. If successful then next message is in currElement.
1071  if (!trace.read(&currElement)) {
1072  traceComplete = true;
1073  fixedStats.instLastTick = curTick();
1074  return false;
1075  }
1076 
1077  DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1078  currElement.cmd.isRead() ? 'r' : 'w',
1079  currElement.addr,
1080  currElement.pc,
1081  currElement.blocksize,
1082  currElement.tick);
1083 
1084  return true;
1085 }
1086 
1087 bool
1088 TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1089  Request::FlagsType flags, Addr pc)
1090 {
1091 
1092  // Create new request
1093  auto req = std::make_shared<Request>(addr, size, flags, requestorId);
1094  req->setPC(pc);
1095 
1096  // If this is not done it triggers assert in L1 cache for invalid contextId
1097  req->setContext(ContextID(0));
1098 
1099  // Embed it in a packet
1100  PacketPtr pkt = new Packet(req, cmd);
1101 
1102  uint8_t* pkt_data = new uint8_t[req->getSize()];
1103  pkt->dataDynamic(pkt_data);
1104 
1105  if (cmd.isWrite()) {
1106  memset(pkt_data, 0xA, req->getSize());
1107  }
1108 
1109  // Call RequestPort method to send a timing request for this packet
1110  bool success = port.sendTimingReq(pkt);
1111  if (!success) {
1112  // If it fails, save the packet to retry when a retry is signalled by
1113  // the cache
1114  retryPkt = pkt;
1115  }
1116  return success;
1117 }
1118 
1119 void
1120 TraceCPU::icacheRetryRecvd()
1121 {
1122  // Schedule an event to go through the control flow in the same tick as
1123  // retry is received
1124  DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1125  " event @%lli.\n", curTick());
1126  schedule(icacheNextEvent, curTick());
1127 }
1128 
1129 void
1130 TraceCPU::dcacheRetryRecvd()
1131 {
1132  // Schedule an event to go through the execute flow in the same tick as
1133  // retry is received
1134  DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1135  " event @%lli.\n", curTick());
1136  schedule(dcacheNextEvent, curTick());
1137 }
1138 
1139 void
1140 TraceCPU::schedDcacheNextEvent(Tick when)
1141 {
1142  if (!dcacheNextEvent.scheduled()) {
1143  DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1144  when);
1145  schedule(dcacheNextEvent, when);
1146  ++traceStats.numSchedDcacheEvent;
1147  } else if (when < dcacheNextEvent.when()) {
1148  DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1149  " to %lli.\n", dcacheNextEvent.when(), when);
1150  reschedule(dcacheNextEvent, when);
1151  }
1152 
1153 }
1154 
1155 bool
1156 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1157 {
1158  // All responses on the instruction fetch side are ignored. Simply delete
1159  // the packet to free allocated memory
1160  delete pkt;
1161 
1162  return true;
1163 }
1164 
1165 void
1166 TraceCPU::IcachePort::recvReqRetry()
1167 {
1168  owner->icacheRetryRecvd();
1169 }
1170 
1171 void
1172 TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1173 {
1174  DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1175  dcacheGen.completeMemAccess(pkt);
1176 }
1177 
1178 bool
1179 TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1180 {
1181  // Handle the responses for data memory requests which is done inside the
1182  // elastic data generator
1183  owner->dcacheRecvTimingResp(pkt);
1184  // After processing the response delete the packet to free
1185  // memory
1186  delete pkt;
1187 
1188  return true;
1189 }
1190 
1191 void
1192 TraceCPU::DcachePort::recvReqRetry()
1193 {
1194  owner->dcacheRetryRecvd();
1195 }
1196 
1197 TraceCPU::ElasticDataGen::InputStream::InputStream(
1198  const std::string& filename, const double time_multiplier) :
1199  trace(filename),
1200  timeMultiplier(time_multiplier),
1201  microOpCount(0)
1202 {
1203  // Create a protobuf message for the header and read it from the stream
1204  ProtoMessage::InstDepRecordHeader header_msg;
1205  if (!trace.read(header_msg)) {
1206  panic("Failed to read packet header from %s\n", filename);
1207 
1208  if (header_msg.tick_freq() != sim_clock::Frequency) {
1209  panic("Trace %s was recorded with a different tick frequency %d\n",
1210  header_msg.tick_freq());
1211  }
1212  } else {
1213  // Assign window size equal to the field in the trace that was recorded
1214  // when the data dependency trace was captured in the o3cpu model
1215  windowSize = header_msg.window_size();
1216  }
1217 }
1218 
1219 void
1220 TraceCPU::ElasticDataGen::InputStream::reset()
1221 {
1222  trace.reset();
1223 }
1224 
1225 bool
1226 TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1227 {
1228  ProtoMessage::InstDepRecord pkt_msg;
1229  if (trace.read(pkt_msg)) {
1230  // Required fields
1231  element->seqNum = pkt_msg.seq_num();
1232  element->type = pkt_msg.type();
1233  // Scale the compute delay to effectively scale the Trace CPU frequency
1234  element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1235 
1236  // Repeated field robDepList
1237  element->robDep.clear();
1238  for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1239  element->robDep.push_back(pkt_msg.rob_dep(i));
1240  }
1241 
1242  // Repeated field
1243  element->regDep.clear();
1244  for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1245  // There is a possibility that an instruction has both, a register
1246  // and order dependency on an instruction. In such a case, the
1247  // register dependency is omitted
1248  bool duplicate = false;
1249  for (auto &dep: element->robDep) {
1250  duplicate |= (pkt_msg.reg_dep(i) == dep);
1251  }
1252  if (!duplicate)
1253  element->regDep.push_back(pkt_msg.reg_dep(i));
1254  }
1255 
1256  // Optional fields
1257  if (pkt_msg.has_p_addr())
1258  element->physAddr = pkt_msg.p_addr();
1259  else
1260  element->physAddr = 0;
1261 
1262  if (pkt_msg.has_v_addr())
1263  element->virtAddr = pkt_msg.v_addr();
1264  else
1265  element->virtAddr = 0;
1266 
1267  if (pkt_msg.has_size())
1268  element->size = pkt_msg.size();
1269  else
1270  element->size = 0;
1271 
1272  if (pkt_msg.has_flags())
1273  element->flags = pkt_msg.flags();
1274  else
1275  element->flags = 0;
1276 
1277  if (pkt_msg.has_pc())
1278  element->pc = pkt_msg.pc();
1279  else
1280  element->pc = 0;
1281 
1282  // ROB occupancy number
1283  ++microOpCount;
1284  if (pkt_msg.has_weight()) {
1285  microOpCount += pkt_msg.weight();
1286  }
1287  element->robNum = microOpCount;
1288  return true;
1289  }
1290 
1291  // We have reached the end of the file
1292  return false;
1293 }
1294 
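The duplicate filtering inside read() above ensures a parent that appears as both an order (ROB) and a register dependency is tracked only once. A minimal sketch of that filter (illustrative helper, not gem5 API):

    #include <cstdint>
    #include <vector>

    // Keep only register deps that do not already appear as ROB deps.
    inline void
    filterRegDeps(const std::vector<uint64_t>& rob_dep,
                  std::vector<uint64_t>& reg_dep)
    {
        std::vector<uint64_t> kept;
        for (uint64_t r : reg_dep) {
            bool duplicate = false;
            for (uint64_t d : rob_dep)
                duplicate |= (r == d);
            if (!duplicate)
                kept.push_back(r);
        }
        reg_dep.swap(kept);
    }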
1295 bool
1296 TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1297 {
1298  for (auto it = regDep.begin(); it != regDep.end(); it++) {
1299  if (*it == reg_dep) {
1300  // If register dependency is found, erase it.
1301  regDep.erase(it);
1302  DPRINTFR(TraceCPUData,
1303  "\tFor %lli: Marking register dependency %lli done.\n",
1304  seqNum, reg_dep);
1305  return true;
1306  }
1307  }
1308 
1309  // Return false if the dependency is not found
1310  return false;
1311 }
1312 
1313 bool
1314 TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1315 {
1316  for (auto it = robDep.begin(); it != robDep.end(); it++) {
1317  if (*it == rob_dep) {
1318  // If the rob dependency is found, erase it.
1319  robDep.erase(it);
1320  DPRINTFR(TraceCPUData,
1321  "\tFor %lli: Marking ROB dependency %lli done.\n",
1322  seqNum, rob_dep);
1323  return true;
1324  }
1325  }
1326  return false;
1327 }
1328 
1329 bool
1330 TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1331 {
1332  // If it is an rob dependency then remove it
1333  if (!removeRobDep(done_seq_num)) {
1334  // If it is not an rob dependency then it must be a register dependency
1335  // If the register dependency is not found, it violates an assumption
1336  // and must be caught by assert.
1337  [[maybe_unused]] bool regdep_found = removeRegDep(done_seq_num);
1338  assert(regdep_found);
1339  }
1340  // Return true if the node is dependency free
1341  return robDep.empty() && regDep.empty();
1342 }
1343 
1344 void
1345 TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1346 {
1347 #if TRACING_ON
1348  DPRINTFR(TraceCPUData, "%lli", seqNum);
1349  DPRINTFR(TraceCPUData, ",%s", typeToStr());
1350  if (isLoad() || isStore()) {
1351  DPRINTFR(TraceCPUData, ",%i", physAddr);
1352  DPRINTFR(TraceCPUData, ",%i", size);
1353  DPRINTFR(TraceCPUData, ",%i", flags);
1354  }
1355  DPRINTFR(TraceCPUData, ",%lli", compDelay);
1356  DPRINTFR(TraceCPUData, "robDep:");
1357  for (auto &dep: robDep) {
1358  DPRINTFR(TraceCPUData, ",%lli", dep);
1359  }
1360  DPRINTFR(TraceCPUData, "regDep:");
1361  for (auto &dep: regDep) {
1362  DPRINTFR(TraceCPUData, ",%lli", dep);
1363  }
1364  auto child_itr = dependents.begin();
1365  DPRINTFR(TraceCPUData, "dependents:");
1366  while (child_itr != dependents.end()) {
1367  DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1368  child_itr++;
1369  }
1370 
1371  DPRINTFR(TraceCPUData, "\n");
1372 #endif // TRACING_ON
1373 }
1374 
1375 std::string
1376 TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1377 {
1378  return Record::RecordType_Name(type);
1379 }
1380 
1381 TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1382  : trace(filename)
1383 {
1384  // Create a protobuf message for the header and read it from the stream
1385  ProtoMessage::PacketHeader header_msg;
1386  if (!trace.read(header_msg)) {
1387  panic("Failed to read packet header from %s\n", filename);
1388 
1389  if (header_msg.tick_freq() != sim_clock::Frequency) {
1390  panic("Trace %s was recorded with a different tick frequency %d\n",
1391  header_msg.tick_freq());
1392  }
1393  }
1394 }
1395 
1396 void
1397 TraceCPU::FixedRetryGen::InputStream::reset()
1398 {
1399  trace.reset();
1400 }
1401 
1402 bool
1403 TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1404 {
1405  ProtoMessage::Packet pkt_msg;
1406  if (trace.read(pkt_msg)) {
1407  element->cmd = pkt_msg.cmd();
1408  element->addr = pkt_msg.addr();
1409  element->blocksize = pkt_msg.size();
1410  element->tick = pkt_msg.tick();
1411  element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1412  element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1413  return true;
1414  }
1415 
1416  // We have reached the end of the file
1417  return false;
1418 }
1419 
1420 } // namespace gem5
#define DPRINTFR(x,...)
Definition: trace.hh:200
#define DPRINTF(x,...)
Definition: trace.hh:186
bool read(google::protobuf::Message &msg)
Read a message from the stream.
Definition: protoio.cc:182
virtual Port & getDataPort()=0
Purely virtual method that returns a reference to the data port.
void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: base.cc:273
virtual Port & getInstPort()=0
Purely virtual method that returns a reference to the instruction port.
gem5::BaseCPU::BaseCPUStats baseStats
unsigned int cacheLineSize() const
Get the cache line size of the system.
Definition: base.hh:380
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Tick clockPeriod() const
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:79
bool isRead() const
Definition: packet.hh:226
Command cmd
Definition: packet.hh:216
bool isWrite() const
Definition: packet.hh:227
const std::string _name
Definition: named.hh:41
virtual std::string name() const
Definition: named.hh:47
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
static PacketPtr createWrite(const RequestPtr &req)
Definition: packet.hh:1041
bool isWrite() const
Definition: packet.hh:593
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:1035
RequestPtr req
A pointer to the original request.
Definition: packet.hh:376
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1200
void takeOverFrom(Port *old)
A utility function to make it easier to swap out ports.
Definition: port.hh:137
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition: port.hh:495
uint64_t FlagsType
Definition: request.hh:100
bool recvTimingResp(PacketPtr pkt)
Receive the timing reponse and call dcacheRecvTimingResp() method of the dcacheGen to handle completi...
Definition: trace_cpu.cc:1179
void recvReqRetry()
Handle a retry signalled by the cache if data access failed in the first attempt.
Definition: trace_cpu.cc:1192
The struct GraphNode stores an instruction in the trace file.
Definition: trace_cpu.hh:558
bool isLoad() const
Is the node a load.
Definition: trace_cpu.hh:613
RecordType type
Type of the node corresponding to the instruction modeled by it.
Definition: trace_cpu.hh:576
bool isStrictlyOrdered() const
Return true if node has a request which is strictly ordered.
Definition: trace_cpu.hh:632
bool removeDepOnInst(NodeSeqNum done_seq_num)
Check for all dependencies on completed inst.
Definition: trace_cpu.cc:1330
std::vector< GraphNode * > dependents
A vector of nodes dependent (outgoing) on this node.
Definition: trace_cpu.hh:610
bool removeRobDep(NodeSeqNum rob_dep)
Remove completed instruction from order dependency array.
Definition: trace_cpu.cc:1314
Request::Flags flags
Request flags if any.
Definition: trace_cpu.hh:588
RegDepList regDep
List of register dependencies (incoming) if any.
Definition: trace_cpu.hh:603
uint32_t size
Size of request if any.
Definition: trace_cpu.hh:585
NodeRobNum robNum
ROB occupancy number.
Definition: trace_cpu.hh:570
std::string typeToStr() const
Return string specifying the type of the node.
Definition: trace_cpu.cc:1376
void writeElementAsTrace() const
Write out element in trace-compatible format using debug flag TraceCPUData.
Definition: trace_cpu.cc:1345
RobDepList robDep
List of order dependencies.
Definition: trace_cpu.hh:594
Addr physAddr
The address for the request if any.
Definition: trace_cpu.hh:579
Addr virtAddr
The virtual address for the request if any.
Definition: trace_cpu.hh:582
bool isStore() const
Is the node a store.
Definition: trace_cpu.hh:616
uint64_t compDelay
Computational delay.
Definition: trace_cpu.hh:597
NodeSeqNum seqNum
Instruction sequence number.
Definition: trace_cpu.hh:567
bool removeRegDep(NodeSeqNum reg_dep)
Remove completed instruction from register dependency array.
Definition: trace_cpu.cc:1296
void occupy(const GraphNode *new_node)
Occupy appropriate structures for an issued node.
Definition: trace_cpu.cc:842
void release(const GraphNode *done_node)
Release appropriate structures for a completed node.
Definition: trace_cpu.cc:861
bool awaitingResponse() const
Check if there are any outstanding requests, i.e.
Definition: trace_cpu.cc:955
void printOccupancy()
Print resource occupancy for debugging.
Definition: trace_cpu.cc:962
HardwareResource(uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
Constructor that initializes the sizes of the structures.
Definition: trace_cpu.cc:831
void releaseStoreBuffer()
Release store buffer entry for a completed store.
Definition: trace_cpu.cc:904
bool isAvailable(const GraphNode *new_node) const
Check if structures required to issue a node are free.
Definition: trace_cpu.cc:911
ProtoInputStream trace
Input file stream for the protobuf trace.
Definition: trace_cpu.hh:765
InputStream(const std::string &filename, const double time_multiplier)
Create a trace input stream for a given file name.
Definition: trace_cpu.cc:1197
bool read(GraphNode *element)
Attempt to read a trace element from the stream, and also notify the caller if the end of the file wa...
Definition: trace_cpu.cc:1226
uint32_t windowSize
The window size that is read from the header of the protobuf trace and used to process the dependency...
Definition: trace_cpu.hh:782
void reset()
Reset the stream such that it can be played once again.
Definition: trace_cpu.cc:1220
const uint32_t windowSize
Window size within which to check for dependencies.
Definition: trace_cpu.hh:985
bool readNextWindow()
Reads a line of the trace file.
Definition: trace_cpu.cc:297
PacketPtr executeMemReq(GraphNode *node_ptr)
Creates a new request for a load or store assigning the request parameters.
Definition: trace_cpu.cc:571
void printReadyList()
Print readyList for debugging using debug flag TraceCPUData.
Definition: trace_cpu.cc:814
bool isExecComplete() const
Returns the execComplete variable which is set when the last node is executed.
Definition: trace_cpu.hh:931
TraceCPU & owner
Reference of the TraceCPU.
Definition: trace_cpu.hh:950
const RequestorID requestorId
RequestorID used for the requests being sent.
Definition: trace_cpu.hh:956
Tick init()
Called from TraceCPU init().
Definition: trace_cpu.cc:252
uint64_t NodeSeqNum
Node sequence number type.
Definition: trace_cpu.hh:543
void adjustInitTraceOffset(Tick &offset)
Adjust traceOffset based on what TraceCPU init() determines on comparing the offsets in the fetch req...
Definition: trace_cpu.cc:283
bool checkAndIssue(const GraphNode *node_ptr, bool first=true)
Attempts to issue a node once the node's source dependencies are complete.
Definition: trace_cpu.cc:647
std::list< ReadyNode > readyList
List of nodes that are ready to execute.
Definition: trace_cpu.hh:1006
PacketPtr retryPkt
PacketPtr used to store the packet to retry.
Definition: trace_cpu.hh:965
bool traceComplete
Set to true when end of trace is reached.
Definition: trace_cpu.hh:968
void exit()
Exit the ElasticDataGen.
Definition: trace_cpu.cc:291
bool nextRead
Set to true when the next window of instructions need to be read.
Definition: trace_cpu.hh:971
std::queue< const GraphNode * > depFreeQueue
Queue of dependency-free nodes that are pending issue because resources are not available.
Definition: trace_cpu.hh:1003
gem5::TraceCPU::ElasticDataGen::ElasticDataGenStatGroup elasticStats
std::unordered_map< NodeSeqNum, GraphNode * > depGraph
Store the depGraph of GraphNodes.
Definition: trace_cpu.hh:994
void addDepsOnParent(GraphNode *new_node, T &dep_list)
Iterate over the dependencies of a new node and add the new node to the list of dependents of the par...
Definition: trace_cpu.cc:350
bool execComplete
Set true when execution of trace is complete.
Definition: trace_cpu.hh:974
RequestPort & port
Reference of the port to be used to issue memory requests.
Definition: trace_cpu.hh:953
HardwareResource hwResource
Hardware resources required to contain in-flight nodes and to throttle issuing of new nodes when reso...
Definition: trace_cpu.hh:991
void addToSortedReadyList(NodeSeqNum seq_num, Tick exec_tick)
Add a ready node to the readyList.
Definition: trace_cpu.cc:754
InputStream trace
Input stream used for reading the input trace file.
Definition: trace_cpu.hh:959
void execute()
This is the main execute function which consumes nodes from the sorted readyList.
Definition: trace_cpu.cc:375
void completeMemAccess(PacketPtr pkt)
When a load writeback is received, that is when the load completes, release the dependents on it.
Definition: trace_cpu.cc:687
InputStream(const std::string &filename)
Create a trace input stream for a given file name.
Definition: trace_cpu.cc:1381
bool read(TraceElement *element)
Attempt to read a trace element from the stream, and also notify the caller if the end of the file wa...
Definition: trace_cpu.cc:1403
void reset()
Reset the stream such that it can be played once again.
Definition: trace_cpu.cc:1397
TraceCPU & owner
Reference of the TraceCPU.
Definition: trace_cpu.hh:480
int64_t delta
Stores the difference in the send ticks of the current and last packets.
Definition: trace_cpu.hh:502
Tick init()
Called from TraceCPU init().
Definition: trace_cpu.cc:989
const RequestorID requestorId
RequestorID used for the requests being sent.
Definition: trace_cpu.hh:486
gem5::TraceCPU::FixedRetryGen::FixedRetryGenStatGroup fixedStats
TraceElement currElement
Store an element read from the trace to send as the next packet.
Definition: trace_cpu.hh:510
bool nextExecute()
Reads a line of the trace file.
Definition: trace_cpu.cc:1057
bool traceComplete
Set to true when end of trace is reached.
Definition: trace_cpu.hh:507
PacketPtr retryPkt
PacketPtr used to store the packet to retry.
Definition: trace_cpu.hh:495
bool isTraceComplete()
Returns the traceComplete variable, which is set when the end of the input trace file is reached.
Definition: trace_cpu.hh:474
void exit()
Exit the FixedRetryGen.
Definition: trace_cpu.cc:1051
RequestPort & port
Reference to the port used to issue memory requests.
Definition: trace_cpu.hh:483
InputStream trace
Input stream used for reading the input trace file.
Definition: trace_cpu.hh:489
bool send(Addr addr, unsigned size, const MemCmd &cmd, Request::FlagsType flags, Addr pc)
Creates a new request using the request parameters passed as arguments.
Definition: trace_cpu.cc:1088
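The send path follows gem5's usual Request/Packet idiom. The sketch below is consistent with that API but is not a verbatim copy of the definition at trace_cpu.cc:1088; sendSketch is a hypothetical name:

#include "mem/packet.hh"
#include "mem/port.hh"
#include "mem/request.hh"

// Build a Request, wrap it in a Packet with a dynamically owned data
// buffer, and attempt a timing request; false means the cache is busy
// and the caller must stash the packet for retry.
bool
sendSketch(gem5::RequestPort &port, gem5::RequestorID requestorId,
           gem5::Addr addr, unsigned size, const gem5::MemCmd &cmd,
           gem5::Request::FlagsType flags, gem5::Addr pc)
{
    auto req = std::make_shared<gem5::Request>(addr, size, flags,
                                               requestorId);
    req->setPC(pc);

    gem5::PacketPtr pkt = new gem5::Packet(req, cmd);
    pkt->dataDynamic(new uint8_t[req->getSize()]); // packet frees buffer

    return port.sendTimingReq(pkt);
}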
bool tryNext()
This tries to send the current or retry packet and returns true if successful.
Definition: trace_cpu.cc:1004
void recvReqRetry()
Handle a retry signalled by the cache if the instruction read failed on the first attempt.
Definition: trace_cpu.cc:1166
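The retry handling is the standard gem5 handshake; a hedged sketch with a hypothetical helper name:

#include "mem/packet.hh"
#include "mem/port.hh"

// When the cache signals a retry, resend the stashed packet; on success
// clear retryPkt so the next tryNext() knows nothing is pending.
void
recvReqRetrySketch(gem5::RequestPort &port, gem5::PacketPtr &retryPkt)
{
    if (retryPkt && port.sendTimingReq(retryPkt))
        retryPkt = nullptr;
}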
bool recvTimingResp(PacketPtr pkt)
Receive the timing response and simply delete the packet since instruction fetch requests are issued a...
Definition: trace_cpu.cc:1156
The trace CPU replays traces generated using the elastic trace probe attached to the O3 CPU model.
Definition: trace_cpu.hh:143
const uint64_t progressMsgInterval
Interval of committed instructions specified by the user at which a progress info message is printed.
Definition: trace_cpu.hh:1099
Port & getInstPort()
Used to get a reference to the icache port.
Definition: trace_cpu.hh:1123
const bool enableEarlyExit
Exit when any one Trace CPU completes its execution.
Definition: trace_cpu.hh:1093
Port & getDataPort()
Used to get a reference to the dcache port.
Definition: trace_cpu.hh:1126
bool oneTraceComplete
Set to true when one of the generators finishes replaying its trace.
Definition: trace_cpu.hh:1064
void dcacheRetryRecvd()
When the data cache port receives a retry, schedule the dcacheNextEvent event.
Definition: trace_cpu.cc:1130
Tick traceOffset
This stores the time offset in the trace, which is subtracted from the ready times of requests.
Definition: trace_cpu.hh:1072
gem5::TraceCPU::TraceStats traceStats
ElasticDataGen dcacheGen
Instance of ElasticDataGen to replay data read and write requests.
Definition: trace_cpu.hh:1034
std::string instTraceFile
File names for input instruction and data traces.
Definition: trace_cpu.hh:319
FixedRetryGen icacheGen
Instance of FixedRetryGen to replay instruction read requests.
Definition: trace_cpu.hh:1031
TraceCPU(const TraceCPUParams &params)
Definition: trace_cpu.cc:49
void updateNumOps(uint64_t rob_num)
Definition: trace_cpu.cc:86
std::string dataTraceFile
Definition: trace_cpu.hh:319
uint64_t progressMsgThreshold
Definition: trace_cpu.hh:1106
void dcacheRecvTimingResp(PacketPtr pkt)
When data cache port receives a response, this calls the dcache generator method handle to complete t...
Definition: trace_cpu.cc:1172
void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: trace_cpu.cc:105
void icacheRetryRecvd()
When the instruction cache port receives a retry, schedule the icacheNextEvent event.
Definition: trace_cpu.cc:1120
EventFunctionWrapper dcacheNextEvent
Event for the control flow method schedDcacheNext()
Definition: trace_cpu.hh:1055
static int numTraceCPUs
Number of Trace CPUs in the system used as a shared variable and passed to the CountedExitEvent event...
Definition: trace_cpu.hh:1080
void takeOverFrom(BaseCPU *oldCPU)
Load the state of a CPU from the previous CPU object, invoked on all new CPUs that are about to be sw...
Definition: trace_cpu.cc:97
EventFunctionWrapper icacheNextEvent
Event for the control flow method schedIcacheNext()
Definition: trace_cpu.hh:1052
void schedDcacheNextEvent(Tick when)
Schedule event dcacheNextEvent at the given tick.
Definition: trace_cpu.cc:1140
CountedExitEvent * execCompleteEvent
A CountedExitEvent which, when serviced, decrements the counter.
Definition: trace_cpu.hh:1087
void checkAndSchedExitEvent()
This is called when either generator finishes replaying its trace.
Definition: trace_cpu.cc:188
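The coordinated exit relies on CountedExitEvent (sim/sim_events.hh), whose shared counter here is numTraceCPUs. A hedged sketch of the pattern, with a hypothetical helper name:

#include "sim/eventq.hh"
#include "sim/sim_events.hh"

// Each TraceCPU arms a CountedExitEvent backed by the shared counter;
// simulation exits only once every trace CPU has finished its trace.
void
scheduleSharedExit(gem5::EventManager &mgr,
                   gem5::CountedExitEvent *&execCompleteEvent,
                   int &numTraceCPUs, gem5::Tick when)
{
    if (!execCompleteEvent) {
        execCompleteEvent = new gem5::CountedExitEvent(
            "end of all traces reached", numTraceCPUs);
    }
    mgr.schedule(*execCompleteEvent, when);
}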
void schedDcacheNext()
This is the control-flow method that drives the dcacheGen to replay the trace.
Definition: trace_cpu.cc:174
void schedIcacheNext()
This is the control-flow method that drives the icacheGen to replay the trace.
Definition: trace_cpu.cc:148
Derived & precision(int _precision)
Set the precision and mark this stat to print at the end of simulation.
Definition: statistics.hh:346
Statistics container.
Definition: group.hh:94
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:622
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
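For context, here is a hedged example of the macro in use inside a statistics::Group constructor, in the style of TraceStats above; the group name, stat names, and descriptions are illustrative:

#include "base/statistics.hh"
#include "base/stats/group.hh"
#include "base/stats/units.hh"

struct DemoStats : public gem5::statistics::Group
{
    DemoStats(gem5::statistics::Group *parent)
        : gem5::statistics::Group(parent),
          ADD_STAT(numCycles, gem5::statistics::units::Cycle::get(),
                   "Simulated cycles"),
          ADD_STAT(numOps, gem5::statistics::units::Count::get(),
                   "Number of micro-ops simulated"),
          ADD_STAT(cpi, gem5::statistics::units::Rate<
                       gem5::statistics::units::Cycle,
                       gem5::statistics::units::Count>::get(),
                   "Cycles per micro-op, a proxy for CPI")
    {
        cpi = numCycles / numOps;    // Formula: evaluated at stats dump
        cpi.precision(6);            // see precision() documented above
    }

    gem5::statistics::Scalar numCycles;
    gem5::statistics::Scalar numOps;
    gem5::statistics::Formula cpi;
};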
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
void reschedule(Event &event, Tick when, bool always=false)
Definition: eventq.hh:1037
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
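Together, scheduled(), schedule(), reschedule(), and when() form the schedule-or-pull-earlier pattern used for the cache events. A hedged sketch with a hypothetical helper name:

#include "sim/eventq.hh"

// Schedule the event if idle; if already scheduled, only move it earlier.
void
schedOrAdvance(gem5::EventManager &mgr, gem5::Event &ev, gem5::Tick when)
{
    if (!ev.scheduled())
        mgr.schedule(ev, when);
    else if (when < ev.when())
        mgr.reschedule(ev, when);
}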
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
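Typical usage of fatal_if() and panic_if(); the function and messages below are illustrative:

#include <cstdint>

#include "base/logging.hh"

void
validateWindow(uint64_t robSize, uint64_t windowSize)
{
    // Unmet user-visible configuration constraint: report with fatal_if().
    fatal_if(windowSize > robSize,
             "Window size %d exceeds ROB size %d", windowSize, robSize);
    // Internal invariant violation: report with panic_if().
    panic_if(windowSize == 0, "Window size must be non-zero");
}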
uint8_t flags
Definition: helpers.cc:66
#define inform(...)
Definition: logging.hh:247
Bitfield< 7 > i
Definition: misc_types.hh:67
Bitfield< 23, 0 > offset
Definition: types.hh:144
Bitfield< 4 > pc
Bitfield< 15 > system
Definition: misc.hh:1004
Bitfield< 3 > addr
Definition: types.hh:84
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
Tick Frequency
The simulated frequency of curTick(), in ticks per second.
Definition: core.cc:48
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
uint64_t Tick
Tick count type.
Definition: types.hh:58
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition: sim_events.cc:88
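When enableEarlyExit is set, a single finished trace can end the simulation through this function; only the message and exit code need to be given, since the remaining parameters have defaults:

#include "sim/sim_exit.hh"

void
earlyExitSketch()
{
    // Returns control to Python at the end of the current cycle.
    gem5::exitSimLoop("end of trace reached", 0);
}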
const Tick MaxTick
Definition: types.hh:60
int ContextID
Globally unique thread context ID.
Definition: types.hh:239
statistics::Scalar numCycles
Definition: base.hh:620
statistics::Scalar maxDependents
Stats for data memory accesses replayed.
Definition: trace_cpu.hh:1016
statistics::Scalar dataLastTick
Tick when ElasticDataGen completes execution.
Definition: trace_cpu.hh:1026
ElasticDataGenStatGroup(statistics::Group *parent, const std::string &_name)
_name is the extension appended to the name for these stats.
Definition: trace_cpu.cc:225
Struct to store a ready-to-execute node and its execution tick.
Definition: trace_cpu.hh:648
Tick execTick
The tick at which the ready node must be executed.
Definition: trace_cpu.hh:653
NodeSeqNum seqNum
The sequence number of the ready node.
Definition: trace_cpu.hh:650
statistics::Scalar numSendAttempted
Stats for instruction accesses replayed.
Definition: trace_cpu.hh:518
FixedRetryGenStatGroup(statistics::Group *parent, const std::string &_name)
_name is the extension appended to the name for these stats.
Definition: trace_cpu.cc:971
statistics::Scalar instLastTick
Last simulated tick by the FixedRetryGen.
Definition: trace_cpu.hh:523
This struct stores a line in the trace file.
Definition: trace_cpu.hh:336
void clear()
Make this element invalid.
Definition: trace_cpu.hh:366
MemCmd cmd
Specifies whether the request is a read or a write.
Definition: trace_cpu.hh:339
Addr addr
The address for the request.
Definition: trace_cpu.hh:342
Addr blocksize
The size of the access for the request.
Definition: trace_cpu.hh:345
Request::FlagsType flags
Potential request flags to use.
Definition: trace_cpu.hh:351
Tick tick
The time at which the request should be sent.
Definition: trace_cpu.hh:348
bool isValid() const
Check validity of this element.
Definition: trace_cpu.hh:361
statistics::Scalar numSchedDcacheEvent
Definition: trace_cpu.hh:1110
statistics::Scalar numSchedIcacheEvent
Definition: trace_cpu.hh:1111
statistics::Formula cpi
Stat for the CPI.
Definition: trace_cpu.hh:1117
TraceStats(TraceCPU *trace)
Definition: trace_cpu.cc:207
statistics::Scalar numOps
Stat for number of simulated micro-ops.
Definition: trace_cpu.hh:1114
const std::string & name()
Definition: trace.cc:49

Generated on Wed Dec 21 2022 10:22:32 for gem5 by doxygen 1.9.1