gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
trace_cpu.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 - 2016 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are
16  * met: redistributions of source code must retain the above copyright
17  * notice, this list of conditions and the following disclaimer;
18  * redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution;
21  * neither the name of the copyright holders nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  * Authors: Radhika Jagtap
38  * Andreas Hansson
39  * Thomas Grass
40  */
41 
42 #include "cpu/trace/trace_cpu.hh"
43 
44 #include "sim/sim_exit.hh"
45 
46 // Declare and initialize the static counter for number of trace CPUs.
48 
49 TraceCPU::TraceCPU(TraceCPUParams *params)
50  : BaseCPU(params),
51  icachePort(this),
52  dcachePort(this),
53  instMasterID(params->system->getMasterId(this, "inst")),
54  dataMasterID(params->system->getMasterId(this, "data")),
55  instTraceFile(params->instTraceFile),
56  dataTraceFile(params->dataTraceFile),
57  icacheGen(*this, ".iside", icachePort, instMasterID, instTraceFile),
58  dcacheGen(*this, ".dside", dcachePort, dataMasterID, dataTraceFile,
59  params),
60  icacheNextEvent([this]{ schedIcacheNext(); }, name()),
61  dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
62  oneTraceComplete(false),
63  traceOffset(0),
64  execCompleteEvent(nullptr),
65  enableEarlyExit(params->enableEarlyExit),
66  progressMsgInterval(params->progressMsgInterval),
67  progressMsgThreshold(params->progressMsgInterval)
68 {
69  // Increment static counter for number of Trace CPUs.
71 
72  // Check that the python parameters for sizes of ROB, store buffer and
73  // load buffer do not overflow the corresponding C++ variables.
74  fatal_if(params->sizeROB > UINT16_MAX, "ROB size set to %d exceeds the "
75  "max. value of %d.\n", params->sizeROB, UINT16_MAX);
76  fatal_if(params->sizeStoreBuffer > UINT16_MAX, "ROB size set to %d "
77  "exceeds the max. value of %d.\n", params->sizeROB,
78  UINT16_MAX);
79  fatal_if(params->sizeLoadBuffer > UINT16_MAX, "Load buffer size set to"
80  " %d exceeds the max. value of %d.\n",
81  params->sizeLoadBuffer, UINT16_MAX);
82 }
83 
// NOTE(review): empty body of a member function whose signature line
// (original line 84, presumably the TraceCPU destructor) is missing from
// this scraped listing -- confirm against the upstream trace_cpu.cc.
85 {
86 
87 }
88 
89 TraceCPU*
90 TraceCPUParams::create()
91 {
92  return new TraceCPU(this);
93 }
94 
// Record the number of micro-ops simulated so far (taken from the executed
// node's ROB number) and emit a periodic progress message once the
// threshold is crossed.
// NOTE(review): original lines 99 and 101 -- presumably the
// progressMsgInterval check opening the if-block and the threshold
// increment -- are missing from this scraped listing; confirm upstream.
95 void
96 TraceCPU::updateNumOps(uint64_t rob_num)
97 {
98  numOps = rob_num;
100  inform("%s: %i insts committed\n", name(), progressMsgThreshold);
102  }
103 }
104 
// Take over the memory-side ports from a previously running CPU model
// during a CPU switch (e.g. detailed CPU -> TraceCPU).
// NOTE(review): the signature line (original line 106, presumably
// TraceCPU::takeOverFrom(BaseCPU *oldCPU)) is missing from this listing.
105 void
107 {
108  // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
109  getInstPort().takeOverFrom(&oldCPU->getInstPort());
110  getDataPort().takeOverFrom(&oldCPU->getDataPort());
111 }
112 
// Initialise both trace generators, compute the common time offset as the
// minimum first-request tick of the two traces, and schedule the first
// icache/dcache replay events relative to that offset. Also creates the
// counted exit event when early exit is disabled.
// NOTE(review): the signature line (original line 114, TraceCPU::init())
// and original line 142 (presumably the call that adjusts the dcache
// generator's ready nodes by traceOffset, per the comment above it) are
// missing from this scraped listing.
113 void
115 {
116  DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\"."
117  "\n", instTraceFile);
118  DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
119  dataTraceFile);
120 
121  BaseCPU::init();
122 
123  // Get the send tick of the first instruction read request
124  Tick first_icache_tick = icacheGen.init();
125 
126  // Get the send tick of the first data read/write request
127  Tick first_dcache_tick = dcacheGen.init();
128 
129  // Set the trace offset as the minimum of that in both traces
130  traceOffset = std::min(first_icache_tick, first_dcache_tick);
131  inform("%s: Time offset (tick) found as min of both traces is %lli.\n",
132  name(), traceOffset);
133 
134  // Schedule next icache and dcache event by subtracting the offset
135  schedule(icacheNextEvent, first_icache_tick - traceOffset);
136  schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
137 
138  // Adjust the trace offset for the dcache generator's ready nodes
139  // We don't need to do this for the icache generator as it will
140  // send its first request at the first event and schedule subsequent
141  // events using a relative tick delta
143 
144  // If the Trace CPU simulation is configured to exit on any one trace
145  // completion then we don't need a counted event to count down all Trace
146  // CPUs in the system. If not then instantiate a counted event.
147  if (!enableEarlyExit) {
148  // The static counter for number of Trace CPUs is correctly set at
149  // this point so create an event and pass it.
150  execCompleteEvent = new CountedExitEvent("end of all traces reached.",
151  numTraceCPUs);
152  }
153 
154 }
155 
// Event handler for the instruction-side generator: attempt to send the
// current (or retry) fetch packet; on success the next event is scheduled
// at a relative tick delta, otherwise a cache retry drives progress.
// NOTE(review): the signature line (original line 157) plus original lines
// 167-168 (presumably the schedule() call and event-count stat update) and
// 175 (presumably checkAndSchedExitEvent()) are missing from this listing.
156 void
158 {
159  DPRINTF(TraceCPUInst, "IcacheGen event.\n");
160 
161  // Try to send the current packet or a retry packet if there is one
162  bool sched_next = icacheGen.tryNext();
163  // If packet sent successfully, schedule next event
164  if (sched_next) {
165  DPRINTF(TraceCPUInst, "Scheduling next icacheGen event "
166  "at %d.\n", curTick() + icacheGen.tickDelta());
169  } else {
170  // check if traceComplete. If not, do nothing because sending failed
171  // and next event will be scheduled via RecvRetry()
172  if (icacheGen.isTraceComplete()) {
173  // If this is the first trace to complete, set the variable. If it
174  // is already set then both traces are complete to exit sim.
176  }
177  }
178  return;
179 }
180 
// Event handler for the data-side generator: run one round of the elastic
// execute() engine and, once it reports completion, handle end-of-trace.
// NOTE(review): the signature line (original line 182), line 187 (the
// numCycles update the comment refers to) and line 191 (presumably
// checkAndSchedExitEvent() inside the if) are missing from this listing.
181 void
183 {
184  DPRINTF(TraceCPUData, "DcacheGen event.\n");
185 
186  // Update stat for numCycles
188 
189  dcacheGen.execute();
190  if (dcacheGen.isExecComplete()) {
192  }
193 }
194 
// Called when one of the two traces finishes. The first completion only
// sets oneTraceComplete; the second triggers simulation exit -- immediate
// when enableEarlyExit is set, otherwise via the counted exit event.
// NOTE(review): the signature line (original line 196) and original line
// 211 (presumably scheduling *execCompleteEvent in the else branch) are
// missing from this scraped listing.
194 void
197 {
198  if (!oneTraceComplete) {
199  oneTraceComplete = true;
200  } else {
201  // Schedule event to indicate execution is complete as both
202  // instruction and data access traces have been played back.
203  inform("%s: Execution complete.\n", name());
204  // If the replay is configured to exit early, that is when any one
205  // execution is complete then exit immediately and return. Otherwise,
206  // schedule the counted exit that counts down completion of each Trace
207  // CPU.
208  if (enableEarlyExit) {
209  exitSimLoop("End of trace reached");
210  } else {
212  }
213  }
214 }
215 
// Register TraceCPU-level statistics: event-scheduling counters, the
// micro-op count and a derived CPI formula (numCycles/numOps).
// NOTE(review): several lines are missing from this scraped listing: the
// signature (original line 217), line 220 (presumably BaseCPU::regStats()),
// lines 222 and 227 (the numSchedDcacheEvent / numSchedIcacheEvent stat
// names opening the two builder chains) and lines 244-245 (presumably the
// icacheGen/dcacheGen regStats() calls).
216 void
218 {
219 
221 
223  .name(name() + ".numSchedDcacheEvent")
224  .desc("Number of events scheduled to trigger data request generator")
225  ;
226 
228  .name(name() + ".numSchedIcacheEvent")
229  .desc("Number of events scheduled to trigger instruction request generator")
230  ;
231 
232  numOps
233  .name(name() + ".numOps")
234  .desc("Number of micro-ops simulated by the Trace CPU")
235  ;
236 
237  cpi
238  .name(name() + ".cpi")
239  .desc("Cycles per micro-op used as a proxy for CPI")
240  .precision(6)
241  ;
242  cpi = numCycles/numOps;
243 
246 }
247 
// Register the elastic data generator's statistics: dependency-graph and
// ready-list high-water marks, send/retry outcome counters, split-request
// and strictly-ordered access counts, and the last simulated data tick.
// NOTE(review): the signature line (original line 249,
// TraceCPU::ElasticDataGen::regStats()) is missing from this listing.
248 void
250 {
251  using namespace Stats;
252 
253  maxDependents
254  .name(name() + ".maxDependents")
255  .desc("Max number of dependents observed on a node")
256  ;
257 
258  maxReadyListSize
259  .name(name() + ".maxReadyListSize")
260  .desc("Max size of the ready list observed")
261  ;
262 
263  numSendAttempted
264  .name(name() + ".numSendAttempted")
265  .desc("Number of first attempts to send a request")
266  ;
267 
268  numSendSucceeded
269  .name(name() + ".numSendSucceeded")
270  .desc("Number of successful first attempts")
271  ;
272 
273  numSendFailed
274  .name(name() + ".numSendFailed")
275  .desc("Number of failed first attempts")
276  ;
277 
278  numRetrySucceeded
279  .name(name() + ".numRetrySucceeded")
280  .desc("Number of successful retries")
281  ;
282 
283  numSplitReqs
284  .name(name() + ".numSplitReqs")
285  .desc("Number of split requests")
286  ;
287 
288  numSOLoads
289  .name(name() + ".numSOLoads")
290  .desc("Number of strictly ordered loads")
291  ;
292 
293  numSOStores
294  .name(name() + ".numSOStores")
295  .desc("Number of strictly ordered stores")
296  ;
297 
298  dataLastTick
299  .name(name() + ".dataLastTick")
300  .desc("Last tick simulated from the elastic data trace")
301  ;
302 }
303 
// Prime the elastic replay: read two windows of trace records into the
// dependency graph (panicking if the trace is shorter than 2*windowSize)
// and return the execute tick of the earliest dependency-free node so the
// owner can schedule the first dcache event.
// NOTE(review): the signature line (original line 305,
// TraceCPU::ElasticDataGen::init()) is missing from this listing.
304 Tick
306 {
307  DPRINTF(TraceCPUData, "Initializing data memory request generator "
308  "DcacheGen: elastic issue with retry.\n");
309 
310  if (!readNextWindow())
311  panic("Trace has %d elements. It must have at least %d elements.\n",
312  depGraph.size(), 2 * windowSize);
313  DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
314  depGraph.size());
315 
316  if (!readNextWindow())
317  panic("Trace has %d elements. It must have at least %d elements.\n",
318  depGraph.size(), 2 * windowSize);
319  DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
320  depGraph.size());
321 
322  // Print readyList
323  if (DTRACE(TraceCPUData)) {
324  printReadyList();
325  }
326  auto free_itr = readyList.begin();
327  DPRINTF(TraceCPUData, "Execute tick of the first dependency free node %lli"
328  " is %d.\n", free_itr->seqNum, free_itr->execTick);
329  // Return the execute tick of the earliest ready node so that an event
330  // can be scheduled to call execute()
331  return (free_itr->execTick);
332 }
333 
// Shift every already-ready node's execute tick back by the common trace
// offset computed in TraceCPU::init().
// NOTE(review): the signature line (original line 335, presumably
// TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) with its
// opening brace) is missing from this scraped listing.
334 void
336  for (auto& free_node : readyList) {
337  free_node.execTick -= offset;
338  }
339 }
340 
// Close/reset the underlying trace stream at end of replay.
// NOTE(review): the signature line (original line 342, presumably
// TraceCPU::ElasticDataGen::exit()) is missing from this listing.
341 void
343 {
344  trace.reset();
345 }
346 
// Read the next windowSize records from the trace into the dependency
// graph. Each new node gets its ROB and register dependencies annotated
// onto still-live parents; nodes born dependency-free are issued right
// away. Returns false (setting traceComplete) once the trace runs out.
// NOTE(review): the signature line (original line 348, presumably
// TraceCPU::ElasticDataGen::readNextWindow()) is missing from this listing.
347 bool
349 {
350 
351  // Read and add next window
352  DPRINTF(TraceCPUData, "Reading next window from file.\n");
353 
354  if (traceComplete) {
355  // We are at the end of the file, thus we have no more records.
356  // Return false.
357  return false;
358  }
359 
360  DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
361  depGraph.size());
362 
363  uint32_t num_read = 0;
364  while (num_read != windowSize) {
365 
366  // Create a new graph node
367  GraphNode* new_node = new GraphNode;
368 
369  // Read the next line to get the next record. If that fails then end of
370  // trace has been reached and traceComplete needs to be set in addition
371  // to returning false.
372  if (!trace.read(new_node)) {
373  DPRINTF(TraceCPUData, "\tTrace complete!\n");
374  traceComplete = true;
375  return false;
376  }
377 
378  // Annotate the ROB dependencies of the new node onto the parent nodes.
379  addDepsOnParent(new_node, new_node->robDep, new_node->numRobDep);
380  // Annotate the register dependencies of the new node onto the parent
381  // nodes.
382  addDepsOnParent(new_node, new_node->regDep, new_node->numRegDep);
383 
384  num_read++;
385  // Add to map
386  depGraph[new_node->seqNum] = new_node;
387  if (new_node->numRobDep == 0 && new_node->numRegDep == 0) {
388  // Source dependencies are already complete, check if resources
389  // are available and issue. The execution time is approximated
390  // to current time plus the computational delay.
391  checkAndIssue(new_node);
392  }
393  }
394 
395  DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
396  depGraph.size());
397  return true;
398 }
399 
// For each non-zero entry of the node's dependency array (ROB or register),
// look up the parent in the graph: if found, register the new node as a
// dependent of it (updating the maxDependents stat); if not found, the
// parent already retired, so the dependency is cleared and the count
// decremented. A zero entry terminates the scan (arrays are front-filled).
// NOTE(review): the first line of the signature (original line 401,
// presumably TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node,)
// is missing from this scraped listing.
400 template<typename T> void
402  T& dep_array, uint8_t& num_dep)
403 {
404  for (auto& a_dep : dep_array) {
405  // The convention is to set the dependencies starting with the first
406  // index in the ROB and register dependency arrays. Thus, when we reach
407  // a dependency equal to the initialisation value of zero, we know have
408  // iterated over all dependencies and can break.
409  if (a_dep == 0)
410  break;
411  // We look up the valid dependency, i.e. the parent of this node
412  auto parent_itr = depGraph.find(a_dep);
413  if (parent_itr != depGraph.end()) {
414  // If the parent is found, it is yet to be executed. Append a
415  // pointer to the new node to the dependents list of the parent
416  // node.
417  parent_itr->second->dependents.push_back(new_node);
418  auto num_depts = parent_itr->second->dependents.size();
419  maxDependents = std::max<double>(num_depts, maxDependents.value());
420  } else {
421  // The dependency is not found in the graph. So consider
422  // the execution of the parent is complete, i.e. remove this
423  // dependency.
424  a_dep = 0;
425  num_dep--;
426  }
427  }
428 }
429 
// Core elastic-replay engine. One invocation: (1) optionally reads another
// trace window, (2) re-attempts issue of nodes stalled in depFreeQueue,
// (3) walks readyList executing every node whose execTick has arrived --
// resending retryPkt or issuing loads/stores via executeMemReq() -- and
// wakes dependents as dependencies resolve, (4) schedules the next dcache
// event or marks execComplete when everything has drained.
// NOTE(review): the signature line (original line 431, presumably
// TraceCPU::ElasticDataGen::execute()) is missing from this listing.
430 void
432 {
433  DPRINTF(TraceCPUData, "Execute start occupancy:\n");
434  DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
435  "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
436  depFreeQueue.size());
437  hwResource.printOccupancy();
438 
439  // Read next window to make sure that dependents of all dep-free nodes
440  // are in the depGraph
441  if (nextRead) {
442  readNextWindow();
443  nextRead = false;
444  }
445 
446  // First attempt to issue the pending dependency-free nodes held
447  // in depFreeQueue. If resources have become available for a node,
448  // then issue it, i.e. add the node to readyList.
449  while (!depFreeQueue.empty()) {
450  if (checkAndIssue(depFreeQueue.front(), false)) {
451  DPRINTF(TraceCPUData, "Removing from depFreeQueue: seq. num "
452  "%lli.\n", (depFreeQueue.front())->seqNum);
453  depFreeQueue.pop();
454  } else {
455  break;
456  }
457  }
458  // Proceed to execute from readyList
459  auto graph_itr = depGraph.begin();
460  auto free_itr = readyList.begin();
461  // Iterate through readyList until the next free node has its execute
462  // tick later than curTick or the end of readyList is reached
463  while (free_itr->execTick <= curTick() && free_itr != readyList.end()) {
464 
465  // Get pointer to the node to be executed
466  graph_itr = depGraph.find(free_itr->seqNum);
467  assert(graph_itr != depGraph.end());
468  GraphNode* node_ptr = graph_itr->second;
469 
470  // If there is a retryPkt send that else execute the load
471  if (retryPkt) {
472  // The retryPkt must be the request that was created by the
473  // first node in the readyList.
474  if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
475  panic("Retry packet's seqence number does not match "
476  "the first node in the readyList.\n");
477  }
478  if (port.sendTimingReq(retryPkt)) {
479  ++numRetrySucceeded;
480  retryPkt = nullptr;
481  }
482  } else if (node_ptr->isLoad() || node_ptr->isStore()) {
483  // If there is no retryPkt, attempt to send a memory request in
484  // case of a load or store node. If the send fails, executeMemReq()
485  // returns a packet pointer, which we save in retryPkt. In case of
486  // a comp node we don't do anything and simply continue as if the
487  // execution of the comp node succedded.
488  retryPkt = executeMemReq(node_ptr);
489  }
490  // If the retryPkt or a new load/store node failed, we exit from here
491  // as a retry from cache will bring the control to execute(). The
492  // first node in readyList then, will be the failed node.
493  if (retryPkt) {
494  break;
495  }
496 
497  // Proceed to remove dependencies for the successfully executed node.
498  // If it is a load which is not strictly ordered and we sent a
499  // request for it successfully, we do not yet mark any register
500  // dependencies complete. But as per dependency modelling we need
501  // to mark ROB dependencies of load and non load/store nodes which
502  // are based on successful sending of the load as complete.
503  if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
504  // If execute succeeded mark its dependents as complete
505  DPRINTF(TraceCPUData, "Node seq. num %lli sent. Waking up "
506  "dependents..\n", node_ptr->seqNum);
507 
508  auto child_itr = (node_ptr->dependents).begin();
509  while (child_itr != (node_ptr->dependents).end()) {
510  // ROB dependency of a store on a load must not be removed
511  // after load is sent but after response is received
512  if (!(*child_itr)->isStore() &&
513  (*child_itr)->removeRobDep(node_ptr->seqNum)) {
514 
515  // Check if the child node has become dependency free
516  if ((*child_itr)->numRobDep == 0 &&
517  (*child_itr)->numRegDep == 0) {
518 
519  // Source dependencies are complete, check if
520  // resources are available and issue
521  checkAndIssue(*child_itr);
522  }
523  // Remove this child for the sent load and point to new
524  // location of the element following the erased element
525  child_itr = node_ptr->dependents.erase(child_itr);
526  } else {
527  // This child is not dependency-free, point to the next
528  // child
529  child_itr++;
530  }
531  }
532  } else {
533  // If it is a strictly ordered load mark its dependents as complete
534  // as we do not send a request for this case. If it is a store or a
535  // comp node we also mark all its dependents complete.
536  DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
537  " up dependents..\n", node_ptr->seqNum);
538 
539  for (auto child : node_ptr->dependents) {
540  // If the child node is dependency free removeDepOnInst()
541  // returns true.
542  if (child->removeDepOnInst(node_ptr->seqNum)) {
543  // Source dependencies are complete, check if resources
544  // are available and issue
545  checkAndIssue(child);
546  }
547  }
548  }
549 
550  // After executing the node, remove from readyList and delete node.
551  readyList.erase(free_itr);
552  // If it is a cacheable load which was sent, don't delete
553  // just yet. Delete it in completeMemAccess() after the
554  // response is received. If it is an strictly ordered
555  // load, it was not sent and all dependencies were simply
556  // marked complete. Thus it is safe to delete it. For
557  // stores and non load/store nodes all dependencies were
558  // marked complete so it is safe to delete it.
559  if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
560  // Release all resources occupied by the completed node
561  hwResource.release(node_ptr);
562  // clear the dynamically allocated set of dependents
563  (node_ptr->dependents).clear();
564  // Update the stat for numOps simulated
565  owner.updateNumOps(node_ptr->robNum);
566  // delete node
567  delete node_ptr;
568  // remove from graph
569  depGraph.erase(graph_itr);
570  }
571  // Point to first node to continue to next iteration of while loop
572  free_itr = readyList.begin();
573  } // end of while loop
574 
575  // Print readyList, sizes of queues and resource status after updating
576  if (DTRACE(TraceCPUData)) {
577  printReadyList();
578  DPRINTF(TraceCPUData, "Execute end occupancy:\n");
579  DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
580  "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
581  depFreeQueue.size());
582  hwResource.printOccupancy();
583  }
584 
585  if (retryPkt) {
586  DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
587  "event from the cache for seq. num %lli.\n",
588  retryPkt->req->getReqInstSeqNum());
589  return;
590  }
591  // If the size of the dependency graph is less than the dependency window
592  // then read from the trace file to populate the graph next time we are in
593  // execute.
594  if (depGraph.size() < windowSize && !traceComplete)
595  nextRead = true;
596 
597  // If cache is not blocked, schedule an event for the first execTick in
598  // readyList else retry from cache will schedule the event. If the ready
599  // list is empty then check if the next pending node has resources
600  // available to issue. If yes, then schedule an event for the next cycle.
601  if (!readyList.empty()) {
602  Tick next_event_tick = std::max(readyList.begin()->execTick,
603  curTick());
604  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
605  next_event_tick);
606  owner.schedDcacheNextEvent(next_event_tick);
607  } else if (readyList.empty() && !depFreeQueue.empty() &&
608  hwResource.isAvailable(depFreeQueue.front())) {
609  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
610  owner.clockEdge(Cycles(1)));
611  owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
612  }
613 
614  // If trace is completely read, readyList is empty and depGraph is empty,
615  // set execComplete to true
616  if (depGraph.empty() && readyList.empty() && traceComplete &&
617  !hwResource.awaitingResponse()) {
618  DPRINTF(TraceCPUData, "\tExecution Complete!\n");
619  execComplete = true;
620  dataLastTick = curTick();
621  }
622 }
623 
// Build and send the timing memory request for a load/store node. Strictly
// ordered accesses are skipped (stat-counted, treated as sent). Requests
// straddling a cache line are truncated to the first line. Returns nullptr
// on success (or skip); returns the packet for retry if the send failed.
// NOTE(review): the signature line (original line 625, presumably
// TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)) is missing
// from this scraped listing.
624 PacketPtr
626 {
627 
628  DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
629  "virt addr %d, pc %#x, size %d, flags %d).\n",
630  node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
631  node_ptr->pc, node_ptr->size, node_ptr->flags);
632 
633  // If the request is strictly ordered, do not send it. Just return nullptr
634  // as if it was succesfully sent.
635  if (node_ptr->isStrictlyOrdered()) {
636  node_ptr->isLoad() ? ++numSOLoads : ++numSOStores;
637  DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
638  node_ptr->seqNum);
639  return nullptr;
640  }
641 
642  // Check if the request spans two cache lines as this condition triggers
643  // an assert fail in the L1 cache. If it does then truncate the size to
644  // access only until the end of that line and ignore the remainder. The
645  // stat counting this is useful to keep a check on how frequently this
646  // happens. If required the code could be revised to mimick splitting such
647  // a request into two.
648  unsigned blk_size = owner.cacheLineSize();
649  Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
650  if (!(blk_offset + node_ptr->size <= blk_size)) {
651  node_ptr->size = blk_size - blk_offset;
652  ++numSplitReqs;
653  }
654 
655  // Create a request and the packet containing request
656  auto req = std::make_shared<Request>(
657  node_ptr->physAddr, node_ptr->size,
658  node_ptr->flags, masterID, node_ptr->seqNum,
659  ContextID(0));
660 
661  req->setPC(node_ptr->pc);
662  // If virtual address is valid, set the asid and virtual address fields
663  // of the request.
664  if (node_ptr->virtAddr != 0) {
665  req->setVirt(node_ptr->asid, node_ptr->virtAddr, node_ptr->size,
666  node_ptr->flags, masterID, node_ptr->pc);
667  req->setPaddr(node_ptr->physAddr);
668  req->setReqInstSeqNum(node_ptr->seqNum);
669  }
670 
671  PacketPtr pkt;
672  uint8_t* pkt_data = new uint8_t[req->getSize()];
673  if (node_ptr->isLoad()) {
674  pkt = Packet::createRead(req);
675  } else {
676  pkt = Packet::createWrite(req);
677  memset(pkt_data, 0xA, req->getSize());
678  }
679  pkt->dataDynamic(pkt_data);
680 
681  // Call MasterPort method to send a timing request for this packet
682  bool success = port.sendTimingReq(pkt);
683  ++numSendAttempted;
684 
685  if (!success) {
686  // If it fails, return the packet to retry when a retry is signalled by
687  // the cache
688  ++numSendFailed;
689  DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
690  return pkt;
691  } else {
692  // It is succeeds, return nullptr
693  ++numSendSucceeded;
694  return nullptr;
695  }
696 }
697 
// Issue a dependency-free node if hardware resources (ROB / load / store
// buffer entries) permit: on success adds it to readyList at clockEdge() +
// compDelay and occupies the resources, returning true. Otherwise, on a
// first attempt the node is parked in depFreeQueue; returns false.
// NOTE(review): the signature line (original line 699, presumably
// TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr,
// bool first)) is missing from this scraped listing.
698 bool
700 {
701  // Assert the node is dependency-free
702  assert(node_ptr->numRobDep == 0 && node_ptr->numRegDep == 0);
703 
704  // If this is the first attempt, print a debug message to indicate this.
705  if (first) {
706  DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
707  " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
708  node_ptr->robNum);
709  }
710 
711  // Check if resources are available to issue the specific node
712  if (hwResource.isAvailable(node_ptr)) {
713  // If resources are free only then add to readyList
714  DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. Adding"
715  " to readyList, occupying resources.\n", node_ptr->seqNum);
716  // Compute the execute tick by adding the compute delay for the node
717  // and add the ready node to the ready list
718  addToSortedReadyList(node_ptr->seqNum,
719  owner.clockEdge() + node_ptr->compDelay);
720  // Account for the resources taken up by this issued node.
721  hwResource.occupy(node_ptr);
722  return true;
723 
724  } else {
725  if (first) {
726  // Although dependencies are complete, resources are not available.
727  DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli."
728  " Adding to depFreeQueue.\n", node_ptr->seqNum);
729  depFreeQueue.push(node_ptr);
730  } else {
731  DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num %lli. "
732  "Still pending issue.\n", node_ptr->seqNum);
733  }
734  return false;
735  }
736 }
737 
// Handle a timing response: for writes just free a store-buffer entry; for
// reads release the load's resources, wake its remaining dependents, delete
// the node from the graph, then (unless blocked on a retry) schedule the
// next execute() pass and trigger another trace-window read if needed.
// NOTE(review): the signature line (original line 739, presumably
// TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)) is missing
// from this scraped listing.
738 void
740 {
741  // Release the resources for this completed node.
742  if (pkt->isWrite()) {
743  // Consider store complete.
744  hwResource.releaseStoreBuffer();
745  // If it is a store response then do nothing since we do not model
746  // dependencies on store completion in the trace. But if we were
747  // blocking execution due to store buffer fullness, we need to schedule
748  // an event and attempt to progress.
749  } else {
750  // If it is a load response then release the dependents waiting on it.
751  // Get pointer to the completed load
752  auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
753  assert(graph_itr != depGraph.end());
754  GraphNode* node_ptr = graph_itr->second;
755 
756  // Release resources occupied by the load
757  hwResource.release(node_ptr);
758 
759  DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
760  " dependents..\n", node_ptr->seqNum);
761 
762  for (auto child : node_ptr->dependents) {
763  if (child->removeDepOnInst(node_ptr->seqNum)) {
764  checkAndIssue(child);
765  }
766  }
767 
768  // clear the dynamically allocated set of dependents
769  (node_ptr->dependents).clear();
770  // Update the stat for numOps completed
771  owner.updateNumOps(node_ptr->robNum);
772  // delete node
773  delete node_ptr;
774  // remove from graph
775  depGraph.erase(graph_itr);
776  }
777 
778  if (DTRACE(TraceCPUData)) {
779  printReadyList();
780  }
781 
782  // If the size of the dependency graph is less than the dependency window
783  // then read from the trace file to populate the graph next time we are in
784  // execute.
785  if (depGraph.size() < windowSize && !traceComplete)
786  nextRead = true;
787 
788  // If not waiting for retry, attempt to schedule next event
789  if (!retryPkt) {
790  // We might have new dep-free nodes in the list which will have execute
791  // tick greater than or equal to curTick. But a new dep-free node might
792  // have its execute tick earlier. Therefore, attempt to reschedule. It
793  // could happen that the readyList is empty and we got here via a
794  // last remaining response. So, either the trace is complete or there
795  // are pending nodes in the depFreeQueue. The checking is done in the
796  // execute() control flow, so schedule an event to go via that flow.
797  Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
798  std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
799  DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
800  next_event_tick);
801  owner.schedDcacheNextEvent(next_event_tick);
802  }
803 }
804 
// Insert a (seqNum, execTick) pair into readyList keeping it sorted by
// execute tick, with ties broken by ascending sequence number. A node whose
// packet is awaiting retry is pinned at the head. Updates the
// maxReadyListSize stat on every insertion.
// NOTE(review): the first line of the signature (original line 806,
// presumably TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum
// seq_num,) is missing from this scraped listing.
805 void
807  Tick exec_tick)
808 {
809  ReadyNode ready_node;
810  ready_node.seqNum = seq_num;
811  ready_node.execTick = exec_tick;
812 
813  // Iterator to readyList
814  auto itr = readyList.begin();
815 
816  // If the readyList is empty, simply insert the new node at the beginning
817  // and return
818  if (itr == readyList.end()) {
819  readyList.insert(itr, ready_node);
820  maxReadyListSize = std::max<double>(readyList.size(),
821  maxReadyListSize.value());
822  return;
823  }
824 
825  // If the new node has its execution tick equal to the first node in the
826  // list then go to the next node. If the first node in the list failed
827  // to execute, its position as the first is thus maintained.
828  if (retryPkt)
829  if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
830  itr++;
831 
832  // Increment the iterator and compare the node pointed to by it to the new
833  // node till the position to insert the new node is found.
834  bool found = false;
835  while (!found && itr != readyList.end()) {
836  // If the execution tick of the new node is less than the node then
837  // this is the position to insert
838  if (exec_tick < itr->execTick)
839  found = true;
840  // If the execution tick of the new node is equal to the node then
841  // sort in ascending order of sequence numbers
842  else if (exec_tick == itr->execTick) {
843  // If the sequence number of the new node is less than the node
844  // then this is the position to insert
845  if (seq_num < itr->seqNum)
846  found = true;
847  // Else go to next node
848  else
849  itr++;
850  }
851  // If the execution tick of the new node is greater than the node then
852  // go to the next node
853  else
854  itr++;
855  }
856  readyList.insert(itr, ready_node);
857  // Update the stat for max size reached of the readyList
858  maxReadyListSize = std::max<double>(readyList.size(),
859  maxReadyListSize.value());
860 }
861 
// Debug helper: dump every readyList entry (sequence number, node type,
// execute tick) via the TraceCPUData debug flag.
// NOTE(review): the signature line (original line 863, presumably
// TraceCPU::ElasticDataGen::printReadyList() with its opening brace) is
// missing from this scraped listing.
862 void
864 
865  auto itr = readyList.begin();
866  if (itr == readyList.end()) {
867  DPRINTF(TraceCPUData, "readyList is empty.\n");
868  return;
869  }
870  DPRINTF(TraceCPUData, "Printing readyList:\n");
871  while (itr != readyList.end()) {
872  auto graph_itr = depGraph.find(itr->seqNum);
873  GraphNode* node_ptr M5_VAR_USED = graph_itr->second;
874  DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
875  node_ptr->typeToStr(), itr->execTick);
876  itr++;
877  }
878 }
879 
// HardwareResource constructor: record the configured ROB / store-buffer /
// load-buffer capacities and start with no in-flight loads or stores;
// oldestInFlightRobNum begins at UINT64_MAX (its "empty" sentinel).
// NOTE(review): the first line of the signature (original line 880,
// presumably TraceCPU::ElasticDataGen::HardwareResource::HardwareResource()
// is missing from this scraped listing.
881  uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
882  : sizeROB(max_rob),
883  sizeStoreBuffer(max_stores),
884  sizeLoadBuffer(max_loads),
885  oldestInFlightRobNum(UINT64_MAX),
886  numInFlightLoads(0),
887  numInFlightStores(0)
888 {}
889 
// Account for an issued node: track it in inFlightNodes (keyed by seqNum,
// valued by robNum -- the map's first entry is the numerically least, i.e.
// oldest, robNum) and occupy a load/store buffer entry where applicable.
// NOTE(review): the signature line (original line 891) and original lines
// 901/903 (presumably the ++numInFlightLoads / ++numInFlightStores
// statements inside the two branches) are missing from this listing.
890 void
892 {
893  // Occupy ROB entry for the issued node
894  // Merely maintain the oldest node, i.e. numerically least robNum by saving
895  // it in the variable oldestInFLightRobNum.
896  inFlightNodes[new_node->seqNum] = new_node->robNum;
897  oldestInFlightRobNum = inFlightNodes.begin()->second;
898 
899  // Occupy Load/Store Buffer entry for the issued node if applicable
900  if (new_node->isLoad()) {
902  } else if (new_node->isStore()) {
904  } // else if it is a non load/store node, no buffer entry is occupied
905 
906  printOccupancy();
907 }
908 
// Release the ROB tracking for a finished node and update the oldest
// in-flight robNum (reset to the UINT64_MAX sentinel when nothing remains).
// Loads also free their load-buffer entry here; strictly ordered stores
// free their store-buffer entry here (normal stores free it on response in
// releaseStoreBuffer()).
// NOTE(review): missing from this listing: the signature line (original
// line 910), line 931 (the second DPRINTFR argument, presumably
// oldestInFlightRobNum) and lines 939/946 (presumably --numInFlightLoads /
// --numInFlightStores in the respective branches).
909 void
911 {
912  assert(!inFlightNodes.empty());
913  DPRINTFR(TraceCPUData, "\tClearing done seq. num %d from inFlightNodes..\n",
914  done_node->seqNum);
915 
916  assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
917  inFlightNodes.erase(done_node->seqNum);
918 
919  if (inFlightNodes.empty()) {
920  // If we delete the only in-flight node and then the
921  // oldestInFlightRobNum is set to it's initialized (max) value.
922  oldestInFlightRobNum = UINT64_MAX;
923  } else {
924  // Set the oldest in-flight node rob number equal to the first node in
925  // the inFlightNodes since that will have the numerically least value.
926  oldestInFlightRobNum = inFlightNodes.begin()->second;
927  }
928 
929  DPRINTFR(TraceCPUData, "\tCleared. inFlightNodes.size() = %d, "
930  "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
932 
933  // A store is considered complete when a request is sent, thus ROB entry is
934  // freed. But it occupies an entry in the Store Buffer until its response
935  // is received. A load is considered complete when a response is received,
936  // thus both ROB and Load Buffer entries can be released.
937  if (done_node->isLoad()) {
938  assert(numInFlightLoads != 0);
940  }
941  // For normal writes, we send the requests out and clear a store buffer
942  // entry on response. For writes which are strictly ordered, for e.g.
943  // writes to device registers, we do that within release() which is called
944  // when node is executed and taken off from readyList.
945  if (done_node->isStore() && done_node->isStrictlyOrdered()) {
947  }
948 }
949 
950 void
952 {
953  assert(numInFlightStores != 0);
955 }
956 
957 bool
959  const GraphNode* new_node) const
960 {
961  uint16_t num_in_flight_nodes;
962  if (inFlightNodes.empty()) {
963  num_in_flight_nodes = 0;
964  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
965  " #in-flight nodes = 0", new_node->seqNum);
966  } else if (new_node->robNum > oldestInFlightRobNum) {
967  // This is the intuitive case where new dep-free node is younger
968  // instruction than the oldest instruction in-flight. Thus we make sure
969  // in_flight_nodes does not overflow.
970  num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
971  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
972  " #in-flight nodes = %d - %d = %d", new_node->seqNum,
973  new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
974  } else {
975  // This is the case where an instruction older than the oldest in-
976  // flight instruction becomes dep-free. Thus we must have already
977  // accounted for the entry in ROB for this new dep-free node.
978  // Immediately after this check returns true, oldestInFlightRobNum will
979  // be updated in occupy(). We simply let this node issue now.
980  num_in_flight_nodes = 0;
981  DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
982  " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
983  new_node->seqNum, new_node->robNum);
984  }
985  DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
988  // Check if resources are available to issue the specific node
989  if (num_in_flight_nodes >= sizeROB) {
990  return false;
991  }
992  if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
993  return false;
994  }
995  if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
996  return false;
997  }
998  return true;
999 }
1000 
1001 bool
1003  // Return true if there is at least one read or write request in flight
1004  return (numInFlightStores != 0 || numInFlightLoads != 0);
1005 }
1006 
1007 void
1009  DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
1010  "LQ = %d/%d, SQ = %d/%d.\n",
1014 }
1015 
1016 void
1018 {
1019  using namespace Stats;
1020 
1022  .name(name() + ".numSendAttempted")
1023  .desc("Number of first attempts to send a request")
1024  ;
1025 
1027  .name(name() + ".numSendSucceeded")
1028  .desc("Number of successful first attempts")
1029  ;
1030 
1032  .name(name() + ".numSendFailed")
1033  .desc("Number of failed first attempts")
1034  ;
1035 
1037  .name(name() + ".numRetrySucceeded")
1038  .desc("Number of successful retries")
1039  ;
1040 
1041  instLastTick
1042  .name(name() + ".instLastTick")
1043  .desc("Last tick simulated from the fixed inst trace")
1044  ;
1045 }
1046 
1047 Tick
1049 {
1050  DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
1051  " IcacheGen: fixed issue with retry.\n");
1052 
1053  if (nextExecute()) {
1054  DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
1055  return currElement.tick;
1056  } else {
1057  panic("Read of first message in the trace failed.\n");
1058  return MaxTick;
1059  }
1060 }
1061 
1062 bool
1064 {
1065  // If there is a retry packet, try to send it
1066  if (retryPkt) {
1067 
1068  DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1069 
1070  if (!port.sendTimingReq(retryPkt)) {
1071  // Still blocked! This should never occur.
1072  DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1073  return false;
1074  }
1076  } else {
1077 
1078  DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1079 
1080  // try sending current element
1081  assert(currElement.isValid());
1082 
1083  ++numSendAttempted;
1084 
1085  if (!send(currElement.addr, currElement.blocksize,
1086  currElement.cmd, currElement.flags, currElement.pc)) {
1087  DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1088  ++numSendFailed;
1089  // return false to indicate not to schedule next event
1090  return false;
1091  } else {
1092  ++numSendSucceeded;
1093  }
1094  }
1095  // If packet was sent successfully, either retryPkt or currElement, return
1096  // true to indicate to schedule event at current Tick plus delta. If packet
1097  // was sent successfully and there is no next packet to send, return false.
1098  DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1099  "element.\n");
1100  retryPkt = nullptr;
1101  // Read next element into currElement, currElement gets cleared so save the
1102  // tick to calculate delta
1103  Tick last_tick = currElement.tick;
1104  if (nextExecute()) {
1105  assert(currElement.tick >= last_tick);
1106  delta = currElement.tick - last_tick;
1107  }
1108  return !traceComplete;
1109 }
1110 
1111 void
1113 {
1114  trace.reset();
1115 }
1116 
1117 bool
1119 {
1120  if (traceComplete)
1121  // We are at the end of the file, thus we have no more messages.
1122  // Return false.
1123  return false;
1124 
1125 
1126  //Reset the currElement to the default values
1127  currElement.clear();
1128 
1129  // Read the next line to get the next message. If that fails then end of
1130  // trace has been reached and traceComplete needs to be set in addition
1131  // to returning false. If successful then next message is in currElement.
1132  if (!trace.read(&currElement)) {
1133  traceComplete = true;
1134  instLastTick = curTick();
1135  return false;
1136  }
1137 
1138  DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1139  currElement.cmd.isRead() ? 'r' : 'w',
1140  currElement.addr,
1141  currElement.pc,
1142  currElement.blocksize,
1143  currElement.tick);
1144 
1145  return true;
1146 }
1147 
1148 bool
1149 TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1150  Request::FlagsType flags, Addr pc)
1151 {
1152 
1153  // Create new request
1154  auto req = std::make_shared<Request>(addr, size, flags, masterID);
1155  req->setPC(pc);
1156 
1157  // If this is not done it triggers assert in L1 cache for invalid contextId
1158  req->setContext(ContextID(0));
1159 
1160  // Embed it in a packet
1161  PacketPtr pkt = new Packet(req, cmd);
1162 
1163  uint8_t* pkt_data = new uint8_t[req->getSize()];
1164  pkt->dataDynamic(pkt_data);
1165 
1166  if (cmd.isWrite()) {
1167  memset(pkt_data, 0xA, req->getSize());
1168  }
1169 
1170  // Call MasterPort method to send a timing request for this packet
1171  bool success = port.sendTimingReq(pkt);
1172  if (!success) {
1173  // If it fails, save the packet to retry when a retry is signalled by
1174  // the cache
1175  retryPkt = pkt;
1176  }
1177  return success;
1178 }
1179 
1180 void
1182 {
1183  // Schedule an event to go through the control flow in the same tick as
1184  // retry is received
1185  DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1186  " event @%lli.\n", curTick());
1188 }
1189 
1190 void
1192 {
1193  // Schedule an event to go through the execute flow in the same tick as
1194  // retry is received
1195  DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1196  " event @%lli.\n", curTick());
1198 }
1199 
1200 void
1202 {
1203  if (!dcacheNextEvent.scheduled()) {
1204  DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1205  when);
1206  schedule(dcacheNextEvent, when);
1208  } else if (when < dcacheNextEvent.when()) {
1209  DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1210  " to %lli.\n", dcacheNextEvent.when(), when);
1211  reschedule(dcacheNextEvent, when);
1212  }
1213 
1214 }
1215 
1216 bool
1218 {
1219  // All responses on the instruction fetch side are ignored. Simply delete
1220  // the packet to free allocated memory
1221  delete pkt;
1222 
1223  return true;
1224 }
1225 
1226 void
1228 {
1230 }
1231 
1232 void
1234 {
1235  DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1237 }
1238 
1239 bool
1241 {
1242  // Handle the responses for data memory requests which is done inside the
1243  // elastic data generator
1245  // After processing the response delete the packet to free
1246  // memory
1247  delete pkt;
1248 
1249  return true;
1250 }
1251 
1252 void
1254 {
1256 }
1257 
1259  const std::string& filename,
1260  const double time_multiplier)
1261  : trace(filename),
1262  timeMultiplier(time_multiplier),
1263  microOpCount(0)
1264 {
1265  // Create a protobuf message for the header and read it from the stream
1266  ProtoMessage::InstDepRecordHeader header_msg;
1267  if (!trace.read(header_msg)) {
1268  panic("Failed to read packet header from %s\n", filename);
1269 
1270  if (header_msg.tick_freq() != SimClock::Frequency) {
1271  panic("Trace %s was recorded with a different tick frequency %d\n",
1272  header_msg.tick_freq());
1273  }
1274  } else {
1275  // Assign window size equal to the field in the trace that was recorded
1276  // when the data dependency trace was captured in the o3cpu model
1277  windowSize = header_msg.window_size();
1278  }
1279 }
1280 
1281 void
1283 {
1284  trace.reset();
1285 }
1286 
1287 bool
1289 {
1290  ProtoMessage::InstDepRecord pkt_msg;
1291  if (trace.read(pkt_msg)) {
1292  // Required fields
1293  element->seqNum = pkt_msg.seq_num();
1294  element->type = pkt_msg.type();
1295  // Scale the compute delay to effectively scale the Trace CPU frequency
1296  element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1297 
1298  // Repeated field robDepList
1299  element->clearRobDep();
1300  assert((pkt_msg.rob_dep()).size() <= element->maxRobDep);
1301  for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1302  element->robDep[element->numRobDep] = pkt_msg.rob_dep(i);
1303  element->numRobDep += 1;
1304  }
1305 
1306  // Repeated field
1307  element->clearRegDep();
1308  assert((pkt_msg.reg_dep()).size() <= TheISA::MaxInstSrcRegs);
1309  for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1310  // There is a possibility that an instruction has both, a register
1311  // and order dependency on an instruction. In such a case, the
1312  // register dependency is omitted
1313  bool duplicate = false;
1314  for (int j = 0; j < element->numRobDep; j++) {
1315  duplicate |= (pkt_msg.reg_dep(i) == element->robDep[j]);
1316  }
1317  if (!duplicate) {
1318  element->regDep[element->numRegDep] = pkt_msg.reg_dep(i);
1319  element->numRegDep += 1;
1320  }
1321  }
1322 
1323  // Optional fields
1324  if (pkt_msg.has_p_addr())
1325  element->physAddr = pkt_msg.p_addr();
1326  else
1327  element->physAddr = 0;
1328 
1329  if (pkt_msg.has_v_addr())
1330  element->virtAddr = pkt_msg.v_addr();
1331  else
1332  element->virtAddr = 0;
1333 
1334  if (pkt_msg.has_asid())
1335  element->asid = pkt_msg.asid();
1336  else
1337  element->asid = 0;
1338 
1339  if (pkt_msg.has_size())
1340  element->size = pkt_msg.size();
1341  else
1342  element->size = 0;
1343 
1344  if (pkt_msg.has_flags())
1345  element->flags = pkt_msg.flags();
1346  else
1347  element->flags = 0;
1348 
1349  if (pkt_msg.has_pc())
1350  element->pc = pkt_msg.pc();
1351  else
1352  element->pc = 0;
1353 
1354  // ROB occupancy number
1355  ++microOpCount;
1356  if (pkt_msg.has_weight()) {
1357  microOpCount += pkt_msg.weight();
1358  }
1359  element->robNum = microOpCount;
1360  return true;
1361  }
1362 
1363  // We have reached the end of the file
1364  return false;
1365 }
1366 
1367 bool
1369 {
1370  for (auto& own_reg_dep : regDep) {
1371  if (own_reg_dep == reg_dep) {
1372  // If register dependency is found, make it zero and return true
1373  own_reg_dep = 0;
1374  assert(numRegDep > 0);
1375  --numRegDep;
1376  DPRINTFR(TraceCPUData, "\tFor %lli: Marking register dependency %lli "
1377  "done.\n", seqNum, reg_dep);
1378  return true;
1379  }
1380  }
1381 
1382  // Return false if the dependency is not found
1383  return false;
1384 }
1385 
1386 bool
1388 {
1389  for (auto& own_rob_dep : robDep) {
1390  if (own_rob_dep == rob_dep) {
1391  // If the rob dependency is found, make it zero and return true
1392  own_rob_dep = 0;
1393  assert(numRobDep > 0);
1394  --numRobDep;
1395  DPRINTFR(TraceCPUData, "\tFor %lli: Marking ROB dependency %lli "
1396  "done.\n", seqNum, rob_dep);
1397  return true;
1398  }
1399  }
1400  return false;
1401 }
1402 
1403 void
1405  for (auto& own_reg_dep : regDep) {
1406  own_reg_dep = 0;
1407  }
1408  numRegDep = 0;
1409 }
1410 
1411 void
1413  for (auto& own_rob_dep : robDep) {
1414  own_rob_dep = 0;
1415  }
1416  numRobDep = 0;
1417 }
1418 
1419 bool
1421 {
1422  // If it is an rob dependency then remove it
1423  if (!removeRobDep(done_seq_num)) {
1424  // If it is not an rob dependency then it must be a register dependency
1425  // If the register dependency is not found, it violates an assumption
1426  // and must be caught by assert.
1427  bool regdep_found M5_VAR_USED = removeRegDep(done_seq_num);
1428  assert(regdep_found);
1429  }
1430  // Return true if the node is dependency free
1431  return (numRobDep == 0 && numRegDep == 0);
1432 }
1433 
1434 void
1436 {
1437  DPRINTFR(TraceCPUData, "%lli", seqNum);
1438  DPRINTFR(TraceCPUData, ",%s", typeToStr());
1439  if (isLoad() || isStore()) {
1440  DPRINTFR(TraceCPUData, ",%i", physAddr);
1441  DPRINTFR(TraceCPUData, ",%i", size);
1442  DPRINTFR(TraceCPUData, ",%i", flags);
1443  }
1444  DPRINTFR(TraceCPUData, ",%lli", compDelay);
1445  int i = 0;
1446  DPRINTFR(TraceCPUData, "robDep:");
1447  while (robDep[i] != 0) {
1448  DPRINTFR(TraceCPUData, ",%lli", robDep[i]);
1449  i++;
1450  }
1451  i = 0;
1452  DPRINTFR(TraceCPUData, "regDep:");
1453  while (regDep[i] != 0) {
1454  DPRINTFR(TraceCPUData, ",%lli", regDep[i]);
1455  i++;
1456  }
1457  auto child_itr = dependents.begin();
1458  DPRINTFR(TraceCPUData, "dependents:");
1459  while (child_itr != dependents.end()) {
1460  DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1461  child_itr++;
1462  }
1463 
1464  DPRINTFR(TraceCPUData, "\n");
1465 }
1466 
1467 std::string
1469 {
1470  return Record::RecordType_Name(type);
1471 }
1472 
1474  : trace(filename)
1475 {
1476  // Create a protobuf message for the header and read it from the stream
1477  ProtoMessage::PacketHeader header_msg;
1478  if (!trace.read(header_msg)) {
1479  panic("Failed to read packet header from %s\n", filename);
1480 
1481  if (header_msg.tick_freq() != SimClock::Frequency) {
1482  panic("Trace %s was recorded with a different tick frequency %d\n",
1483  header_msg.tick_freq());
1484  }
1485  }
1486 }
1487 
1488 void
1490 {
1491  trace.reset();
1492 }
1493 
1494 bool
1496 {
1497  ProtoMessage::Packet pkt_msg;
1498  if (trace.read(pkt_msg)) {
1499  element->cmd = pkt_msg.cmd();
1500  element->addr = pkt_msg.addr();
1501  element->blocksize = pkt_msg.size();
1502  element->tick = pkt_msg.tick();
1503  element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1504  element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1505  return true;
1506  }
1507 
1508  // We have reached the end of the file
1509  return false;
1510 }
InputStream trace
Input stream used for reading the input trace file.
Definition: trace_cpu.hh:1000
void execute()
This is the main execute function which consumes nodes from the sorted readyList. ...
Definition: trace_cpu.cc:431
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
Struct to store a ready-to-execute node and its execution tick.
Definition: trace_cpu.hh:685
void schedDcacheNext()
This is the control flow that uses the functionality of the dcacheGen to replay the trace...
Definition: trace_cpu.cc:182
#define DPRINTF(x,...)
Definition: trace.hh:229
const uint16_t sizeStoreBuffer
The size of store buffer.
Definition: trace_cpu.hh:760
const uint64_t progressMsgInterval
Interval of committed instructions specified by the user at which a progress info message is printed...
Definition: trace_cpu.hh:1129
bool send(Addr addr, unsigned size, const MemCmd &cmd, Request::FlagsType flags, Addr pc)
Creates a new request assigning the request parameters passed by the arguments.
Definition: trace_cpu.cc:1149
bool isStore() const
Is the node a store.
Definition: trace_cpu.hh:650
Addr blocksize
The size of the access for the request.
Definition: trace_cpu.hh:360
TraceCPU & owner
Reference of the TraceCPU.
Definition: trace_cpu.hh:991
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
NodeSeqNum seqNum
The sequence number of the ready node.
Definition: trace_cpu.hh:688
void exit()
Exit the FixedRetryGen.
Definition: trace_cpu.cc:1112
Definition: packet.hh:76
void recvReqRetry()
Handle a retry signalled by the cache if data access failed in the first attempt. ...
Definition: trace_cpu.cc:1253
uint32_t windowSize
The window size that is read from the header of the protobuf trace and used to process the dependency...
Definition: trace_cpu.hh:818
bool isTraceComplete()
Returns the traceComplete variable which is set when end of the input trace file is reached...
Definition: trace_cpu.hh:496
EventFunctionWrapper dcacheNextEvent
Event for the control flow method schedDcacheNext()
Definition: trace_cpu.hh:1088
Bitfield< 7 > i
bool removeRegDep(NodeSeqNum reg_dep)
Remove completed instruction from register dependency array.
Definition: trace_cpu.cc:1368
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:401
uint16_t numInFlightStores
Number of ready stores for which request may or may not be sent.
Definition: trace_cpu.hh:787
Request::Flags flags
Request flags if any.
Definition: trace_cpu.hh:616
RecordType type
Type of the node corresponding to the instruction modelled by it.
Definition: trace_cpu.hh:601
PacketPtr retryPkt
PacketPtr used to store the packet to retry.
Definition: trace_cpu.hh:1006
Stats::Scalar numSchedIcacheEvent
Definition: trace_cpu.hh:1139
bool nextExecute()
Reads a line of the trace file.
Definition: trace_cpu.cc:1118
void updateNumOps(uint64_t rob_num)
Definition: trace_cpu.cc:96
~TraceCPU()
Definition: trace_cpu.cc:84
HardwareResource(uint16_t max_rob, uint16_t max_stores, uint16_t max_loads)
Constructor that initializes the sizes of the structures.
Definition: trace_cpu.cc:880
PacketPtr executeMemReq(GraphNode *node_ptr)
Creates a new request for a load or store assigning the request parameters.
Definition: trace_cpu.cc:625
Request::FlagsType flags
Potential request flags to use.
Definition: trace_cpu.hh:366
void completeMemAccess(PacketPtr pkt)
When a load writeback is received, that is when the load completes, release the dependents on it...
Definition: trace_cpu.cc:739
physAddr
Definition: misc.hh:833
ip6_addr_t addr
Definition: inet.hh:335
static PacketPtr createWrite(const RequestPtr &req)
Definition: packet.hh:919
std::map< NodeSeqNum, NodeRobNum > inFlightNodes
A map from the sequence number to the ROB number of the in- flight nodes.
Definition: trace_cpu.hh:778
void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: trace_cpu.cc:114
Tick traceOffset
This stores the time offset in the trace, which is taken away from the ready times of requests...
Definition: trace_cpu.hh:1102
bool isExecComplete() const
Returns the execComplete variable which is set when the last node is executed.
Definition: trace_cpu.hh:969
Port & getInstPort()
Used to get a reference to the icache port.
Definition: trace_cpu.hh:1149
std::string instTraceFile
File names for input instruction and data traces.
Definition: trace_cpu.hh:335
void exit()
Exit the ElasticDataGen.
Definition: trace_cpu.cc:342
void regStats()
Callback to set stat parameters.
Definition: trace_cpu.cc:217
uint32_t asid
The address space id which is set if the virtual address is set.
Definition: trace_cpu.hh:610
static const uint8_t maxRobDep
The maximum no.
Definition: trace_cpu.hh:586
bool recvTimingResp(PacketPtr pkt)
Receive the timing response and simply delete the packet since instruction fetch requests are issued a...
Definition: trace_cpu.cc:1217
const uint16_t sizeROB
The size of the ROB used to throttle the max.
Definition: trace_cpu.hh:754
void checkAndSchedExitEvent()
This is called when either generator finishes executing from the trace.
Definition: trace_cpu.cc:196
bool read(google::protobuf::Message &msg)
Read a message from the stream.
Definition: protoio.cc:177
bool oneTraceComplete
Set to true when one of the generators finishes replaying its trace.
Definition: trace_cpu.hh:1094
void schedIcacheNext()
This is the control flow that uses the functionality of the icacheGen to replay the trace...
Definition: trace_cpu.cc:157
Stats::Scalar numSchedDcacheEvent
Definition: trace_cpu.hh:1138
MemCmd cmd
Specifies if the request is to be a read or a write.
Definition: trace_cpu.hh:354
Bitfield< 23, 0 > offset
Definition: types.hh:154
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the slave port by calling its corresponding receive function...
Definition: port.hh:445
void clearRobDep()
Initialize register dependency array to all zeroes.
Definition: trace_cpu.cc:1412
uint64_t NodeSeqNum
Node sequence number type.
Definition: trace_cpu.hh:564
bool readNextWindow()
Reads a line of the trace file.
Definition: trace_cpu.cc:348
void reset()
Reset the stream such that it can be played once again.
Definition: trace_cpu.cc:1282
const int MaxInstSrcRegs
Definition: registers.hh:59
Tick clockPeriod() const
uint8_t numRegDep
Number of register dependencies.
Definition: trace_cpu.hh:637
bool isWrite() const
Definition: packet.hh:529
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition: core.cc:49
void addDepsOnParent(GraphNode *new_node, T &dep_array, uint8_t &num_dep)
Iterate over the dependencies of a new node and add the new node to the list of dependents of the par...
Definition: trace_cpu.cc:401
Stats::Scalar numSendSucceeded
Definition: trace_cpu.hh:1053
void reset()
Reset the stream such that it can be played once again.
Definition: trace_cpu.cc:1489
bool traceComplete
Set to true when end of trace is reached.
Definition: trace_cpu.hh:1009
Tick init()
Called from TraceCPU init().
Definition: trace_cpu.cc:305
void clearRegDep()
Initialize register dependency array to all zeroes.
Definition: trace_cpu.cc:1404
RequestPtr req
A pointer to the original request.
Definition: packet.hh:327
void regStats() override
Callback to set stat parameters.
Definition: base.cc:388
ProtoInputStream trace
Input file stream for the protobuf trace.
Definition: trace_cpu.hh:801
CountedExitEvent * execCompleteEvent
A CountedExitEvent which when serviced decrements the counter.
Definition: trace_cpu.hh:1117
uint8_t type
Definition: inet.hh:333
#define inform(...)
Definition: logging.hh:213
const Tick MaxTick
Definition: types.hh:65
const double timeMultiplier
A multiplier for the compute delays in the trace to modulate the Trace CPU frequency either up or dow...
Definition: trace_cpu.hh:809
Tick curTick()
The current simulated tick.
Definition: core.hh:47
bool awaitingResponse() const
Check if there are any outstanding requests, i.e.
Definition: trace_cpu.cc:1002
Bitfield< 4 > pc
virtual Port & getInstPort()=0
Purely virtual method that returns a reference to the instruction port.
bool isStrictlyOrdered() const
Return true if node has a request which is strictly ordered.
Definition: trace_cpu.hh:671
#define DTRACE(x)
Definition: trace.hh:227
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:385
The trace cpu replays traces generated using the elastic trace probe attached to the O3 CPU model...
Definition: trace_cpu.hh:144
uint64_t progressMsgThreshold
Definition: trace_cpu.hh:1136
NodeRobNum robNum
ROB occupancy number.
Definition: trace_cpu.hh:598
TraceCPU(TraceCPUParams *params)
Definition: trace_cpu.cc:49
uint64_t Tick
Tick count type.
Definition: types.hh:63
const bool enableEarlyExit
Exit when any one Trace CPU completes its execution.
Definition: trace_cpu.hh:1123
EventFunctionWrapper icacheNextEvent
Event for the control flow method schedIcacheNext()
Definition: trace_cpu.hh:1085
The struct GraphNode stores an instruction in the trace file.
Definition: trace_cpu.hh:578
This struct stores a line in the trace file.
Definition: trace_cpu.hh:351
void dcacheRetryRecvd()
When data cache port receives a retry, schedule event dcacheNextEvent.
Definition: trace_cpu.cc:1191
void reset()
Reset the input stream and seek to the beginning of the file.
Definition: protoio.cc:167
FixedRetryGen icacheGen
Instance of FixedRetryGen to replay instruction read requests.
Definition: trace_cpu.hh:1064
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:203
void recvReqRetry()
Handle a retry signalled by the cache if instruction read failed in the first attempt.
Definition: trace_cpu.cc:1227
bool isAvailable(const GraphNode *new_node) const
Check if structures required to issue a node are free.
Definition: trace_cpu.cc:958
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
void release(const GraphNode *done_node)
Release appropriate structures for a completed node.
Definition: trace_cpu.cc:910
Derived & precision(int _precision)
Set the precision and marks this stat to print at the end of simulation.
Definition: statistics.hh:324
uint16_t numInFlightLoads
Number of ready loads for which request may or may not be sent.
Definition: trace_cpu.hh:784
uint64_t compDelay
Computational delay.
Definition: trace_cpu.hh:628
MasterPort & port
Reference of the port to be used to issue memory requests.
Definition: trace_cpu.hh:994
virtual const std::string name() const
Definition: sim_object.hh:120
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:913
static int numTraceCPUs
Number of Trace CPUs in the system used as a shared variable and passed to the CountedExitEvent event...
Definition: trace_cpu.hh:1110
Stats::Formula cpi
Stat for the CPI.
Definition: trace_cpu.hh:1144
void dcacheRecvTimingResp(PacketPtr pkt)
When data cache port receives a response, this calls the dcache generator method handle to complete t...
Definition: trace_cpu.cc:1233
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Tick execTick
The tick at which the ready node must be executed.
Definition: trace_cpu.hh:691
NodeRobNum oldestInFlightRobNum
The ROB number of the oldest in-flight node.
Definition: trace_cpu.hh:781
Bitfield< 15 > system
Definition: misc.hh:999
const uint16_t sizeLoadBuffer
The size of load buffer.
Definition: trace_cpu.hh:766
Bitfield< 24 > j
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:279
Stats::Scalar numOps
Stat for number of simulated micro-ops.
Definition: trace_cpu.hh:1142
void reschedule(Event &event, Tick when, bool always=false)
Definition: eventq.hh:756
bool checkAndIssue(const GraphNode *node_ptr, bool first=true)
Attempts to issue a node once the node&#39;s source dependencies are complete.
Definition: trace_cpu.cc:699
const MasterID masterID
MasterID used for the requests being sent.
Definition: trace_cpu.hh:997
Tick tick
The time at which the request should be sent.
Definition: trace_cpu.hh:363
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition: sim_events.cc:90
bool recvTimingResp(PacketPtr pkt)
Receive the timing response and call dcacheRecvTimingResp() method of the dcacheGen to handle completi...
Definition: trace_cpu.cc:1240
uint8_t numRobDep
Number of order dependencies.
Definition: trace_cpu.hh:625
Stats::Scalar numSendFailed
Definition: trace_cpu.hh:1054
bool isWrite() const
Definition: packet.hh:198
void schedDcacheNextEvent(Tick when)
Schedule event dcacheNextEvent at the given tick.
Definition: trace_cpu.cc:1201
std::string dataTraceFile
Definition: trace_cpu.hh:335
Stats::Scalar numSendAttempted
Definition: trace_cpu.hh:1052
ElasticDataGen dcacheGen
Instance of ElasticDataGen to replay data read and write requests.
Definition: trace_cpu.hh:1067
Addr virtAddr
The virtual address for the request if any.
Definition: trace_cpu.hh:607
uint32_t size
Size of request if any.
Definition: trace_cpu.hh:613
void takeOverFrom(Port *old)
A utility function to make it easier to swap out ports.
Definition: port.hh:132
void printReadyList()
Print readyList for debugging using debug flag TraceCPUData.
Definition: trace_cpu.cc:863
InputStream(const std::string &filename, const double time_multiplier)
Create a trace input stream for a given file name.
Definition: trace_cpu.cc:1258
Stats::Scalar numCycles
Definition: base.hh:603
bool isLoad() const
Is the node a load.
Definition: trace_cpu.hh:647
void releaseStoreBuffer()
Release store buffer entry for a completed store.
Definition: trace_cpu.cc:951
void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: base.cc:281
void adjustInitTraceOffset(Tick &offset)
Adjust traceOffset based on what TraceCPU init() determines on comparing the offsets in the fetch req...
Definition: trace_cpu.cc:335
void occupy(const GraphNode *new_node)
Occupy appropriate structures for an issued node.
Definition: trace_cpu.cc:891
virtual Port & getDataPort()=0
Purely virtual method that returns a reference to the data port.
RegDepArray regDep
Array of register dependencies (incoming) if any.
Definition: trace_cpu.hh:634
Stats::Scalar numRetrySucceeded
Definition: trace_cpu.hh:1055
void schedule(Event &event, Tick when)
Definition: eventq.hh:744
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1078
Addr addr
The address for the request.
Definition: trace_cpu.hh:357
Tick init()
Called from TraceCPU init().
Definition: trace_cpu.cc:1048
Port & getDataPort()
Used to get a reference to the dcache port.
Definition: trace_cpu.hh:1152
void icacheRetryRecvd()
When instruction cache port receives a retry, schedule event icacheNextEvent.
Definition: trace_cpu.cc:1181
Addr physAddr
The address for the request if any.
Definition: trace_cpu.hh:604
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:312
void printOccupancy()
Print resource occupancy for debugging.
Definition: trace_cpu.cc:1008
void addToSortedReadyList(NodeSeqNum seq_num, Tick exec_tick)
Add a ready node to the readyList.
Definition: trace_cpu.cc:806
uint64_t FlagsType
Definition: request.hh:91
static const int NumArgumentRegs M5_VAR_USED
Definition: process.cc:84
bool read(TraceElement *element)
Attempt to read a trace element from the stream, and also notify the caller if the end of the file wa...
Definition: trace_cpu.cc:1495
void writeElementAsTrace() const
Write out element in trace-compatible format using debug flag TraceCPUData.
Definition: trace_cpu.cc:1435
RobDepArray robDep
Array of order dependencies.
Definition: trace_cpu.hh:622
const Params * params() const
Definition: base.hh:311
std::string typeToStr() const
Return string specifying the type of the node.
Definition: trace_cpu.cc:1468
void takeOverFrom(BaseCPU *oldCPU)
Load the state of a CPU from the previous CPU object, invoked on all new CPUs that are about to be sw...
Definition: trace_cpu.cc:106
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:703
int ContextID
Globally unique thread context ID.
Definition: types.hh:231
const std::string & name() const
Returns name of the ElasticDataGen instance.
Definition: trace_cpu.hh:893
Command cmd
Definition: packet.hh:187
bool read(GraphNode *element)
Attempt to read a trace element from the stream, and also notify the caller if the end of the file wa...
Definition: trace_cpu.cc:1288
uint64_t microOpCount
Count of committed ops read from trace plus the filtered ops.
Definition: trace_cpu.hh:812
NodeSeqNum seqNum
Instruction sequence number.
Definition: trace_cpu.hh:595
bool removeRobDep(NodeSeqNum rob_dep)
Remove completed instruction from order dependency array.
Definition: trace_cpu.cc:1387
bool removeDepOnInst(NodeSeqNum done_seq_num)
Check for all dependencies on completed inst.
Definition: trace_cpu.cc:1420
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:104
InputStream(const std::string &filename)
Create a trace input stream for a given file name.
Definition: trace_cpu.cc:1473
bool tryNext()
This tries to send current or retry packet and returns true if successful.
Definition: trace_cpu.cc:1063
#define DPRINTFR(...)
Definition: trace.hh:231

Generated on Fri Feb 28 2020 16:27:00 for gem5 by doxygen 1.8.13