trace_cpu.cc
/*
 * Copyright (c) 2013 - 2016 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/trace/trace_cpu.hh"

#include "base/compiler.hh"
#include "sim/sim_exit.hh"

namespace gem5
{

// Declare and initialize the static counter for number of trace CPUs.
int TraceCPU::numTraceCPUs = 0;

TraceCPU::TraceCPU(const TraceCPUParams &params)
    : BaseCPU(params),
      icachePort(this),
      dcachePort(this),
      instRequestorID(params.system->getRequestorId(this, "inst")),
      dataRequestorID(params.system->getRequestorId(this, "data")),
      instTraceFile(params.instTraceFile),
      dataTraceFile(params.dataTraceFile),
      icacheGen(*this, ".iside", icachePort, instRequestorID, instTraceFile),
      dcacheGen(*this, ".dside", dcachePort, dataRequestorID, dataTraceFile,
                params),
      icacheNextEvent([this]{ schedIcacheNext(); }, name()),
      dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
      oneTraceComplete(false),
      traceOffset(0),
      execCompleteEvent(nullptr),
      enableEarlyExit(params.enableEarlyExit),
      progressMsgInterval(params.progressMsgInterval),
      progressMsgThreshold(params.progressMsgInterval), traceStats(this)
{
    // Increment static counter for number of Trace CPUs.
    ++numTraceCPUs;

    // Check that the python parameters for sizes of ROB, store buffer and
    // load buffer do not overflow the corresponding C++ variables.
    fatal_if(params.sizeROB > UINT16_MAX,
             "ROB size set to %d exceeds the max. value of %d.",
             params.sizeROB, UINT16_MAX);
    fatal_if(params.sizeStoreBuffer > UINT16_MAX,
             "Store buffer size set to %d exceeds the max. value of %d.",
             params.sizeStoreBuffer, UINT16_MAX);
    fatal_if(params.sizeLoadBuffer > UINT16_MAX,
             "Load buffer size set to %d exceeds the max. value of %d.",
             params.sizeLoadBuffer, UINT16_MAX);
}

void
TraceCPU::updateNumOps(uint64_t rob_num)
{
    traceStats.numOps = rob_num;
    if (progressMsgInterval != 0 &&
        static_cast<uint64_t>(traceStats.numOps.value()) >=
            progressMsgThreshold) {
        inform("%s: %i insts committed\n", name(), progressMsgThreshold);
        progressMsgThreshold += progressMsgInterval;
    }
}

void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
    getInstPort().takeOverFrom(&oldCPU->getInstPort());
    getDataPort().takeOverFrom(&oldCPU->getDataPort());
}

void
TraceCPU::init()
{
    DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\".\n",
            instTraceFile);
    DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
            dataTraceFile);

    BaseCPU::init();

    // Get the send tick of the first instruction read request
    Tick first_icache_tick = icacheGen.init();

    // Get the send tick of the first data read/write request
    Tick first_dcache_tick = dcacheGen.init();

    // Set the trace offset as the minimum of that in both traces
    traceOffset = std::min(first_icache_tick, first_dcache_tick);
    inform("%s: Time offset (tick) found as min of both traces is %lli.",
           name(), traceOffset);

    // Schedule next icache and dcache event by subtracting the offset
    schedule(icacheNextEvent, first_icache_tick - traceOffset);
    schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
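    // As a worked illustration (numbers are hypothetical, not from a real
    // trace): if the first fetch request is at tick 1000 and the first data
    // request at tick 750, traceOffset becomes 750 and the two events above
    // are scheduled at ticks 250 and 0 respectively.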

    // Adjust the trace offset for the dcache generator's ready nodes
    // We don't need to do this for the icache generator as it will
    // send its first request at the first event and schedule subsequent
    // events using a relative tick delta
    dcacheGen.adjustInitTraceOffset(traceOffset);

    // If the Trace CPU simulation is configured to exit on any one trace
    // completion then we don't need a counted event to count down all Trace
    // CPUs in the system. If not then instantiate a counted event.
    if (!enableEarlyExit) {
        // The static counter for number of Trace CPUs is correctly set at
        // this point so create an event and pass it.
        execCompleteEvent = new CountedExitEvent("end of all traces reached.",
                                                 numTraceCPUs);
    }
}

void
TraceCPU::schedIcacheNext()
{
    DPRINTF(TraceCPUInst, "IcacheGen event.\n");

    // Try to send the current packet or a retry packet if there is one
    bool sched_next = icacheGen.tryNext();
    // If packet sent successfully, schedule next event
    if (sched_next) {
        DPRINTF(TraceCPUInst,
                "Scheduling next icacheGen event at %d.\n",
                curTick() + icacheGen.tickDelta());
        schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
        ++traceStats.numSchedIcacheEvent;
    } else {
        // Check if traceComplete. If not, do nothing because sending failed
        // and the next event will be scheduled via recvReqRetry().
        if (icacheGen.isTraceComplete()) {
            // If this is the first trace to complete, set the variable. If
            // it is already set then both traces are complete to exit sim.
            checkAndSchedExitEvent();
        }
    }
    return;
}

void
TraceCPU::schedDcacheNext()
{
    DPRINTF(TraceCPUData, "DcacheGen event.\n");

    // Update stat for numCycles
    baseStats.numCycles = clockEdge(Cycles(0)) / clockPeriod();

    dcacheGen.execute();
    if (dcacheGen.isExecComplete()) {
        checkAndSchedExitEvent();
    }
}

void
TraceCPU::checkAndSchedExitEvent()
{
    if (!oneTraceComplete) {
        oneTraceComplete = true;
    } else {
        // Schedule event to indicate execution is complete as both
        // instruction and data access traces have been played back.
        inform("%s: Execution complete.", name());
        // If the replay is configured to exit early, that is when any one
        // execution is complete then exit immediately and return. Otherwise,
        // schedule the counted exit that counts down completion of each Trace
        // CPU.
        if (enableEarlyExit) {
            exitSimLoop("End of trace reached");
        } else {
            schedule(*execCompleteEvent, curTick());
        }
    }
}

TraceCPU::TraceStats::TraceStats(TraceCPU *trace) :
    statistics::Group(trace),
    ADD_STAT(numSchedDcacheEvent, statistics::units::Count::get(),
             "Number of events scheduled to trigger data request generator"),
    ADD_STAT(numSchedIcacheEvent, statistics::units::Count::get(),
             "Number of events scheduled to trigger instruction request "
             "generator"),
    ADD_STAT(numOps, statistics::units::Count::get(),
             "Number of micro-ops simulated by the Trace CPU"),
    ADD_STAT(cpi, statistics::units::Rate<
                statistics::units::Cycle, statistics::units::Count>::get(),
             "Cycles per micro-op used as a proxy for CPI",
             trace->baseStats.numCycles / numOps)
{
    cpi.precision(6);
}
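
// Note on the cpi formula above: it divides the cycle count by committed
// micro-ops, so e.g. 2,400,000 cycles over 1,200,000 micro-ops would report
// cpi = 2.000000 (illustrative arithmetic, not output from an actual run).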

TraceCPU::ElasticDataGen::
ElasticDataGenStatGroup::ElasticDataGenStatGroup(statistics::Group *parent,
                                                 const std::string& _name) :
    statistics::Group(parent, _name.c_str()),
    ADD_STAT(maxDependents, statistics::units::Count::get(),
             "Max number of dependents observed on a node"),
    ADD_STAT(maxReadyListSize, statistics::units::Count::get(),
             "Max size of the ready list observed"),
    ADD_STAT(numSendAttempted, statistics::units::Count::get(),
             "Number of first attempts to send a request"),
    ADD_STAT(numSendSucceeded, statistics::units::Count::get(),
             "Number of successful first attempts"),
    ADD_STAT(numSendFailed, statistics::units::Count::get(),
             "Number of failed first attempts"),
    ADD_STAT(numRetrySucceeded, statistics::units::Count::get(),
             "Number of successful retries"),
    ADD_STAT(numSplitReqs, statistics::units::Count::get(),
             "Number of split requests"),
    ADD_STAT(numSOLoads, statistics::units::Count::get(),
             "Number of strictly ordered loads"),
    ADD_STAT(numSOStores, statistics::units::Count::get(),
             "Number of strictly ordered stores"),
    ADD_STAT(dataLastTick, statistics::units::Tick::get(),
             "Last tick simulated from the elastic data trace")
{
}

Tick
TraceCPU::ElasticDataGen::init()
{
    DPRINTF(TraceCPUData, "Initializing data memory request generator "
            "DcacheGen: elastic issue with retry.\n");

    panic_if(!readNextWindow(),
             "Trace has %d elements. It must have at least %d elements.",
             depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
            depGraph.size());

    panic_if(!readNextWindow(),
             "Trace has %d elements. It must have at least %d elements.",
             depGraph.size(), 2 * windowSize);
    DPRINTF(TraceCPUData, "After 2nd read, depGraph size:%d.\n",
            depGraph.size());

    // Print readyList
    if (debug::TraceCPUData) {
        printReadyList();
    }
    auto free_itr = readyList.begin();
    DPRINTF(TraceCPUData,
            "Execute tick of the first dependency free node %lli is %d.\n",
            free_itr->seqNum, free_itr->execTick);
    // Return the execute tick of the earliest ready node so that an event
    // can be scheduled to call execute()
    return (free_itr->execTick);
}
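
// A brief note on why init() reads two windows: execute() assumes that the
// dependents of every dependency-free node are already present in depGraph,
// so the graph is primed with 2 * windowSize records up front (e.g. at least
// 200 nodes for a window of 100; sizes here are purely illustrative).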

void
TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
{
    for (auto& free_node : readyList) {
        free_node.execTick -= offset;
    }
}

void
TraceCPU::ElasticDataGen::exit()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::readNextWindow()
{
    // Read and add next window
    DPRINTF(TraceCPUData, "Reading next window from file.\n");

    if (traceComplete) {
        // We are at the end of the file, thus we have no more records.
        // Return false.
        return false;
    }

    DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
            depGraph.size());

    uint32_t num_read = 0;
    while (num_read != windowSize) {

        // Create a new graph node
        GraphNode* new_node = new GraphNode;

        // Read the next line to get the next record. If that fails then end
        // of trace has been reached and traceComplete needs to be set in
        // addition to returning false.
        if (!trace.read(new_node)) {
            DPRINTF(TraceCPUData, "\tTrace complete!\n");
            traceComplete = true;
            return false;
        }

        // Annotate the ROB dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->robDep);
        // Annotate the register dependencies of the new node onto the parent
        // nodes.
        addDepsOnParent(new_node, new_node->regDep);

        num_read++;
        // Add to map
        depGraph[new_node->seqNum] = new_node;
        if (new_node->robDep.empty() && new_node->regDep.empty()) {
            // Source dependencies are already complete, check if resources
            // are available and issue. The execution time is approximated
            // to current time plus the computational delay.
            checkAndIssue(new_node);
        }
    }

    DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
            depGraph.size());
    return true;
}

template<typename T>
void
TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, T& dep_list)
{
    auto dep_it = dep_list.begin();
    while (dep_it != dep_list.end()) {
        // We look up the valid dependency, i.e. the parent of this node
        auto parent_itr = depGraph.find(*dep_it);
        if (parent_itr != depGraph.end()) {
            // If the parent is found, it is yet to be executed. Append a
            // pointer to the new node to the dependents list of the parent
            // node.
            parent_itr->second->dependents.push_back(new_node);
            auto num_depts = parent_itr->second->dependents.size();
            elasticStats.maxDependents = std::max<double>(num_depts,
                elasticStats.maxDependents.value());
            dep_it++;
        } else {
            // The dependency is not found in the graph. So consider
            // the execution of the parent is complete, i.e. remove this
            // dependency.
            dep_it = dep_list.erase(dep_it);
        }
    }
}

void
TraceCPU::ElasticDataGen::execute()
{
    DPRINTF(TraceCPUData, "Execute start occupancy:\n");
    DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
             "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
             depFreeQueue.size());
    hwResource.printOccupancy();

    // Read next window to make sure that dependents of all dep-free nodes
    // are in the depGraph
    if (nextRead) {
        readNextWindow();
        nextRead = false;
    }

    // First attempt to issue the pending dependency-free nodes held
    // in depFreeQueue. If resources have become available for a node,
    // then issue it, i.e. add the node to readyList.
    while (!depFreeQueue.empty()) {
        if (checkAndIssue(depFreeQueue.front(), false)) {
            DPRINTF(TraceCPUData,
                    "Removing from depFreeQueue: seq. num %lli.\n",
                    (depFreeQueue.front())->seqNum);
            depFreeQueue.pop();
        } else {
            break;
        }
    }
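
    // Note that the loop above stops at the first node that still lacks
    // resources, so nodes in depFreeQueue issue strictly in the order in
    // which they became dependency free.
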
    // Proceed to execute from readyList
    auto graph_itr = depGraph.begin();
    auto free_itr = readyList.begin();
    // Iterate through readyList until the next free node has its execute
    // tick later than curTick or the end of readyList is reached
    while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {

        // Get pointer to the node to be executed
        graph_itr = depGraph.find(free_itr->seqNum);
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // If there is a retryPkt send that else execute the load
        if (retryPkt) {
            // The retryPkt must be the request that was created by the
            // first node in the readyList.
            if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
                panic("Retry packet's sequence number does not match "
                      "the first node in the readyList.\n");
            }
            if (port.sendTimingReq(retryPkt)) {
                ++elasticStats.numRetrySucceeded;
                retryPkt = nullptr;
            }
        } else if (node_ptr->isLoad() || node_ptr->isStore()) {
            // If there is no retryPkt, attempt to send a memory request in
            // case of a load or store node. If the send fails, executeMemReq()
            // returns a packet pointer, which we save in retryPkt. In case of
            // a comp node we don't do anything and simply continue as if the
            // execution of the comp node succeeded.
            retryPkt = executeMemReq(node_ptr);
        }
        // If the retryPkt or a new load/store node failed, we exit from here
        // as a retry from cache will bring the control to execute(). The
        // first node in readyList then, will be the failed node.
        if (retryPkt) {
            break;
        }

        // Proceed to remove dependencies for the successfully executed node.
        // If it is a load which is not strictly ordered and we sent a
        // request for it successfully, we do not yet mark any register
        // dependencies complete. But as per dependency modelling we need
        // to mark ROB dependencies of load and non load/store nodes which
        // are based on successful sending of the load as complete.
        if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
            // If execute succeeded mark its dependents as complete
            DPRINTF(TraceCPUData,
                    "Node seq. num %lli sent. Waking up dependents..\n",
                    node_ptr->seqNum);

            auto child_itr = (node_ptr->dependents).begin();
            while (child_itr != (node_ptr->dependents).end()) {
                // ROB dependency of a store on a load must not be removed
                // after load is sent but after response is received
                if (!(*child_itr)->isStore() &&
                    (*child_itr)->removeRobDep(node_ptr->seqNum)) {

                    // Check if the child node has become dependency free
                    if ((*child_itr)->robDep.empty() &&
                        (*child_itr)->regDep.empty()) {

                        // Source dependencies are complete, check if
                        // resources are available and issue
                        checkAndIssue(*child_itr);
                    }
                    // Remove this child for the sent load and point to new
                    // location of the element following the erased element
                    child_itr = node_ptr->dependents.erase(child_itr);
                } else {
                    // This child is not dependency-free, point to the next
                    // child
                    child_itr++;
                }
            }
        } else {
            // If it is a strictly ordered load mark its dependents as
            // complete as we do not send a request for this case. If it is
            // a store or a comp node we also mark all its dependents
            // complete.
            DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
                    " up dependents..\n", node_ptr->seqNum);

            for (auto child : node_ptr->dependents) {
                // If the child node is dependency free removeDepOnInst()
                // returns true.
                if (child->removeDepOnInst(node_ptr->seqNum)) {
                    // Source dependencies are complete, check if resources
                    // are available and issue
                    checkAndIssue(child);
                }
            }
        }

        // After executing the node, remove from readyList and delete node.
        readyList.erase(free_itr);
        // If it is a cacheable load which was sent, don't delete
        // just yet. Delete it in completeMemAccess() after the
        // response is received. If it is a strictly ordered
        // load, it was not sent and all dependencies were simply
        // marked complete. Thus it is safe to delete it. For
        // stores and non load/store nodes all dependencies were
        // marked complete so it is safe to delete it.
        if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
            // Release all resources occupied by the completed node
            hwResource.release(node_ptr);
            // clear the dynamically allocated set of dependents
            (node_ptr->dependents).clear();
            // Update the stat for numOps simulated
            owner.updateNumOps(node_ptr->robNum);
            // delete node
            delete node_ptr;
            // remove from graph
            depGraph.erase(graph_itr);
        }
        // Point to first node to continue to next iteration of while loop
        free_itr = readyList.begin();
    } // end of while loop

    // Print readyList, sizes of queues and resource status after updating
    if (debug::TraceCPUData) {
        printReadyList();
        DPRINTF(TraceCPUData, "Execute end occupancy:\n");
        DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
                 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
                 depFreeQueue.size());
        hwResource.printOccupancy();
    }

    if (retryPkt) {
        DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry "
                "event from the cache for seq. num %lli.\n",
                retryPkt->req->getReqInstSeqNum());
        return;
    }
    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If cache is not blocked, schedule an event for the first execTick in
    // readyList else retry from cache will schedule the event. If the ready
    // list is empty then check if the next pending node has resources
    // available to issue. If yes, then schedule an event for the next cycle.
    if (!readyList.empty()) {
        Tick next_event_tick = std::max(readyList.begin()->execTick,
                                        curTick());
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    } else if (readyList.empty() && !depFreeQueue.empty() &&
               hwResource.isAvailable(depFreeQueue.front())) {
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                owner.clockEdge(Cycles(1)));
        owner.schedDcacheNextEvent(owner.clockEdge(Cycles(1)));
    }

    // If trace is completely read, readyList is empty and depGraph is empty,
    // set execComplete to true
    if (depGraph.empty() && readyList.empty() && traceComplete &&
        !hwResource.awaitingResponse()) {
        DPRINTF(TraceCPUData, "\tExecution Complete!\n");
        execComplete = true;
        elasticStats.dataLastTick = curTick();
    }
}

PacketPtr
TraceCPU::ElasticDataGen::executeMemReq(GraphNode *node_ptr)
{
    DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
            "virt addr %d, pc %#x, size %d, flags %d).\n",
            node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
            node_ptr->pc, node_ptr->size, node_ptr->flags);

    // If the request is strictly ordered, do not send it. Just return nullptr
    // as if it was successfully sent.
    if (node_ptr->isStrictlyOrdered()) {
        node_ptr->isLoad() ? ++elasticStats.numSOLoads :
                             ++elasticStats.numSOStores;
        DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
                node_ptr->seqNum);
        return nullptr;
    }

    // Check if the request spans two cache lines as this condition triggers
    // an assert fail in the L1 cache. If it does then truncate the size to
    // access only until the end of that line and ignore the remainder. The
    // stat counting this is useful to keep a check on how frequently this
    // happens. If required the code could be revised to mimic splitting such
    // a request into two.
    unsigned blk_size = owner.cacheLineSize();
    Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
    if (!(blk_offset + node_ptr->size <= blk_size)) {
        node_ptr->size = blk_size - blk_offset;
        ++elasticStats.numSplitReqs;
    }
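
    // For instance, with a 64-byte cache line, an 8-byte access whose block
    // offset is 60 would cross the line boundary and is truncated here to
    // the 4 bytes that remain in the line (illustrative numbers only).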

    // Create a request and the packet containing request
    auto req = std::make_shared<Request>(
        node_ptr->physAddr, node_ptr->size, node_ptr->flags, requestorId);
    req->setReqInstSeqNum(node_ptr->seqNum);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setContext(ContextID(0));

    req->setPC(node_ptr->pc);
    // If virtual address is valid, set the virtual address field
    // of the request.
    if (node_ptr->virtAddr != 0) {
        req->setVirt(node_ptr->virtAddr, node_ptr->size,
                     node_ptr->flags, requestorId, node_ptr->pc);
        req->setPaddr(node_ptr->physAddr);
        req->setReqInstSeqNum(node_ptr->seqNum);
    }

    PacketPtr pkt;
    uint8_t* pkt_data = new uint8_t[req->getSize()];
    if (node_ptr->isLoad()) {
        pkt = Packet::createRead(req);
    } else {
        pkt = Packet::createWrite(req);
        memset(pkt_data, 0xA, req->getSize());
    }
    pkt->dataDynamic(pkt_data);

    // Call RequestPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    ++elasticStats.numSendAttempted;

    if (!success) {
        // If it fails, return the packet to retry when a retry is signalled
        // by the cache
        ++elasticStats.numSendFailed;
        DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
        return pkt;
    } else {
        // If it succeeds, return nullptr
        ++elasticStats.numSendSucceeded;
        return nullptr;
    }
}

bool
TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
{
    // Assert the node is dependency-free
    assert(node_ptr->robDep.empty() && node_ptr->regDep.empty());

    // If this is the first attempt, print a debug message to indicate this.
    if (first) {
        DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
                 " dependency free.\n", node_ptr->seqNum,
                 node_ptr->typeToStr(), node_ptr->robNum);
    }

    // Check if resources are available to issue the specific node
    if (hwResource.isAvailable(node_ptr)) {
        // If resources are free only then add to readyList
        DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
                 "Adding to readyList, occupying resources.\n",
                 node_ptr->seqNum);
        // Compute the execute tick by adding the compute delay for the node
        // and add the ready node to the ready list
        addToSortedReadyList(node_ptr->seqNum,
                             owner.clockEdge() + node_ptr->compDelay);
        // Account for the resources taken up by this issued node.
        hwResource.occupy(node_ptr);
        return true;
    } else {
        if (first) {
            // Although dependencies are complete, resources are not
            // available.
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
            depFreeQueue.push(node_ptr);
        } else {
            DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
                     "%lli. Still pending issue.\n", node_ptr->seqNum);
        }
        return false;
    }
}

void
TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
{
    // Release the resources for this completed node.
    if (pkt->isWrite()) {
        // Consider store complete.
        hwResource.releaseStoreBuffer();
        // If it is a store response then do nothing since we do not model
        // dependencies on store completion in the trace. But if we were
        // blocking execution due to store buffer fullness, we need to
        // schedule an event and attempt to progress.
    } else {
        // If it is a load response then release the dependents waiting on it.
        // Get pointer to the completed load
        auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
        assert(graph_itr != depGraph.end());
        GraphNode* node_ptr = graph_itr->second;

        // Release resources occupied by the load
        hwResource.release(node_ptr);

        DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
                " dependents..\n", node_ptr->seqNum);

        for (auto child : node_ptr->dependents) {
            if (child->removeDepOnInst(node_ptr->seqNum)) {
                checkAndIssue(child);
            }
        }

        // clear the dynamically allocated set of dependents
        (node_ptr->dependents).clear();
        // Update the stat for numOps completed
        owner.updateNumOps(node_ptr->robNum);
        // delete node
        delete node_ptr;
        // remove from graph
        depGraph.erase(graph_itr);
    }

    if (debug::TraceCPUData) {
        printReadyList();
    }

    // If the size of the dependency graph is less than the dependency window
    // then read from the trace file to populate the graph next time we are in
    // execute.
    if (depGraph.size() < windowSize && !traceComplete)
        nextRead = true;

    // If not waiting for retry, attempt to schedule next event
    if (!retryPkt) {
        // We might have new dep-free nodes in the list which will have execute
        // tick greater than or equal to curTick. But a new dep-free node might
        // have its execute tick earlier. Therefore, attempt to reschedule. It
        // could happen that the readyList is empty and we got here via a
        // last remaining response. So, either the trace is complete or there
        // are pending nodes in the depFreeQueue. The checking is done in the
        // execute() control flow, so schedule an event to go via that flow.
        Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
            std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
        DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
                next_event_tick);
        owner.schedDcacheNextEvent(next_event_tick);
    }
}

void
TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
                                               Tick exec_tick)
{
    ReadyNode ready_node;
    ready_node.seqNum = seq_num;
    ready_node.execTick = exec_tick;

    // Iterator to readyList
    auto itr = readyList.begin();

    // If the readyList is empty, simply insert the new node at the beginning
    // and return
    if (itr == readyList.end()) {
        readyList.insert(itr, ready_node);
        elasticStats.maxReadyListSize =
            std::max<double>(readyList.size(),
                             elasticStats.maxReadyListSize.value());
        return;
    }

    // If the new node has its execution tick equal to the first node in the
    // list then go to the next node. If the first node in the list failed
    // to execute, its position as the first is thus maintained.
    if (retryPkt) {
        if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
            itr++;
    }

    // Increment the iterator and compare the node pointed to by it to the new
    // node till the position to insert the new node is found.
    bool found = false;
    while (!found && itr != readyList.end()) {
        // If the execution tick of the new node is less than the node then
        // this is the position to insert
        if (exec_tick < itr->execTick) {
            found = true;
        // If the execution tick of the new node is equal to the node then
        // sort in ascending order of sequence numbers
        } else if (exec_tick == itr->execTick) {
            // If the sequence number of the new node is less than the node
            // then this is the position to insert
            if (seq_num < itr->seqNum) {
                found = true;
            // Else go to next node
            } else {
                itr++;
            }
        } else {
            // If the execution tick of the new node is greater than the node
            // then go to the next node.
            itr++;
        }
    }
    readyList.insert(itr, ready_node);
    // Update the stat for max size reached of the readyList
    elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
        elasticStats.maxReadyListSize.value());
}
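
// As an illustration of the ordering maintained above: inserting nodes with
// (execTick, seqNum) pairs (100, 5), (90, 9) and (100, 3) into an empty
// readyList yields (90, 9), (100, 3), (100, 5): earliest tick first, with
// ties broken by the smaller sequence number (made-up values).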

void
TraceCPU::ElasticDataGen::printReadyList()
{
    auto itr = readyList.begin();
    if (itr == readyList.end()) {
        DPRINTF(TraceCPUData, "readyList is empty.\n");
        return;
    }
    DPRINTF(TraceCPUData, "Printing readyList:\n");
    while (itr != readyList.end()) {
        auto graph_itr = depGraph.find(itr->seqNum);
        [[maybe_unused]] GraphNode* node_ptr = graph_itr->second;
        DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
                 node_ptr->typeToStr(), itr->execTick);
        itr++;
    }
}

TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
    uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) :
    sizeROB(max_rob),
    sizeStoreBuffer(max_stores),
    sizeLoadBuffer(max_loads),
    oldestInFlightRobNum(UINT64_MAX),
    numInFlightLoads(0),
    numInFlightStores(0)
{}

void
TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
{
    // Occupy ROB entry for the issued node
    // Merely maintain the oldest node, i.e. numerically least robNum, by
    // saving it in the variable oldestInFlightRobNum.
    inFlightNodes[new_node->seqNum] = new_node->robNum;
    oldestInFlightRobNum = inFlightNodes.begin()->second;

    // Occupy Load/Store Buffer entry for the issued node if applicable
    if (new_node->isLoad()) {
        ++numInFlightLoads;
    } else if (new_node->isStore()) {
        ++numInFlightStores;
    } // else if it is a non load/store node, no buffer entry is occupied

    printOccupancy();
}

void
TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
{
    assert(!inFlightNodes.empty());
    DPRINTFR(TraceCPUData,
             "\tClearing done seq. num %d from inFlightNodes..\n",
             done_node->seqNum);

    assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
    inFlightNodes.erase(done_node->seqNum);

    if (inFlightNodes.empty()) {
        // If we deleted the only in-flight node, reset oldestInFlightRobNum
        // to its initialized (max) value.
        oldestInFlightRobNum = UINT64_MAX;
    } else {
        // Set the oldest in-flight node rob number equal to the first node in
        // the inFlightNodes since that will have the numerically least value.
        oldestInFlightRobNum = inFlightNodes.begin()->second;
    }

    DPRINTFR(TraceCPUData,
             "\tCleared. inFlightNodes.size() = %d, "
             "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
             oldestInFlightRobNum);

    // A store is considered complete when a request is sent, thus the ROB
    // entry is freed. But it occupies an entry in the Store Buffer until its
    // response is received. A load is considered complete when a response is
    // received, thus both ROB and Load Buffer entries can be released.
    if (done_node->isLoad()) {
        assert(numInFlightLoads != 0);
        --numInFlightLoads;
    }
    // For normal writes, we send the requests out and clear a store buffer
    // entry on response. For writes which are strictly ordered, e.g. writes
    // to device registers, we do that within release() which is called when
    // the node is executed and taken off from readyList.
    if (done_node->isStore() && done_node->isStrictlyOrdered()) {
        releaseStoreBuffer();
    }
}

void
TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
{
    assert(numInFlightStores != 0);
    --numInFlightStores;
}

bool
TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
    const GraphNode* new_node) const
{
    uint16_t num_in_flight_nodes;
    if (inFlightNodes.empty()) {
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = 0", new_node->seqNum);
    } else if (new_node->robNum > oldestInFlightRobNum) {
        // This is the intuitive case where the new dep-free node is a younger
        // instruction than the oldest instruction in-flight. Thus we make
        // sure in_flight_nodes does not overflow.
        num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
                 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
    } else {
        // This is the case where an instruction older than the oldest in-
        // flight instruction becomes dep-free. Thus we must have already
        // accounted for the entry in ROB for this new dep-free node.
        // Immediately after this check returns true, oldestInFlightRobNum
        // will be updated in occupy(). We simply let this node issue now.
        num_in_flight_nodes = 0;
        DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
                 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
                 new_node->seqNum, new_node->robNum);
    }
    DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
    // Check if resources are available to issue the specific node
    if (num_in_flight_nodes >= sizeROB) {
        return false;
    }
    if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
        return false;
    }
    if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
        return false;
    }
    return true;
}
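
// A numeric sketch of the check above (made-up sizes): with sizeROB = 128,
// a dep-free node with robNum 260 while oldestInFlightRobNum is 150 gives
// 110 in-flight entries and may issue; with robNum 300 the difference (150)
// exceeds the ROB size and the node must wait in depFreeQueue.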

bool
TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
{
    // Return true if there is at least one read or write request in flight
    return (numInFlightStores != 0 || numInFlightLoads != 0);
}

void
TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
{
    DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
             "LQ = %d/%d, SQ = %d/%d.\n",
             oldestInFlightRobNum,
             numInFlightLoads, sizeLoadBuffer,
             numInFlightStores, sizeStoreBuffer);
}

TraceCPU::FixedRetryGen::FixedRetryGenStatGroup::FixedRetryGenStatGroup(
    statistics::Group *parent, const std::string& _name) :
    statistics::Group(parent, _name.c_str()),
    ADD_STAT(numSendAttempted, statistics::units::Count::get(),
             "Number of first attempts to send a request"),
    ADD_STAT(numSendSucceeded, statistics::units::Count::get(),
             "Number of successful first attempts"),
    ADD_STAT(numSendFailed, statistics::units::Count::get(),
             "Number of failed first attempts"),
    ADD_STAT(numRetrySucceeded, statistics::units::Count::get(),
             "Number of successful retries"),
    ADD_STAT(instLastTick, statistics::units::Tick::get(),
             "Last tick simulated from the fixed inst trace")
{
}

Tick
TraceCPU::FixedRetryGen::init()
{
    DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
            " IcacheGen: fixed issue with retry.\n");

    if (nextExecute()) {
        DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
        return currElement.tick;
    } else {
        panic("Read of first message in the trace failed.\n");
        return MaxTick;
    }
}

bool
TraceCPU::FixedRetryGen::tryNext()
{
    // If there is a retry packet, try to send it
    if (retryPkt) {
        DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");

        if (!port.sendTimingReq(retryPkt)) {
            // Still blocked! This should never occur.
            DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
            return false;
        }
        ++fixedStats.numRetrySucceeded;
    } else {
        DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");

        // try sending current element
        assert(currElement.isValid());

        ++fixedStats.numSendAttempted;

        if (!send(currElement.addr, currElement.blocksize, currElement.cmd,
                  currElement.flags, currElement.pc)) {
            DPRINTF(TraceCPUInst, "currElement sending failed.\n");
            ++fixedStats.numSendFailed;
            // return false to indicate not to schedule next event
            return false;
        } else {
            ++fixedStats.numSendSucceeded;
        }
    }
    // If packet was sent successfully, either retryPkt or currElement, return
    // true to indicate to schedule event at current Tick plus delta. If packet
    // was sent successfully and there is no next packet to send, return false.
    DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
            "element.\n");
    retryPkt = nullptr;
    // Read next element into currElement, currElement gets cleared so save the
    // tick to calculate delta
    Tick last_tick = currElement.tick;
    if (nextExecute()) {
        assert(currElement.tick >= last_tick);
        delta = currElement.tick - last_tick;
    }
    return !traceComplete;
}
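
// The delta saved above is the gap between consecutive send ticks in the
// trace; schedIcacheNext() adds it (via tickDelta()) to curTick() when
// scheduling the next fetch, which keeps the fixed-timing replay aligned
// to the recorded inter-request intervals.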

void
TraceCPU::FixedRetryGen::exit()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::nextExecute()
{
    if (traceComplete)
        // We are at the end of the file, thus we have no more messages.
        // Return false.
        return false;

    // Reset the currElement to the default values
    currElement.clear();

    // Read the next line to get the next message. If that fails then end of
    // trace has been reached and traceComplete needs to be set in addition
    // to returning false. If successful then next message is in currElement.
    if (!trace.read(&currElement)) {
        traceComplete = true;
        fixedStats.instLastTick = curTick();
        return false;
    }

    DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
            currElement.cmd.isRead() ? 'r' : 'w',
            currElement.addr,
            currElement.pc,
            currElement.blocksize,
            currElement.tick);

    return true;
}

bool
TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
                              Request::FlagsType flags, Addr pc)
{
    // Create new request
    auto req = std::make_shared<Request>(addr, size, flags, requestorId);
    req->setPC(pc);

    // If this is not done it triggers assert in L1 cache for invalid contextId
    req->setContext(ContextID(0));

    // Embed it in a packet
    PacketPtr pkt = new Packet(req, cmd);

    uint8_t* pkt_data = new uint8_t[req->getSize()];
    pkt->dataDynamic(pkt_data);

    if (cmd.isWrite()) {
        memset(pkt_data, 0xA, req->getSize());
    }

    // Call RequestPort method to send a timing request for this packet
    bool success = port.sendTimingReq(pkt);
    if (!success) {
        // If it fails, save the packet to retry when a retry is signalled by
        // the cache
        retryPkt = pkt;
    }
    return success;
}

void
TraceCPU::icacheRetryRecvd()
{
    // Schedule an event to go through the control flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
            " event @%lli.\n", curTick());
    schedule(icacheNextEvent, curTick());
}

void
TraceCPU::dcacheRetryRecvd()
{
    // Schedule an event to go through the execute flow in the same tick as
    // retry is received
    DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
            " event @%lli.\n", curTick());
    schedule(dcacheNextEvent, curTick());
}

void
TraceCPU::schedDcacheNextEvent(Tick when)
{
    if (!dcacheNextEvent.scheduled()) {
        DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
                when);
        schedule(dcacheNextEvent, when);
        ++traceStats.numSchedDcacheEvent;
    } else if (when < dcacheNextEvent.when()) {
        DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
                " to %lli.\n", dcacheNextEvent.when(), when);
        reschedule(dcacheNextEvent, when);
    }
}

bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{
    // All responses on the instruction fetch side are ignored. Simply delete
    // the packet to free allocated memory
    delete pkt;

    return true;
}

void
TraceCPU::IcachePort::recvReqRetry()
{
    owner->icacheRetryRecvd();
}

void
TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
{
    DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
    dcacheGen.completeMemAccess(pkt);
}

bool
TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
{
    // Handle the responses for data memory requests which is done inside the
    // elastic data generator
    owner->dcacheRecvTimingResp(pkt);
    // After processing the response delete the packet to free
    // memory
    delete pkt;

    return true;
}

void
TraceCPU::DcachePort::recvReqRetry()
{
    owner->dcacheRetryRecvd();
}

TraceCPU::ElasticDataGen::InputStream::InputStream(
    const std::string& filename, const double time_multiplier) :
    trace(filename),
    timeMultiplier(time_multiplier),
    microOpCount(0)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::InstDepRecordHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);

        if (header_msg.tick_freq() != sim_clock::Frequency) {
            panic("Trace %s was recorded with a different tick frequency %d\n",
                  header_msg.tick_freq());
        }
    } else {
        // Assign window size equal to the field in the trace that was
        // recorded when the data dependency trace was captured in the o3cpu
        // model
        windowSize = header_msg.window_size();
    }
}

void
TraceCPU::ElasticDataGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
{
    ProtoMessage::InstDepRecord pkt_msg;
    if (trace.read(pkt_msg)) {
        // Required fields
        element->seqNum = pkt_msg.seq_num();
        element->type = pkt_msg.type();
        // Scale the compute delay to effectively scale the Trace CPU
        // frequency
        element->compDelay = pkt_msg.comp_delay() * timeMultiplier;

        // Repeated field robDepList
        element->robDep.clear();
        for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
            element->robDep.push_back(pkt_msg.rob_dep(i));
        }

        // Repeated field
        element->regDep.clear();
        for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
            // There is a possibility that an instruction has both, a register
            // and order dependency on an instruction. In such a case, the
            // register dependency is omitted
            bool duplicate = false;
            for (auto &dep: element->robDep) {
                duplicate |= (pkt_msg.reg_dep(i) == dep);
            }
            if (!duplicate)
                element->regDep.push_back(pkt_msg.reg_dep(i));
        }

        // Optional fields
        if (pkt_msg.has_p_addr())
            element->physAddr = pkt_msg.p_addr();
        else
            element->physAddr = 0;

        if (pkt_msg.has_v_addr())
            element->virtAddr = pkt_msg.v_addr();
        else
            element->virtAddr = 0;

        if (pkt_msg.has_size())
            element->size = pkt_msg.size();
        else
            element->size = 0;

        if (pkt_msg.has_flags())
            element->flags = pkt_msg.flags();
        else
            element->flags = 0;

        if (pkt_msg.has_pc())
            element->pc = pkt_msg.pc();
        else
            element->pc = 0;

        // ROB occupancy number
        ++microOpCount;
        if (pkt_msg.has_weight()) {
            microOpCount += pkt_msg.weight();
        }
        element->robNum = microOpCount;
        return true;
    }

    // We have reached the end of the file
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
{
    for (auto it = regDep.begin(); it != regDep.end(); it++) {
        if (*it == reg_dep) {
            // If register dependency is found, erase it.
            regDep.erase(it);
            DPRINTFR(TraceCPUData,
                     "\tFor %lli: Marking register dependency %lli done.\n",
                     seqNum, reg_dep);
            return true;
        }
    }

    // Return false if the dependency is not found
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
{
    for (auto it = robDep.begin(); it != robDep.end(); it++) {
        if (*it == rob_dep) {
            // If the rob dependency is found, erase it.
            robDep.erase(it);
            DPRINTFR(TraceCPUData,
                     "\tFor %lli: Marking ROB dependency %lli done.\n",
                     seqNum, rob_dep);
            return true;
        }
    }
    return false;
}

bool
TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
{
    // If it is an rob dependency then remove it
    if (!removeRobDep(done_seq_num)) {
        // If it is not an rob dependency then it must be a register
        // dependency. If the register dependency is not found, it violates
        // an assumption and must be caught by assert.
        [[maybe_unused]] bool regdep_found = removeRegDep(done_seq_num);
        assert(regdep_found);
    }
    // Return true if the node is dependency free
    return robDep.empty() && regDep.empty();
}

void
TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
{
#if TRACING_ON
    DPRINTFR(TraceCPUData, "%lli", seqNum);
    DPRINTFR(TraceCPUData, ",%s", typeToStr());
    if (isLoad() || isStore()) {
        DPRINTFR(TraceCPUData, ",%i", physAddr);
        DPRINTFR(TraceCPUData, ",%i", size);
        DPRINTFR(TraceCPUData, ",%i", flags);
    }
    DPRINTFR(TraceCPUData, ",%lli", compDelay);
    DPRINTFR(TraceCPUData, "robDep:");
    for (auto &dep: robDep) {
        DPRINTFR(TraceCPUData, ",%lli", dep);
    }
    DPRINTFR(TraceCPUData, "regDep:");
    for (auto &dep: regDep) {
        DPRINTFR(TraceCPUData, ",%lli", dep);
    }
    auto child_itr = dependents.begin();
    DPRINTFR(TraceCPUData, "dependents:");
    while (child_itr != dependents.end()) {
        DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
        child_itr++;
    }

    DPRINTFR(TraceCPUData, "\n");
#endif // TRACING_ON
}

std::string
TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
{
    return Record::RecordType_Name(type);
}

TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
    : trace(filename)
{
    // Create a protobuf message for the header and read it from the stream
    ProtoMessage::PacketHeader header_msg;
    if (!trace.read(header_msg)) {
        panic("Failed to read packet header from %s\n", filename);

        if (header_msg.tick_freq() != sim_clock::Frequency) {
            panic("Trace %s was recorded with a different tick frequency %d\n",
                  header_msg.tick_freq());
        }
    }
}

void
TraceCPU::FixedRetryGen::InputStream::reset()
{
    trace.reset();
}

bool
TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
{
    ProtoMessage::Packet pkt_msg;
    if (trace.read(pkt_msg)) {
        element->cmd = pkt_msg.cmd();
        element->addr = pkt_msg.addr();
        element->blocksize = pkt_msg.size();
        element->tick = pkt_msg.tick();
        element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
        element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
        return true;
    }

    // We have reached the end of the file
    return false;
}

} // namespace gem5