gem5 v24.0.0.0
trace_cpu.cc
1/*
2 * Copyright (c) 2013 - 2016, 2023 Arm Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "cpu/trace/trace_cpu.hh"
39
40#include "base/compiler.hh"
41#include "sim/sim_exit.hh"
42#include "sim/system.hh"
43
44namespace gem5
45{
46
47// Declare and initialize the static counter for number of trace CPUs.
48int TraceCPU::numTraceCPUs = 0;
49
50TraceCPU::TraceCPU(const TraceCPUParams &params)
51 : ClockedObject(params),
52 cacheLineSize(params.system->cacheLineSize()),
53 icachePort(this),
54 dcachePort(this),
55 instRequestorID(params.system->getRequestorId(this, "inst")),
56 dataRequestorID(params.system->getRequestorId(this, "data")),
57 instTraceFile(params.instTraceFile),
58 dataTraceFile(params.dataTraceFile),
59 icacheGen(*this, ".iside", icachePort, instRequestorID, instTraceFile),
60 dcacheGen(*this, ".dside", dcachePort, dataRequestorID, dataTraceFile,
61 params),
62 icacheNextEvent([this]{ schedIcacheNext(); }, name()),
63 dcacheNextEvent([this]{ schedDcacheNext(); }, name()),
64 oneTraceComplete(false),
65 traceOffset(0),
66 execCompleteEvent(nullptr),
67 enableEarlyExit(params.enableEarlyExit),
68 progressMsgInterval(params.progressMsgInterval),
69 progressMsgThreshold(params.progressMsgInterval), traceStats(this)
70{
71 // Increment static counter for number of Trace CPUs.
72 numTraceCPUs++;
73
74 // Check that the python parameters for sizes of ROB, store buffer and
75 // load buffer do not overflow the corresponding C++ variables.
76 fatal_if(params.sizeROB > UINT16_MAX,
77 "ROB size set to %d exceeds the max. value of %d.",
78 params.sizeROB, UINT16_MAX);
79 fatal_if(params.sizeStoreBuffer > UINT16_MAX,
80 "Store buffer size set to %d exceeds the max. value of %d.",
81 params.sizeStoreBuffer, UINT16_MAX);
82 fatal_if(params.sizeLoadBuffer > UINT16_MAX,
83 "Load buffer size set to %d exceeds the max. value of %d.",
84 params.sizeLoadBuffer, UINT16_MAX);
85}
86
87void
88TraceCPU::updateNumOps(uint64_t rob_num)
89{
90 traceStats.numOps = rob_num;
91 if (progressMsgInterval != 0 &&
92 static_cast<uint64_t>(traceStats.numOps.value()) >= progressMsgThreshold) {
93 inform("%s: %i insts committed\n", name(), progressMsgThreshold);
94 progressMsgThreshold += progressMsgInterval;
95 }
96}
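
The arithmetic above prints at most one message per call and reports the threshold that was crossed rather than the exact committed count. A standalone sketch with made-up numbers (plain integers, not gem5's statistics types):

#include <cstdint>
#include <iostream>

int main()
{
    const uint64_t interval = 10000;  // stand-in for progressMsgInterval
    uint64_t threshold = interval;    // progressMsgThreshold starts one interval in

    // Committed micro-op counts seen across successive updateNumOps() calls:
    for (uint64_t num_ops : {4000, 10500, 20000, 31000}) {
        if (interval != 0 && num_ops >= threshold) {
            std::cout << threshold << " insts committed\n";  // 10000, 20000, 30000
            threshold += interval;  // arm the next message
        }
    }
    return 0;
}

Note that if the commit count jumps by more than one interval between calls, only one message is emitted and the threshold lags behind, just as in the member function above.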
97
98void
99TraceCPU::init()
100{
101 DPRINTF(TraceCPUInst, "Instruction fetch request trace file is \"%s\".\n",
102 instTraceFile);
103 DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
104 dataTraceFile);
105
106 ClockedObject::init();
107
108 // Get the send tick of the first instruction read request
109 Tick first_icache_tick = icacheGen.init();
110
111 // Get the send tick of the first data read/write request
112 Tick first_dcache_tick = dcacheGen.init();
113
114 // Set the trace offset as the minimum of that in both traces
115 traceOffset = std::min(first_icache_tick, first_dcache_tick);
116 inform("%s: Time offset (tick) found as min of both traces is %lli.",
117 name(), traceOffset);
118
119 // Schedule next icache and dcache event by subtracting the offset
120 schedule(icacheNextEvent, first_icache_tick - traceOffset);
121 schedule(dcacheNextEvent, first_dcache_tick - traceOffset);
122
123 // Adjust the trace offset for the dcache generator's ready nodes
124 // We don't need to do this for the icache generator as it will
125 // send its first request at the first event and schedule subsequent
126 // events using a relative tick delta
127 dcacheGen.adjustInitTraceOffset(traceOffset);
128
129 // If the Trace CPU simulation is configured to exit on any one trace
130 // completion then we don't need a counted event to count down all Trace
131 // CPUs in the system. If not then instantiate a counted event.
132 if (!enableEarlyExit) {
133 // The static counter for number of Trace CPUs is correctly set at
134 // this point so create an event and pass it.
135 execCompleteEvent = new CountedExitEvent("end of all traces reached.",
136 numTraceCPUs);
137 }
138
139}
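
The offset normalization above can be seen with two made-up first-tick values: the earlier trace begins replay at tick 0 and the spacing between the two traces is preserved. A minimal sketch (names are simplified stand-ins):

#include <algorithm>
#include <cstdint>
#include <iostream>

using Tick = uint64_t;

int main()
{
    // First send ticks read from the two traces (made-up values):
    Tick first_icache_tick = 77000;
    Tick first_dcache_tick = 81500;

    // The common offset is the smaller of the two, so the earlier trace
    // starts at tick 0 and the relative spacing is preserved.
    Tick trace_offset = std::min(first_icache_tick, first_dcache_tick);

    std::cout << "icache event at " << first_icache_tick - trace_offset << "\n";  // 0
    std::cout << "dcache event at " << first_dcache_tick - trace_offset << "\n";  // 4500
    return 0;
}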
140
141void
142TraceCPU::schedIcacheNext()
143{
144 DPRINTF(TraceCPUInst, "IcacheGen event.\n");
145
146 // Try to send the current packet or a retry packet if there is one
147 bool sched_next = icacheGen.tryNext();
148 // If packet sent successfully, schedule next event
149 if (sched_next) {
150 DPRINTF(TraceCPUInst,
151 "Scheduling next icacheGen event at %d.\n",
152 curTick() + icacheGen.tickDelta());
153 schedule(icacheNextEvent, curTick() + icacheGen.tickDelta());
154 ++traceStats.numSchedIcacheEvent;
155 } else {
156 // check if traceComplete. If not, do nothing because sending failed
157 // and next event will be scheduled via RecvRetry()
158 if (icacheGen.isTraceComplete()) {
159 // If this is the first trace to complete, set the variable. If it
160 // is already set then both traces are complete to exit sim.
161 checkAndSchedExitEvent();
162 }
163 }
164 return;
165}
166
167void
168TraceCPU::schedDcacheNext()
169{
170 DPRINTF(TraceCPUData, "DcacheGen event.\n");
171
172 // Update stat for numCycles
173 traceStats.numCycles = clockEdge() / clockPeriod();
174
175 dcacheGen.execute();
176 if (dcacheGen.isExecComplete()) {
177 checkAndSchedExitEvent();
178 }
179}
180
181void
182TraceCPU::checkAndSchedExitEvent()
183{
184 if (!oneTraceComplete) {
185 oneTraceComplete = true;
186 } else {
187 // Schedule event to indicate execution is complete as both
188 // instruction and data access traces have been played back.
189 inform("%s: Execution complete.", name());
190 // If the replay is configured to exit early, that is when any one
191 // execution is complete then exit immediately and return. Otherwise,
192 // schedule the counted exit that counts down completion of each Trace
193 // CPU.
194 if (enableEarlyExit) {
195 exitSimLoop("End of trace reached");
196 } else {
197 schedule(*execCompleteEvent, curTick());
198 }
199 }
200}
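
A minimal sketch of this two-call handshake, with a simplified stand-in for the TraceCPU state (not the gem5 class): the first generator to finish only sets a flag; the second one triggers the exit.

#include <iostream>

struct ExitTracker
{
    bool one_done = false;

    void traceFinished()
    {
        if (!one_done) {
            one_done = true;  // first trace done: just remember it
        } else {
            std::cout << "Execution complete.\n";  // second trace done: exit
        }
    }
};

int main()
{
    ExitTracker t;
    t.traceFinished();  // icache trace ends: nothing printed
    t.traceFinished();  // dcache trace ends: "Execution complete."
    return 0;
}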
201TraceCPU::TraceStats::TraceStats(TraceCPU *trace) :
202 statistics::Group(trace),
203 ADD_STAT(numSchedDcacheEvent, statistics::units::Count::get(),
204 "Number of events scheduled to trigger data request generator"),
205 ADD_STAT(numSchedIcacheEvent, statistics::units::Count::get(),
206 "Number of events scheduled to trigger instruction request "
207 "generator"),
208 ADD_STAT(numOps, statistics::units::Count::get(),
209 "Number of micro-ops simulated by the Trace CPU"),
210 ADD_STAT(cpi, statistics::units::Rate<
211 statistics::units::Cycle, statistics::units::Count>::get(),
212 "Cycles per micro-op used as a proxy for CPI",
213 trace->traceStats.numCycles / numOps)
214{
215 cpi.precision(6);
216}
217
218TraceCPU::ElasticDataGen::
219ElasticDataGenStatGroup::ElasticDataGenStatGroup(statistics::Group *parent,
220 const std::string& _name) :
221 statistics::Group(parent, _name.c_str()),
222 ADD_STAT(maxDependents, statistics::units::Count::get(),
223 "Max number of dependents observed on a node"),
224 ADD_STAT(maxReadyListSize, statistics::units::Count::get(),
225 "Max size of the ready list observed"),
226 ADD_STAT(numSendAttempted, statistics::units::Count::get(),
227 "Number of first attempts to send a request"),
228 ADD_STAT(numSendSucceeded, statistics::units::Count::get(),
229 "Number of successful first attempts"),
230 ADD_STAT(numSendFailed, statistics::units::Count::get(),
231 "Number of failed first attempts"),
232 ADD_STAT(numRetrySucceeded, statistics::units::Count::get(),
233 "Number of successful retries"),
234 ADD_STAT(numSplitReqs, statistics::units::Count::get(),
235 "Number of split requests"),
236 ADD_STAT(numSOLoads, statistics::units::Count::get(),
237 "Number of strictly ordered loads"),
238 ADD_STAT(numSOStores, statistics::units::Count::get(),
239 "Number of strictly ordered stores"),
240 ADD_STAT(dataLastTick, statistics::units::Tick::get(),
241 "Last tick simulated from the elastic data trace")
242{
243}
244
245Tick
246TraceCPU::ElasticDataGen::init()
247{
248 DPRINTF(TraceCPUData, "Initializing data memory request generator "
249 "DcacheGen: elastic issue with retry.\n");
250
252 "Trace has %d elements. It must have at least %d elements.",
253 depGraph.size(), 2 * windowSize);
254 DPRINTF(TraceCPUData, "After 1st read, depGraph size:%d.\n",
255 depGraph.size());
256
258 "Trace has %d elements. It must have at least %d elements.",
259 depGraph.size(), 2 * windowSize);
260 DPRINTF(TraceCPUData, "After 2st read, depGraph size:%d.\n",
261 depGraph.size());
262
263 // Print readyList
264 if (debug::TraceCPUData) {
265 printReadyList();
266 }
267 auto free_itr = readyList.begin();
268 DPRINTF(TraceCPUData,
269 "Execute tick of the first dependency free node %lli is %d.\n",
270 free_itr->seqNum, free_itr->execTick);
271 // Return the execute tick of the earliest ready node so that an event
272 // can be scheduled to call execute()
273 return (free_itr->execTick);
274}
275
276void
277TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset)
278{
279 for (auto& free_node : readyList) {
280 free_node.execTick -= offset;
281 }
282}
283
284void
285TraceCPU::ElasticDataGen::exit()
286{
287 trace.reset();
288}
289
290bool
291TraceCPU::ElasticDataGen::readNextWindow()
292{
293 // Read and add next window
294 DPRINTF(TraceCPUData, "Reading next window from file.\n");
295
296 if (traceComplete) {
297 // We are at the end of the file, thus we have no more records.
298 // Return false.
299 return false;
300 }
301
302 DPRINTF(TraceCPUData, "Start read: Size of depGraph is %d.\n",
303 depGraph.size());
304
305 uint32_t num_read = 0;
306 while (num_read != windowSize) {
307
308 // Create a new graph node
309 GraphNode* new_node = new GraphNode;
310
311 // Read the next line to get the next record. If that fails then end of
312 // trace has been reached and traceComplete needs to be set in addition
313 // to returning false.
314 if (!trace.read(new_node)) {
315 DPRINTF(TraceCPUData, "\tTrace complete!\n");
316 traceComplete = true;
317 return false;
318 }
319
320 // Annotate the ROB dependencies of the new node onto the parent nodes.
321 addDepsOnParent(new_node, new_node->robDep);
322 // Annotate the register dependencies of the new node onto the parent
323 // nodes.
324 addDepsOnParent(new_node, new_node->regDep);
325
326 num_read++;
327 // Add to map
328 depGraph[new_node->seqNum] = new_node;
329 if (new_node->robDep.empty() && new_node->regDep.empty()) {
330 // Source dependencies are already complete, check if resources
331 // are available and issue. The execution time is approximated
332 // to current time plus the computational delay.
333 checkAndIssue(new_node);
334 }
335 }
336
337 DPRINTF(TraceCPUData, "End read: Size of depGraph is %d.\n",
338 depGraph.size());
339 return true;
340}
341
342template<typename T>
343void
344TraceCPU::ElasticDataGen::addDepsOnParent(GraphNode *new_node, T& dep_list)
345{
346 auto dep_it = dep_list.begin();
347 while (dep_it != dep_list.end()) {
348 // We look up the valid dependency, i.e. the parent of this node
349 auto parent_itr = depGraph.find(*dep_it);
350 if (parent_itr != depGraph.end()) {
351 // If the parent is found, it is yet to be executed. Append a
352 // pointer to the new node to the dependents list of the parent
353 // node.
354 parent_itr->second->dependents.push_back(new_node);
355 auto num_depts = parent_itr->second->dependents.size();
356 elasticStats.maxDependents = std::max<double>(num_depts,
357 elasticStats.maxDependents.value());
358 dep_it++;
359 } else {
360 // The dependency is not found in the graph. So consider
361 // the execution of the parent is complete, i.e. remove this
362 // dependency.
363 dep_it = dep_list.erase(dep_it);
364 }
365 }
366}
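
The loop above relies on the container's erase() returning the iterator past the erased element. A self-contained sketch of the same keep-or-prune pattern, with simplified stand-in types for the dependency graph and dependency list:

#include <cstdint>
#include <iostream>
#include <list>
#include <unordered_map>

int main()
{
    // Only seq. num 10 is still in flight; seq. num 7 has already completed.
    std::unordered_map<uint64_t, const char*> dep_graph = {
        {10, "parent still in flight"}
    };
    std::list<uint64_t> dep_list = {7, 10};

    for (auto it = dep_list.begin(); it != dep_list.end();) {
        if (dep_graph.find(*it) != dep_graph.end()) {
            ++it;                     // parent pending: keep the dependency
        } else {
            it = dep_list.erase(it);  // parent done: drop the dependency
        }
    }
    std::cout << "dependencies left: " << dep_list.size() << "\n";  // 1
    return 0;
}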
367
368void
369TraceCPU::ElasticDataGen::execute()
370{
371 DPRINTF(TraceCPUData, "Execute start occupancy:\n");
372 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
373 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
374 depFreeQueue.size());
375 hwResource.printOccupancy();
376
377 // Read next window to make sure that dependents of all dep-free nodes
378 // are in the depGraph
379 if (nextRead) {
380 readNextWindow();
381 nextRead = false;
382 }
383
384 // First attempt to issue the pending dependency-free nodes held
385 // in depFreeQueue. If resources have become available for a node,
386 // then issue it, i.e. add the node to readyList.
387 while (!depFreeQueue.empty()) {
388 if (checkAndIssue(depFreeQueue.front(), false)) {
389 DPRINTF(TraceCPUData,
390 "Removing from depFreeQueue: seq. num %lli.\n",
391 (depFreeQueue.front())->seqNum);
392 depFreeQueue.pop();
393 } else {
394 break;
395 }
396 }
397 // Proceed to execute from readyList
398 auto graph_itr = depGraph.begin();
399 auto free_itr = readyList.begin();
400 // Iterate through readyList until the next free node has its execute
401 // tick later than curTick or the end of readyList is reached
402 while (free_itr != readyList.end() && free_itr->execTick <= curTick()) {
403
404 // Get pointer to the node to be executed
405 graph_itr = depGraph.find(free_itr->seqNum);
406 assert(graph_itr != depGraph.end());
407 GraphNode* node_ptr = graph_itr->second;
408
409 // If there is a retryPkt send that else execute the load
410 if (retryPkt) {
411 // The retryPkt must be the request that was created by the
412 // first node in the readyList.
413 if (retryPkt->req->getReqInstSeqNum() != node_ptr->seqNum) {
414 panic("Retry packet's seqence number does not match "
415 "the first node in the readyList.\n");
416 }
417 if (port.sendTimingReq(retryPkt)) {
418 ++elasticStats.numRetrySucceeded;
419 retryPkt = nullptr;
420 }
421 } else if (node_ptr->isLoad() || node_ptr->isStore()) {
422 // If there is no retryPkt, attempt to send a memory request in
423 // case of a load or store node. If the send fails, executeMemReq()
424 // returns a packet pointer, which we save in retryPkt. In case of
425 // a comp node we don't do anything and simply continue as if the
426 // execution of the comp node succeeded.
427 retryPkt = executeMemReq(node_ptr);
428 }
429 // If the retryPkt or a new load/store node failed, we exit from here
430 // as a retry from cache will bring the control to execute(). The
431 // first node in readyList then, will be the failed node.
432 if (retryPkt) {
433 break;
434 }
435
436 // Proceed to remove dependencies for the successfully executed node.
437 // If it is a load which is not strictly ordered and we sent a
438 // request for it successfully, we do not yet mark any register
439 // dependencies complete. But as per dependency modelling we need
440 // to mark ROB dependencies of load and non load/store nodes which
441 // are based on successful sending of the load as complete.
442 if (node_ptr->isLoad() && !node_ptr->isStrictlyOrdered()) {
443 // If execute succeeded mark its dependents as complete
444 DPRINTF(TraceCPUData,
445 "Node seq. num %lli sent. Waking up dependents..\n",
446 node_ptr->seqNum);
447
448 auto child_itr = (node_ptr->dependents).begin();
449 while (child_itr != (node_ptr->dependents).end()) {
450 // ROB dependency of a store on a load must not be removed
451 // after load is sent but after response is received
452 if (!(*child_itr)->isStore() &&
453 (*child_itr)->removeRobDep(node_ptr->seqNum)) {
454
455 // Check if the child node has become dependency free
456 if ((*child_itr)->robDep.empty() &&
457 (*child_itr)->regDep.empty()) {
458
459 // Source dependencies are complete, check if
460 // resources are available and issue
461 checkAndIssue(*child_itr);
462 }
463 // Remove this child for the sent load and point to new
464 // location of the element following the erased element
465 child_itr = node_ptr->dependents.erase(child_itr);
466 } else {
467 // This child is not dependency-free, point to the next
468 // child
469 child_itr++;
470 }
471 }
472 } else {
473 // If it is a strictly ordered load mark its dependents as complete
474 // as we do not send a request for this case. If it is a store or a
475 // comp node we also mark all its dependents complete.
476 DPRINTF(TraceCPUData, "Node seq. num %lli done. Waking"
477 " up dependents..\n", node_ptr->seqNum);
478
479 for (auto child : node_ptr->dependents) {
480 // If the child node is dependency free removeDepOnInst()
481 // returns true.
482 if (child->removeDepOnInst(node_ptr->seqNum)) {
483 // Source dependencies are complete, check if resources
484 // are available and issue
485 checkAndIssue(child);
486 }
487 }
488 }
489
490 // After executing the node, remove from readyList and delete node.
491 readyList.erase(free_itr);
492 // If it is a cacheable load which was sent, don't delete
493 // just yet. Delete it in completeMemAccess() after the
494 // response is received. If it is a strictly ordered
495 // load, it was not sent and all dependencies were simply
496 // marked complete. Thus it is safe to delete it. For
497 // stores and non load/store nodes all dependencies were
498 // marked complete so it is safe to delete it.
499 if (!node_ptr->isLoad() || node_ptr->isStrictlyOrdered()) {
500 // Release all resources occupied by the completed node
501 hwResource.release(node_ptr);
502 // clear the dynamically allocated set of dependents
503 (node_ptr->dependents).clear();
504 // Update the stat for numOps simulated
505 owner.updateNumOps(node_ptr->robNum);
506 // delete node
507 delete node_ptr;
508 // remove from graph
509 depGraph.erase(graph_itr);
510 }
511 // Point to first node to continue to next iteration of while loop
512 free_itr = readyList.begin();
513 } // end of while loop
514
515 // Print readyList, sizes of queues and resource status after updating
516 if (debug::TraceCPUData) {
518 DPRINTF(TraceCPUData, "Execute end occupancy:\n");
519 DPRINTFR(TraceCPUData, "\tdepGraph = %d, readyList = %d, "
520 "depFreeQueue = %d ,", depGraph.size(), readyList.size(),
521 depFreeQueue.size());
522 hwResource.printOccupancy();
523 }
524
525 if (retryPkt) {
526 DPRINTF(TraceCPUData, "Not scheduling an event as expecting a retry"
527 "event from the cache for seq. num %lli.\n",
528 retryPkt->req->getReqInstSeqNum());
529 return;
530 }
531 // If the size of the dependency graph is less than the dependency window
532 // then read from the trace file to populate the graph next time we are in
533 // execute.
534 if (depGraph.size() < windowSize && !traceComplete)
535 nextRead = true;
536
537 // If cache is not blocked, schedule an event for the first execTick in
538 // readyList else retry from cache will schedule the event. If the ready
539 // list is empty then check if the next pending node has resources
540 // available to issue. If yes, then schedule an event for the next cycle.
541 if (!readyList.empty()) {
542 Tick next_event_tick = std::max(readyList.begin()->execTick,
543 curTick());
544 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
545 next_event_tick);
546 owner.schedDcacheNextEvent(next_event_tick);
547 } else if (readyList.empty() && !depFreeQueue.empty() &&
549 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
552 }
553
554 // If trace is completely read, readyList is empty and depGraph is empty,
555 // set execComplete to true
556 if (depGraph.empty() && readyList.empty() && traceComplete &&
558 DPRINTF(TraceCPUData, "\tExecution Complete!\n");
559 execComplete = true;
560 elasticStats.dataLastTick = curTick();
561 }
562}
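
The scheduling rule at the end of execute() never schedules into the past: an overdue ready node is simply handled on the current tick. A sketch with made-up ticks:

#include <algorithm>
#include <cstdint>
#include <iostream>

using Tick = uint64_t;

int main()
{
    Tick cur_tick = 1000;
    for (Tick first_ready_tick : {800ULL, 1000ULL, 1600ULL}) {
        // Clamp to the current tick so the event queue never runs backwards.
        Tick next_event_tick = std::max(first_ready_tick, cur_tick);
        std::cout << "ready at " << first_ready_tick
                  << " -> schedule at " << next_event_tick << "\n";
    }
    return 0;
}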
563
564PacketPtr
565TraceCPU::ElasticDataGen::executeMemReq(GraphNode *node_ptr)
566{
567 DPRINTF(TraceCPUData, "Executing memory request %lli (phys addr %d, "
568 "virt addr %d, pc %#x, size %d, flags %d).\n",
569 node_ptr->seqNum, node_ptr->physAddr, node_ptr->virtAddr,
570 node_ptr->pc, node_ptr->size, node_ptr->flags);
571
572 // If the request is strictly ordered, do not send it. Just return nullptr
573 // as if it was successfully sent.
574 if (node_ptr->isStrictlyOrdered()) {
575 node_ptr->isLoad() ? ++elasticStats.numSOLoads :
577 DPRINTF(TraceCPUData, "Skipping strictly ordered request %lli.\n",
578 node_ptr->seqNum);
579 return nullptr;
580 }
581
582 // Check if the request spans two cache lines as this condition triggers
583 // an assert fail in the L1 cache. If it does then truncate the size to
584 // access only until the end of that line and ignore the remainder. The
585 // stat counting this is useful to keep a check on how frequently this
586 // happens. If required the code could be revised to mimic splitting such
587 // a request into two.
588 Addr blk_size = owner.cacheLineSize;
589 Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
590 if (!(blk_offset + node_ptr->size <= blk_size)) {
591 node_ptr->size = blk_size - blk_offset;
592 ++elasticStats.numSplitReqs;
593 }
594
595 // Create a request and the packet containing request
596 auto req = std::make_shared<Request>(
597 node_ptr->physAddr, node_ptr->size, node_ptr->flags, requestorId);
598 req->setReqInstSeqNum(node_ptr->seqNum);
599
600 // If this is not done it triggers assert in L1 cache for invalid contextId
601 req->setContext(ContextID(0));
602
603 req->setPC(node_ptr->pc);
604 // If virtual address is valid, set the virtual address field
605 // of the request.
606 if (node_ptr->virtAddr != 0) {
607 req->setVirt(node_ptr->virtAddr, node_ptr->size,
608 node_ptr->flags, requestorId, node_ptr->pc);
609 req->setPaddr(node_ptr->physAddr);
610 req->setReqInstSeqNum(node_ptr->seqNum);
611 }
612
613 PacketPtr pkt;
614 uint8_t* pkt_data = new uint8_t[req->getSize()];
615 if (node_ptr->isLoad()) {
616 pkt = Packet::createRead(req);
617 } else {
618 pkt = Packet::createWrite(req);
619 memset(pkt_data, 0xA, req->getSize());
620 }
621 pkt->dataDynamic(pkt_data);
622
623 // Call RequestPort method to send a timing request for this packet
624 bool success = port.sendTimingReq(pkt);
625 ++elasticStats.numSendAttempted;
626
627 if (!success) {
628 // If it fails, return the packet to retry when a retry is signalled by
629 // the cache
631 DPRINTF(TraceCPUData, "Send failed. Saving packet for retry.\n");
632 return pkt;
633 } else {
634 // If it succeeds, return nullptr
635 ++elasticStats.numSendSucceeded;
636 return nullptr;
637 }
638}
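
A worked example of the line-crossing truncation above, with made-up numbers (64-byte line, 16-byte access at line offset 56): 8 bytes would spill into the next line, so the access is truncated to the 8 bytes left in the line.

#include <cstdint>
#include <iostream>

using Addr = uint64_t;

int main()
{
    const Addr blk_size = 64;    // cache line size, a power of two
    Addr addr = 0x1038;          // physical address (offset 56 in its line)
    uint32_t size = 16;

    Addr blk_offset = addr & (blk_size - 1);
    if (blk_offset + size > blk_size) {
        size = blk_size - blk_offset;  // keep only the in-line part
    }
    std::cout << "truncated size = " << size << "\n";  // 8
    return 0;
}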
639
640bool
641TraceCPU::ElasticDataGen::checkAndIssue(const GraphNode* node_ptr, bool first)
642{
643 // Assert the node is dependency-free
644 assert(node_ptr->robDep.empty() && node_ptr->regDep.empty());
645
646 // If this is the first attempt, print a debug message to indicate this.
647 if (first) {
648 DPRINTFR(TraceCPUData, "\t\tseq. num %lli(%s) with rob num %lli is now"
649 " dependency free.\n", node_ptr->seqNum, node_ptr->typeToStr(),
650 node_ptr->robNum);
651 }
652
653 // Check if resources are available to issue the specific node
654 if (hwResource.isAvailable(node_ptr)) {
655 // If resources are free only then add to readyList
656 DPRINTFR(TraceCPUData, "\t\tResources available for seq. num %lli. "
657 "Adding to readyList, occupying resources.\n",
658 node_ptr->seqNum);
659 // Compute the execute tick by adding the compute delay for the node
660 // and add the ready node to the ready list
661 addToSortedReadyList(node_ptr->seqNum,
662 owner.clockEdge() + node_ptr->compDelay);
663 // Account for the resources taken up by this issued node.
664 hwResource.occupy(node_ptr);
665 return true;
666 } else {
667 if (first) {
668 // Although dependencies are complete, resources are not available.
669 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
670 "%lli. Adding to depFreeQueue.\n", node_ptr->seqNum);
671 depFreeQueue.push(node_ptr);
672 } else {
673 DPRINTFR(TraceCPUData, "\t\tResources unavailable for seq. num "
674 "%lli. Still pending issue.\n", node_ptr->seqNum);
675 }
676 return false;
677 }
678}
679
680void
681TraceCPU::ElasticDataGen::completeMemAccess(PacketPtr pkt)
682{
683 // Release the resources for this completed node.
684 if (pkt->isWrite()) {
685 // Consider store complete.
686 hwResource.releaseStoreBuffer();
687 // If it is a store response then do nothing since we do not model
688 // dependencies on store completion in the trace. But if we were
689 // blocking execution due to store buffer fullness, we need to schedule
690 // an event and attempt to progress.
691 } else {
692 // If it is a load response then release the dependents waiting on it.
693 // Get pointer to the completed load
694 auto graph_itr = depGraph.find(pkt->req->getReqInstSeqNum());
695 assert(graph_itr != depGraph.end());
696 GraphNode* node_ptr = graph_itr->second;
697
698 // Release resources occupied by the load
699 hwResource.release(node_ptr);
700
701 DPRINTF(TraceCPUData, "Load seq. num %lli response received. Waking up"
702 " dependents..\n", node_ptr->seqNum);
703
704 for (auto child : node_ptr->dependents) {
705 if (child->removeDepOnInst(node_ptr->seqNum)) {
706 checkAndIssue(child);
707 }
708 }
709
710 // clear the dynamically allocated set of dependents
711 (node_ptr->dependents).clear();
712 // Update the stat for numOps completed
713 owner.updateNumOps(node_ptr->robNum);
714 // delete node
715 delete node_ptr;
716 // remove from graph
717 depGraph.erase(graph_itr);
718 }
719
720 if (debug::TraceCPUData) {
721 printReadyList();
722 }
723
724 // If the size of the dependency graph is less than the dependency window
725 // then read from the trace file to populate the graph next time we are in
726 // execute.
727 if (depGraph.size() < windowSize && !traceComplete)
728 nextRead = true;
729
730 // If not waiting for retry, attempt to schedule next event
731 if (!retryPkt) {
732 // We might have new dep-free nodes in the list which will have execute
733 // tick greater than or equal to curTick. But a new dep-free node might
734 // have its execute tick earlier. Therefore, attempt to reschedule. It
735 // could happen that the readyList is empty and we got here via a
736 // last remaining response. So, either the trace is complete or there
737 // are pending nodes in the depFreeQueue. The checking is done in the
738 // execute() control flow, so schedule an event to go via that flow.
739 Tick next_event_tick = readyList.empty() ? owner.clockEdge(Cycles(1)) :
740 std::max(readyList.begin()->execTick, owner.clockEdge(Cycles(1)));
741 DPRINTF(TraceCPUData, "Attempting to schedule @%lli.\n",
742 next_event_tick);
743 owner.schedDcacheNextEvent(next_event_tick);
744 }
745}
746
747void
748TraceCPU::ElasticDataGen::addToSortedReadyList(NodeSeqNum seq_num,
749 Tick exec_tick)
750{
751 ReadyNode ready_node;
752 ready_node.seqNum = seq_num;
753 ready_node.execTick = exec_tick;
754
755 // Iterator to readyList
756 auto itr = readyList.begin();
757
758 // If the readyList is empty, simply insert the new node at the beginning
759 // and return
760 if (itr == readyList.end()) {
761 readyList.insert(itr, ready_node);
762 elasticStats.maxReadyListSize =
763 std::max<double>(readyList.size(),
764 elasticStats.maxReadyListSize.value());
765 return;
766 }
767
768 // If the new node has its execution tick equal to the first node in the
769 // list then go to the next node. If the first node in the list failed
770 // to execute, its position as the first is thus maintained.
771 if (retryPkt) {
772 if (retryPkt->req->getReqInstSeqNum() == itr->seqNum)
773 itr++;
774 }
775
776 // Increment the iterator and compare the node pointed to by it to the new
777 // node till the position to insert the new node is found.
778 bool found = false;
779 while (!found && itr != readyList.end()) {
780 // If the execution tick of the new node is less than the node then
781 // this is the position to insert
782 if (exec_tick < itr->execTick) {
783 found = true;
784 // If the execution tick of the new node is equal to the node then
785 // sort in ascending order of sequence numbers
786 } else if (exec_tick == itr->execTick) {
787 // If the sequence number of the new node is less than the node
788 // then this is the position to insert
789 if (seq_num < itr->seqNum) {
790 found = true;
791 // Else go to next node
792 } else {
793 itr++;
794 }
795 } else {
796 // If the execution tick of the new node is greater than the node
797 // then go to the next node.
798 itr++;
799 }
800 }
801 readyList.insert(itr, ready_node);
802 // Update the stat for max size reached of the readyList
803 elasticStats.maxReadyListSize = std::max<double>(readyList.size(),
804 elasticStats.maxReadyListSize.value());
805}
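
The resulting list order is ascending execute tick, with ties broken by ascending sequence number; the special case above that keeps a failed (retry) node pinned at the head is omitted here. A standalone sketch with simplified stand-in types:

#include <cstdint>
#include <iostream>
#include <list>

using Tick = uint64_t;

struct ReadyNode { uint64_t seqNum; Tick execTick; };

static void
insertSorted(std::list<ReadyNode>& ready_list, ReadyNode node)
{
    auto itr = ready_list.begin();
    // Advance past nodes that order strictly before the new node.
    while (itr != ready_list.end() &&
           (itr->execTick < node.execTick ||
            (itr->execTick == node.execTick && itr->seqNum < node.seqNum))) {
        ++itr;
    }
    ready_list.insert(itr, node);
}

int main()
{
    std::list<ReadyNode> ready_list;
    insertSorted(ready_list, {12, 500});
    insertSorted(ready_list, {9, 500});   // same tick, lower seq: goes first
    insertSorted(ready_list, {20, 300});  // earlier tick: goes to the front
    for (const auto& n : ready_list)
        std::cout << n.seqNum << " @ " << n.execTick << "\n";  // 20, 9, 12
    return 0;
}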
806
807void
808TraceCPU::ElasticDataGen::printReadyList()
809{
810 auto itr = readyList.begin();
811 if (itr == readyList.end()) {
812 DPRINTF(TraceCPUData, "readyList is empty.\n");
813 return;
814 }
815 DPRINTF(TraceCPUData, "Printing readyList:\n");
816 while (itr != readyList.end()) {
817 auto graph_itr = depGraph.find(itr->seqNum);
818 [[maybe_unused]] GraphNode* node_ptr = graph_itr->second;
819 DPRINTFR(TraceCPUData, "\t%lld(%s), %lld\n", itr->seqNum,
820 node_ptr->typeToStr(), itr->execTick);
821 itr++;
822 }
823}
824
825TraceCPU::ElasticDataGen::HardwareResource::HardwareResource(
826 uint16_t max_rob, uint16_t max_stores, uint16_t max_loads) :
827 sizeROB(max_rob),
828 sizeStoreBuffer(max_stores),
829 sizeLoadBuffer(max_loads),
830 oldestInFlightRobNum(UINT64_MAX),
831 numInFlightLoads(0),
832 numInFlightStores(0)
833{}
834
835void
836TraceCPU::ElasticDataGen::HardwareResource::occupy(const GraphNode* new_node)
837{
838 // Occupy ROB entry for the issued node
839 // Merely maintain the oldest node, i.e. numerically least robNum by saving
840 // it in the variable oldestInFLightRobNum.
841 inFlightNodes[new_node->seqNum] = new_node->robNum;
842 oldestInFlightRobNum = inFlightNodes.begin()->second;
843
844 // Occupy Load/Store Buffer entry for the issued node if applicable
845 if (new_node->isLoad()) {
846 ++numInFlightLoads;
847 } else if (new_node->isStore()) {
848 ++numInFlightStores;
849 } // else if it is a non load/store node, no buffer entry is occupied
850
851 printOccupancy();
852}
853
854void
855TraceCPU::ElasticDataGen::HardwareResource::release(const GraphNode* done_node)
856{
857 assert(!inFlightNodes.empty());
858 DPRINTFR(TraceCPUData,
859 "\tClearing done seq. num %d from inFlightNodes..\n",
860 done_node->seqNum);
861
862 assert(inFlightNodes.find(done_node->seqNum) != inFlightNodes.end());
863 inFlightNodes.erase(done_node->seqNum);
864
865 if (inFlightNodes.empty()) {
866 // If we deleted the only in-flight node, then the
867 // oldestInFlightRobNum is set to its initialized (max) value.
868 oldestInFlightRobNum = UINT64_MAX;
869 } else {
870 // Set the oldest in-flight node rob number equal to the first node in
871 // the inFlightNodes since that will have the numerically least value.
872 oldestInFlightRobNum = inFlightNodes.begin()->second;
873 }
874
875 DPRINTFR(TraceCPUData,
876 "\tCleared. inFlightNodes.size() = %d, "
877 "oldestInFlightRobNum = %d\n", inFlightNodes.size(),
878 oldestInFlightRobNum);
879
880 // A store is considered complete when a request is sent, thus ROB entry is
881 // freed. But it occupies an entry in the Store Buffer until its response
882 // is received. A load is considered complete when a response is received,
883 // thus both ROB and Load Buffer entries can be released.
884 if (done_node->isLoad()) {
885 assert(numInFlightLoads != 0);
886 --numInFlightLoads;
887 }
888 // For normal writes, we send the requests out and clear a store buffer
889 // entry on response. For writes which are strictly ordered, for e.g.
890 // writes to device registers, we do that within release() which is called
891 // when node is executed and taken off from readyList.
892 if (done_node->isStore() && done_node->isStrictlyOrdered()) {
893 releaseStoreBuffer();
894 }
895}
896
897void
898TraceCPU::ElasticDataGen::HardwareResource::releaseStoreBuffer()
899{
900 assert(numInFlightStores != 0);
901 --numInFlightStores;
902}
903
904bool
905TraceCPU::ElasticDataGen::HardwareResource::isAvailable(
906 const GraphNode* new_node) const
907{
908 uint16_t num_in_flight_nodes;
909 if (inFlightNodes.empty()) {
910 num_in_flight_nodes = 0;
911 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
912 " #in-flight nodes = 0", new_node->seqNum);
913 } else if (new_node->robNum > oldestInFlightRobNum) {
914 // This is the intuitive case where new dep-free node is younger
915 // instruction than the oldest instruction in-flight. Thus we make sure
916 // in_flight_nodes does not overflow.
917 num_in_flight_nodes = new_node->robNum - oldestInFlightRobNum;
918 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
919 " #in-flight nodes = %d - %d = %d", new_node->seqNum,
920 new_node->robNum, oldestInFlightRobNum, num_in_flight_nodes);
921 } else {
922 // This is the case where an instruction older than the oldest in-
923 // flight instruction becomes dep-free. Thus we must have already
924 // accounted for the entry in ROB for this new dep-free node.
925 // Immediately after this check returns true, oldestInFlightRobNum will
926 // be updated in occupy(). We simply let this node issue now.
927 num_in_flight_nodes = 0;
928 DPRINTFR(TraceCPUData, "\t\tChecking resources to issue seq. num %lli:"
929 " new oldestInFlightRobNum = %d, #in-flight nodes ignored",
930 new_node->seqNum, new_node->robNum);
931 }
932 DPRINTFR(TraceCPUData, ", LQ = %d/%d, SQ = %d/%d.\n",
933 numInFlightLoads, sizeLoadBuffer,
934 numInFlightStores, sizeStoreBuffer);
935 // Check if resources are available to issue the specific node
936 if (num_in_flight_nodes >= sizeROB) {
937 return false;
938 }
939 if (new_node->isLoad() && numInFlightLoads >= sizeLoadBuffer) {
940 return false;
941 }
942 if (new_node->isStore() && numInFlightStores >= sizeStoreBuffer) {
943 return false;
944 }
945 return true;
946}
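
A sketch of the ROB occupancy approximation above with made-up numbers (128-entry ROB): occupancy is estimated as the distance between the new node's ROB number and the oldest in-flight ROB number, and an older node re-issuing counts as already accounted for.

#include <cstdint>
#include <iostream>

int main()
{
    const uint16_t size_rob = 128;            // stand-in for sizeROB
    uint64_t oldest_in_flight_rob_num = 1000; // oldest node still in flight

    for (uint64_t new_rob_num : {1100ULL, 900ULL, 1200ULL}) {
        uint64_t in_flight = (new_rob_num > oldest_in_flight_rob_num)
            ? new_rob_num - oldest_in_flight_rob_num
            : 0;  // older than the oldest in-flight: already accounted for
        std::cout << "robNum " << new_rob_num << ": "
                  << (in_flight >= size_rob ? "stall" : "issue") << "\n";
    }
    return 0;  // prints issue (100 entries), issue (0), stall (200 >= 128)
}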
947
948bool
949TraceCPU::ElasticDataGen::HardwareResource::awaitingResponse() const
950{
951 // Return true if there is at least one read or write request in flight
952 return (numInFlightStores != 0 || numInFlightLoads != 0);
953}
954
955void
956TraceCPU::ElasticDataGen::HardwareResource::printOccupancy()
957{
958 DPRINTFR(TraceCPUData, "oldestInFlightRobNum = %d, "
959 "LQ = %d/%d, SQ = %d/%d.\n",
960 oldestInFlightRobNum,
961 numInFlightLoads, sizeLoadBuffer,
962 numInFlightStores, sizeStoreBuffer);
963}
964
965TraceCPU::FixedRetryGen::FixedRetryGenStatGroup::FixedRetryGenStatGroup(
966 statistics::Group *parent, const std::string& _name) :
967 statistics::Group(parent, _name.c_str()),
968 ADD_STAT(numSendAttempted, statistics::units::Count::get(),
969 "Number of first attempts to send a request"),
970 ADD_STAT(numSendSucceeded, statistics::units::Count::get(),
971 "Number of successful first attempts"),
972 ADD_STAT(numSendFailed, statistics::units::Count::get(),
973 "Number of failed first attempts"),
974 ADD_STAT(numRetrySucceeded, statistics::units::Count::get(),
975 "Number of successful retries"),
976 ADD_STAT(instLastTick, statistics::units::Tick::get(),
977 "Last tick simulated from the fixed inst trace")
978{
979
980}
981
982Tick
983TraceCPU::FixedRetryGen::init()
984{
985 DPRINTF(TraceCPUInst, "Initializing instruction fetch request generator"
986 " IcacheGen: fixed issue with retry.\n");
987
988 if (nextExecute()) {
989 DPRINTF(TraceCPUInst, "\tFirst tick = %d.\n", currElement.tick);
990 return currElement.tick;
991 } else {
992 panic("Read of first message in the trace failed.\n");
993 return MaxTick;
994 }
995}
996
997bool
998TraceCPU::FixedRetryGen::tryNext()
999{
1000 // If there is a retry packet, try to send it
1001 if (retryPkt) {
1002 DPRINTF(TraceCPUInst, "Trying to send retry packet.\n");
1003
1004 if (!port.sendTimingReq(retryPkt)) {
1005 // Still blocked! This should never occur.
1006 DPRINTF(TraceCPUInst, "Retry packet sending failed.\n");
1007 return false;
1008 }
1009 ++fixedStats.numRetrySucceeded;
1010 } else {
1011 DPRINTF(TraceCPUInst, "Trying to send packet for currElement.\n");
1012
1013 // try sending current element
1014 assert(currElement.isValid());
1015
1016 ++fixedStats.numSendAttempted;
1017
1020 DPRINTF(TraceCPUInst, "currElement sending failed.\n");
1022 // return false to indicate not to schedule next event
1023 return false;
1024 } else {
1025 ++fixedStats.numSendSucceeded;
1026 }
1027 }
1028 // If packet was sent successfully, either retryPkt or currElement, return
1029 // true to indicate to schedule event at current Tick plus delta. If packet
1030 // was sent successfully and there is no next packet to send, return false.
1031 DPRINTF(TraceCPUInst, "Packet sent successfully, trying to read next "
1032 "element.\n");
1033 retryPkt = nullptr;
1034 // Read next element into currElement, currElement gets cleared so save the
1035 // tick to calculate delta
1036 Tick last_tick = currElement.tick;
1037 if (nextExecute()) {
1038 assert(currElement.tick >= last_tick);
1039 delta = currElement.tick - last_tick;
1040 }
1041 return !traceComplete;
1042}
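
A sketch of the delta-based pacing above with made-up ticks: after a successful send, the gap between the tick of the record just sent and the tick of the record just read becomes the delta, and the caller (schedIcacheNext()) schedules the next event at the current tick plus that delta.

#include <cstdint>
#include <iostream>

using Tick = uint64_t;

int main()
{
    Tick cur_tick = 5000;
    Tick curr_element_tick = 5000;  // record just sent
    Tick next_element_tick = 5230;  // record just read from the trace

    Tick delta = next_element_tick - curr_element_tick;
    std::cout << "next icache event at " << cur_tick + delta << "\n";  // 5230
    return 0;
}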
1043
1044void
1045TraceCPU::FixedRetryGen::exit()
1046{
1047 trace.reset();
1048}
1049
1050bool
1051TraceCPU::FixedRetryGen::nextExecute()
1052{
1053 if (traceComplete)
1054 // We are at the end of the file, thus we have no more messages.
1055 // Return false.
1056 return false;
1057
1058
1059 // Reset the currElement to the default values
1060 currElement.clear();
1061
1062 // Read the next line to get the next message. If that fails then end of
1063 // trace has been reached and traceComplete needs to be set in addition
1064 // to returning false. If successful then next message is in currElement.
1065 if (!trace.read(&currElement)) {
1066 traceComplete = true;
1067 fixedStats.instLastTick = curTick();
1068 return false;
1069 }
1070
1071 DPRINTF(TraceCPUInst, "inst fetch: %c addr %d pc %#x size %d tick %d\n",
1072 currElement.cmd.isRead() ? 'r' : 'w',
1073 currElement.addr,
1074 currElement.pc,
1075 currElement.blocksize,
1076 currElement.tick);
1077
1078 return true;
1079}
1080
1081bool
1082TraceCPU::FixedRetryGen::send(Addr addr, unsigned size, const MemCmd& cmd,
1083 Request::FlagsType flags, Addr pc)
1084{
1085
1086 // Create new request
1087 auto req = std::make_shared<Request>(addr, size, flags, requestorId);
1088 req->setPC(pc);
1089
1090 // If this is not done it triggers assert in L1 cache for invalid contextId
1091 req->setContext(ContextID(0));
1092
1093 // Embed it in a packet
1094 PacketPtr pkt = new Packet(req, cmd);
1095
1096 uint8_t* pkt_data = new uint8_t[req->getSize()];
1097 pkt->dataDynamic(pkt_data);
1098
1099 if (cmd.isWrite()) {
1100 memset(pkt_data, 0xA, req->getSize());
1101 }
1102
1103 // Call RequestPort method to send a timing request for this packet
1104 bool success = port.sendTimingReq(pkt);
1105 if (!success) {
1106 // If it fails, save the packet to retry when a retry is signalled by
1107 // the cache
1108 retryPkt = pkt;
1109 }
1110 return success;
1111}
1112
1113void
1114TraceCPU::icacheRetryRecvd()
1115{
1116 // Schedule an event to go through the control flow in the same tick as
1117 // retry is received
1118 DPRINTF(TraceCPUInst, "Icache retry received. Scheduling next IcacheGen"
1119 " event @%lli.\n", curTick());
1120 schedule(icacheNextEvent, curTick());
1121}
1122
1123void
1124TraceCPU::dcacheRetryRecvd()
1125{
1126 // Schedule an event to go through the execute flow in the same tick as
1127 // retry is received
1128 DPRINTF(TraceCPUData, "Dcache retry received. Scheduling next DcacheGen"
1129 " event @%lli.\n", curTick());
1130 schedule(dcacheNextEvent, curTick());
1131}
1132
1133void
1134TraceCPU::schedDcacheNextEvent(Tick when)
1135{
1136 if (!dcacheNextEvent.scheduled()) {
1137 DPRINTF(TraceCPUData, "Scheduling next DcacheGen event at %lli.\n",
1138 when);
1139 schedule(dcacheNextEvent, when);
1140 ++traceStats.numSchedDcacheEvent;
1141 } else if (when < dcacheNextEvent.when()) {
1142 DPRINTF(TraceCPUData, "Re-scheduling next dcache event from %lli"
1143 " to %lli.\n", dcacheNextEvent.when(), when);
1144 reschedule(dcacheNextEvent, when);
1145 }
1146
1147}
1148
1149Port &
1150TraceCPU::getPort(const std::string &if_name, PortID idx)
1151{
1152 // Get the right port based on name. This applies to all the
1153 // subclasses of the base CPU and relies on their implementation
1154 // of getDataPort and getInstPort.
1155 if (if_name == "dcache_port")
1156 return getDataPort();
1157 else if (if_name == "icache_port")
1158 return getInstPort();
1159 else
1160 return ClockedObject::getPort(if_name, idx);
1161}
1162
1163bool
1164TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
1165{
1166 // All responses on the instruction fetch side are ignored. Simply delete
1167 // the packet to free allocated memory
1168 delete pkt;
1169
1170 return true;
1171}
1172
1173void
1174TraceCPU::IcachePort::recvReqRetry()
1175{
1176 owner->icacheRetryRecvd();
1177}
1178
1179void
1180TraceCPU::dcacheRecvTimingResp(PacketPtr pkt)
1181{
1182 DPRINTF(TraceCPUData, "Received timing response from Dcache.\n");
1183 dcacheGen.completeMemAccess(pkt);
1184}
1185
1186bool
1187TraceCPU::DcachePort::recvTimingResp(PacketPtr pkt)
1188{
1189 // Handle the responses for data memory requests which is done inside the
1190 // elastic data generator
1191 owner->dcacheRecvTimingResp(pkt);
1192 // After processing the response delete the packet to free
1193 // memory
1194 delete pkt;
1195
1196 return true;
1197}
1198
1199void
1200TraceCPU::DcachePort::recvReqRetry()
1201{
1202 owner->dcacheRetryRecvd();
1203}
1204
1205TraceCPU::ElasticDataGen::InputStream::InputStream(
1206 const std::string& filename, const double time_multiplier) :
1207 trace(filename),
1208 timeMultiplier(time_multiplier),
1209 microOpCount(0)
1210{
1211 // Create a protobuf message for the header and read it from the stream
1212 ProtoMessage::InstDepRecordHeader header_msg;
1213 if (!trace.read(header_msg)) {
1214 panic("Failed to read packet header from %s\n", filename);
1215 }
1216 if (header_msg.tick_freq() != sim_clock::Frequency) {
1217 panic("Trace %s was recorded with a different tick frequency %d\n",
1218 filename, header_msg.tick_freq());
1219 }
1220
1221 // Assign window size equal to the field in the trace that was recorded
1222 // when the data dependency trace was captured in the o3cpu model
1223 windowSize = header_msg.window_size();
1224
1225}
1226
1227void
1228TraceCPU::ElasticDataGen::InputStream::reset()
1229{
1230 trace.reset();
1231}
1232
1233bool
1234TraceCPU::ElasticDataGen::InputStream::read(GraphNode* element)
1235{
1236 ProtoMessage::InstDepRecord pkt_msg;
1237 if (trace.read(pkt_msg)) {
1238 // Required fields
1239 element->seqNum = pkt_msg.seq_num();
1240 element->type = pkt_msg.type();
1241 // Scale the compute delay to effectively scale the Trace CPU frequency
1242 element->compDelay = pkt_msg.comp_delay() * timeMultiplier;
1243
1244 // Repeated field robDepList
1245 element->robDep.clear();
1246 for (int i = 0; i < (pkt_msg.rob_dep()).size(); i++) {
1247 element->robDep.push_back(pkt_msg.rob_dep(i));
1248 }
1249
1250 // Repeated field
1251 element->regDep.clear();
1252 for (int i = 0; i < (pkt_msg.reg_dep()).size(); i++) {
1253 // There is a possibility that an instruction has both, a register
1254 // and order dependency on an instruction. In such a case, the
1255 // register dependency is omitted
1256 bool duplicate = false;
1257 for (auto &dep: element->robDep) {
1258 duplicate |= (pkt_msg.reg_dep(i) == dep);
1259 }
1260 if (!duplicate)
1261 element->regDep.push_back(pkt_msg.reg_dep(i));
1262 }
1263
1264 // Optional fields
1265 if (pkt_msg.has_p_addr())
1266 element->physAddr = pkt_msg.p_addr();
1267 else
1268 element->physAddr = 0;
1269
1270 if (pkt_msg.has_v_addr())
1271 element->virtAddr = pkt_msg.v_addr();
1272 else
1273 element->virtAddr = 0;
1274
1275 if (pkt_msg.has_size())
1276 element->size = pkt_msg.size();
1277 else
1278 element->size = 0;
1279
1280 if (pkt_msg.has_flags())
1281 element->flags = pkt_msg.flags();
1282 else
1283 element->flags = 0;
1284
1285 if (pkt_msg.has_pc())
1286 element->pc = pkt_msg.pc();
1287 else
1288 element->pc = 0;
1289
1290 // ROB occupancy number
1291 ++microOpCount;
1292 if (pkt_msg.has_weight()) {
1293 microOpCount += pkt_msg.weight();
1294 }
1295 element->robNum = microOpCount;
1296 return true;
1297 }
1298
1299 // We have reached the end of the file
1300 return false;
1301}
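
A sketch of the register/order dependency de-duplication above, using plain vectors and made-up sequence numbers: a parent already recorded as an order (ROB) dependency is not added again as a register dependency.

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<uint64_t> rob_dep = {42};               // order dependencies
    std::vector<uint64_t> reg_dep_in_trace = {42, 37};  // as read from the trace
    std::vector<uint64_t> reg_dep;                      // kept register deps

    for (uint64_t dep : reg_dep_in_trace) {
        bool duplicate = false;
        for (uint64_t rd : rob_dep)
            duplicate |= (dep == rd);
        if (!duplicate)
            reg_dep.push_back(dep);  // 42 is dropped, 37 is kept
    }
    std::cout << "regDep size = " << reg_dep.size() << "\n";  // 1
    return 0;
}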
1302
1303bool
1304TraceCPU::ElasticDataGen::GraphNode::removeRegDep(NodeSeqNum reg_dep)
1305{
1306 for (auto it = regDep.begin(); it != regDep.end(); it++) {
1307 if (*it == reg_dep) {
1308 // If register dependency is found, erase it.
1309 regDep.erase(it);
1310 DPRINTFR(TraceCPUData,
1311 "\tFor %lli: Marking register dependency %lli done.\n",
1312 seqNum, reg_dep);
1313 return true;
1314 }
1315 }
1316
1317 // Return false if the dependency is not found
1318 return false;
1319}
1320
1321bool
1322TraceCPU::ElasticDataGen::GraphNode::removeRobDep(NodeSeqNum rob_dep)
1323{
1324 for (auto it = robDep.begin(); it != robDep.end(); it++) {
1325 if (*it == rob_dep) {
1326 // If the rob dependency is found, erase it.
1327 robDep.erase(it);
1328 DPRINTFR(TraceCPUData,
1329 "\tFor %lli: Marking ROB dependency %lli done.\n",
1330 seqNum, rob_dep);
1331 return true;
1332 }
1333 }
1334 return false;
1335}
1336
1337bool
1338TraceCPU::ElasticDataGen::GraphNode::removeDepOnInst(NodeSeqNum done_seq_num)
1339{
1340 // If it is an rob dependency then remove it
1341 if (!removeRobDep(done_seq_num)) {
1342 // If it is not an rob dependency then it must be a register dependency
1343 // If the register dependency is not found, it violates an assumption
1344 // and must be caught by assert.
1345 [[maybe_unused]] bool regdep_found = removeRegDep(done_seq_num);
1346 assert(regdep_found);
1347 }
1348 // Return true if the node is dependency free
1349 return robDep.empty() && regDep.empty();
1350}
1351
1352void
1353TraceCPU::ElasticDataGen::GraphNode::writeElementAsTrace() const
1354{
1355#if TRACING_ON
1356 DPRINTFR(TraceCPUData, "%lli", seqNum);
1357 DPRINTFR(TraceCPUData, ",%s", typeToStr());
1358 if (isLoad() || isStore()) {
1359 DPRINTFR(TraceCPUData, ",%i", physAddr);
1360 DPRINTFR(TraceCPUData, ",%i", size);
1361 DPRINTFR(TraceCPUData, ",%i", flags);
1362 }
1363 DPRINTFR(TraceCPUData, ",%lli", compDelay);
1364 DPRINTFR(TraceCPUData, "robDep:");
1365 for (auto &dep: robDep) {
1366 DPRINTFR(TraceCPUData, ",%lli", dep);
1367 }
1368 DPRINTFR(TraceCPUData, "regDep:");
1369 for (auto &dep: regDep) {
1370 DPRINTFR(TraceCPUData, ",%lli", dep);
1371 }
1372 auto child_itr = dependents.begin();
1373 DPRINTFR(TraceCPUData, "dependents:");
1374 while (child_itr != dependents.end()) {
1375 DPRINTFR(TraceCPUData, ":%lli", (*child_itr)->seqNum);
1376 child_itr++;
1377 }
1378
1379 DPRINTFR(TraceCPUData, "\n");
1380#endif // TRACING_ON
1381}
1382
1383std::string
1384TraceCPU::ElasticDataGen::GraphNode::typeToStr() const
1385{
1386 return Record::RecordType_Name(type);
1387}
1388
1389TraceCPU::FixedRetryGen::InputStream::InputStream(const std::string& filename)
1390 : trace(filename)
1391{
1392 // Create a protobuf message for the header and read it from the stream
1393 ProtoMessage::PacketHeader header_msg;
1394 if (!trace.read(header_msg)) {
1395 panic("Failed to read packet header from %s\n", filename);
1396 }
1397 if (header_msg.tick_freq() != sim_clock::Frequency) {
1398 panic("Trace %s was recorded with a different tick frequency %d\n",
1399 filename, header_msg.tick_freq());
1400 }
1401
1402}
1403
1404void
1405TraceCPU::FixedRetryGen::InputStream::reset()
1406{
1407 trace.reset();
1408}
1409
1410bool
1411TraceCPU::FixedRetryGen::InputStream::read(TraceElement* element)
1412{
1413 ProtoMessage::Packet pkt_msg;
1414 if (trace.read(pkt_msg)) {
1415 element->cmd = pkt_msg.cmd();
1416 element->addr = pkt_msg.addr();
1417 element->blocksize = pkt_msg.size();
1418 element->tick = pkt_msg.tick();
1419 element->flags = pkt_msg.has_flags() ? pkt_msg.flags() : 0;
1420 element->pc = pkt_msg.has_pc() ? pkt_msg.pc() : 0;
1421 return true;
1422 }
1423
1424 // We have reached the end of the file
1425 return false;
1426}
1427
1428} // namespace gem5