gem5 [DEVELOP-FOR-25.0]
Loading...
Searching...
No Matches
inst_queue.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2014, 2017-2020 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/inst_queue.hh"
43
44#include <limits>
45#include <vector>
46
47#include "base/logging.hh"
48#include "cpu/o3/dyn_inst.hh"
49#include "cpu/o3/fu_pool.hh"
50#include "cpu/o3/limits.hh"
51#include "debug/IQ.hh"
52#include "enums/OpClass.hh"
53#include "params/BaseO3CPU.hh"
54#include "sim/core.hh"
55
56// clang complains about std::set being overloaded with Packet::set if
57// we open up the entire namespace std
58using std::list;
59
60namespace gem5
61{
62
63namespace o3
64{
65
67 int fu_idx, InstructionQueue *iq_ptr)
69 inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
70{
71}
72
73void
75{
76 iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
77 inst = NULL;
78}
79
80
81const char *
83{
84 return "Functional unit completion";
85}
86
88 const BaseO3CPUParams &params)
89 : cpu(cpu_ptr),
90 iewStage(iew_ptr),
91 fuPool(params.fuPool),
92 iqPolicy(params.smtIQPolicy),
93 numThreads(params.numThreads),
94 numEntries(params.numIQEntries),
95 totalWidth(params.issueWidth),
99{
100 assert(fuPool);
101
102 const auto &reg_classes = params.isa[0]->regClasses();
103 // Set the number of total physical registers
104 // As the vector registers have two addressing modes, they are added twice
105 numPhysRegs = params.numPhysIntRegs + params.numPhysFloatRegs +
106 params.numPhysVecRegs +
107 params.numPhysVecRegs * (
108 reg_classes.at(VecElemClass)->numRegs() /
109 reg_classes.at(VecRegClass)->numRegs()) +
110 params.numPhysVecPredRegs +
111 params.numPhysMatRegs +
112 params.numPhysCCRegs;
113
114 //Create an entry for each physical register within the
115 //dependency graph.
116 dependGraph.resize(numPhysRegs);
117
118 // Resize the register scoreboard.
120
121 //Initialize Mem Dependence Units
122 for (ThreadID tid = 0; tid < MaxThreads; tid++) {
123 memDepUnit[tid].init(params, tid, cpu_ptr);
124 memDepUnit[tid].setIQ(this);
125 }
126
127 resetState();
128
129 //Figure out resource sharing policy
130 if (iqPolicy == SMTQueuePolicy::Dynamic) {
131 //Set Max Entries to Total ROB Capacity
132 for (ThreadID tid = 0; tid < numThreads; tid++) {
133 maxEntries[tid] = numEntries;
134 }
135
136 } else if (iqPolicy == SMTQueuePolicy::Partitioned) {
 137 //@todo: make work if part_amt doesn't divide evenly.
138 int part_amt = numEntries / numThreads;
139
140 //Divide ROB up evenly
141 for (ThreadID tid = 0; tid < numThreads; tid++) {
142 maxEntries[tid] = part_amt;
143 }
144
145 DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
146 "%i entries per thread.\n",part_amt);
147 } else if (iqPolicy == SMTQueuePolicy::Threshold) {
148 double threshold = (double)params.smtIQThreshold / 100;
149
150 int thresholdIQ = (int)((double)threshold * numEntries);
151
152 //Divide up by threshold amount
153 for (ThreadID tid = 0; tid < numThreads; tid++) {
154 maxEntries[tid] = thresholdIQ;
155 }
156
157 DPRINTF(IQ, "IQ sharing policy set to Threshold:"
158 "%i entries per thread.\n",thresholdIQ);
159 }
160 for (ThreadID tid = numThreads; tid < MaxThreads; tid++) {
161 maxEntries[tid] = 0;
162 }
163}
164
166{
167 dependGraph.reset();
168#ifdef GEM5_DEBUG
169 cprintf("Nodes traversed: %i, removed: %i\n",
170 dependGraph.nodesTraversed, dependGraph.nodesRemoved);
171#endif
172}
173
174std::string
176{
177 return cpu->name() + ".iq";
178}
179
180InstructionQueue::IQStats::IQStats(CPU *cpu, const unsigned &total_width)
181 : statistics::Group(cpu),
182 ADD_STAT(instsAdded, statistics::units::Count::get(),
183 "Number of instructions added to the IQ (excludes non-spec)"),
185 "Number of non-speculative instructions added to the IQ"),
186 ADD_STAT(instsIssued, statistics::units::Count::get(),
187 "Number of instructions issued"),
188 ADD_STAT(intInstsIssued, statistics::units::Count::get(),
189 "Number of integer instructions issued"),
191 "Number of float instructions issued"),
193 "Number of branch instructions issued"),
194 ADD_STAT(memInstsIssued, statistics::units::Count::get(),
195 "Number of memory instructions issued"),
197 "Number of miscellaneous instructions issued"),
199 "Number of squashed instructions issued"),
201 "Number of squashed instructions iterated over during squash; "
202 "mainly for profiling"),
204 "Number of squashed operands that are examined and possibly "
205 "removed from graph"),
207 "Number of squashed non-spec instructions that were removed"),
208 ADD_STAT(numIssuedDist, statistics::units::Count::get(),
209 "Number of insts issued each cycle"),
210 ADD_STAT(statFuBusy, statistics::units::Count::get(),
211 "attempts to use FU when none available"),
213 "Number of instructions issued per FU type, per thread"),
215 statistics::units::Count, statistics::units::Cycle>::get(),
216 "Inst issue rate", instsIssued / cpu->baseStats.numCycles),
217 ADD_STAT(fuBusy, statistics::units::Count::get(), "FU busy when requested"),
219 statistics::units::Count, statistics::units::Count>::get(),
220 "FU busy rate (busy events/executed inst)")
221{
223 .prereq(instsAdded);
224
226 .prereq(nonSpecInstsAdded);
227
229 .prereq(instsIssued);
230
232 .prereq(intInstsIssued);
233
235 .prereq(floatInstsIssued);
236
238 .prereq(branchInstsIssued);
239
241 .prereq(memInstsIssued);
242
244 .prereq(miscInstsIssued);
245
247 .prereq(squashedInstsIssued);
248
250 .prereq(squashedInstsExamined);
251
254
256 .prereq(squashedNonSpecRemoved);
257/*
258 queueResDist
259 .init(Num_OpClasses, 0, 99, 2)
260 .name(name() + ".IQ:residence:")
261 .desc("cycles from dispatch to issue")
262 .flags(total | pdf | cdf )
263 ;
264 for (int i = 0; i < Num_OpClasses; ++i) {
265 queueResDist.subname(i, opClassStrings[i]);
266 }
267*/
269 .init(0,total_width,1)
270 .flags(statistics::pdf)
271 ;
272/*
273 dist_unissued
274 .init(Num_OpClasses+2)
275 .name(name() + ".unissued_cause")
276 .desc("Reason ready instruction not issued")
277 .flags(pdf | dist)
278 ;
279 for (int i=0; i < (Num_OpClasses + 2); ++i) {
280 dist_unissued.subname(i, unissued_names[i]);
281 }
282*/
284 .init(cpu->numThreads,enums::Num_OpClass)
286 ;
287 statIssuedInstType.ysubnames(enums::OpClassStrings);
288
289 //
290 // How long did instructions for a particular FU type wait prior to issue
291 //
292/*
293 issueDelayDist
294 .init(Num_OpClasses,0,99,2)
295 .name(name() + ".")
296 .desc("cycles from operands ready to issue")
297 .flags(pdf | cdf)
298 ;
299 for (int i=0; i<Num_OpClasses; ++i) {
300 std::stringstream subname;
301 subname << opClassStrings[i] << "_delay";
302 issueDelayDist.subname(i, subname.str());
303 }
304*/
306 .flags(statistics::total)
307 ;
308
310 .init(Num_OpClasses)
312 ;
313 for (int i=0; i < Num_OpClasses; ++i) {
314 statFuBusy.subname(i, enums::OpClassStrings[i]);
315 }
316
317 fuBusy
318 .init(cpu->numThreads)
319 .flags(statistics::total)
320 ;
321
323 .flags(statistics::total)
324 ;
326}
327
329 : statistics::Group(parent),
331 "Number of integer instruction queue reads"),
333 "Number of integer instruction queue writes"),
335 "Number of integer instruction queue wakeup accesses"),
337 "Number of floating instruction queue reads"),
339 "Number of floating instruction queue writes"),
341 "Number of floating instruction queue wakeup accesses"),
343 "Number of vector instruction queue reads"),
345 "Number of vector instruction queue writes"),
347 "Number of vector instruction queue wakeup accesses"),
348 ADD_STAT(intAluAccesses, statistics::units::Count::get(),
349 "Number of integer alu accesses"),
350 ADD_STAT(fpAluAccesses, statistics::units::Count::get(),
351 "Number of floating point alu accesses"),
352 ADD_STAT(vecAluAccesses, statistics::units::Count::get(),
353 "Number of vector alu accesses")
354{
355 using namespace statistics;
357 .flags(total);
358
360 .flags(total);
361
363 .flags(total);
364
366 .flags(total);
367
369 .flags(total);
370
372 .flags(total);
373
375 .flags(total);
376
378 .flags(total);
379
381 .flags(total);
382
384 .flags(total);
385
387 .flags(total);
388
390 .flags(total);
391}
392
393void
395{
396 //Initialize thread IQ counts
397 for (ThreadID tid = 0; tid < MaxThreads; tid++) {
398 count[tid] = 0;
399 instList[tid].clear();
400 }
401
402 // Initialize the number of free IQ entries.
404
405 // Note that in actuality, the registers corresponding to the logical
406 // registers start off as ready. However this doesn't matter for the
407 // IQ as the instruction should have been correctly told if those
408 // registers are ready in rename. Thus it can all be initialized as
409 // unready.
410 for (int i = 0; i < numPhysRegs; ++i) {
411 regScoreboard[i] = false;
412 }
413
414 for (ThreadID tid = 0; tid < MaxThreads; ++tid) {
415 squashedSeqNum[tid] = 0;
416 }
417
418 for (int i = 0; i < Num_OpClasses; ++i) {
419 while (!readyInsts[i].empty())
420 readyInsts[i].pop();
421 queueOnList[i] = false;
422 readyIt[i] = listOrder.end();
423 }
424 nonSpecInsts.clear();
425 listOrder.clear();
426 deferredMemInsts.clear();
427 blockedMemInsts.clear();
428 retryMemInsts.clear();
429 wbOutstanding = 0;
430}
431
432void
437
438void
443
444void
451
452bool
454{
455 bool drained = dependGraph.empty() &&
456 instsToExecute.empty() &&
457 wbOutstanding == 0;
458 for (ThreadID tid = 0; tid < numThreads; ++tid)
459 drained = drained && memDepUnit[tid].isDrained();
460
461 return drained;
462}
463
464void
466{
467 assert(dependGraph.empty());
468 assert(instsToExecute.empty());
469 for (ThreadID tid = 0; tid < numThreads; ++tid)
471}
472
473void
478
479int
481{
482 if (iqPolicy == SMTQueuePolicy::Partitioned) {
483 return numEntries / num_threads;
484 } else {
485 return 0;
486 }
487}
488
489
490void
492{
493 if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
494 int active_threads = activeThreads->size();
495
496 for (ThreadID tid : *activeThreads) {
497 if (iqPolicy == SMTQueuePolicy::Partitioned) {
498 maxEntries[tid] = numEntries / active_threads;
499 } else if (iqPolicy == SMTQueuePolicy::Threshold &&
500 active_threads == 1) {
501 maxEntries[tid] = numEntries;
502 }
503 }
504 }
505}
506
507unsigned
512
513unsigned
515{
516 return maxEntries[tid] - count[tid];
517}
518
519// Might want to do something more complex if it knows how many instructions
520// will be issued this cycle.
521bool
523{
524 if (freeEntries == 0) {
525 return(true);
526 } else {
527 return(false);
528 }
529}
530
531bool
533{
534 if (numFreeEntries(tid) == 0) {
535 return(true);
536 } else {
537 return(false);
538 }
539}
540
541bool
543{
544 if (!listOrder.empty()) {
545 return true;
546 }
547
548 for (int i = 0; i < Num_OpClasses; ++i) {
549 if (!readyInsts[i].empty()) {
550 return true;
551 }
552 }
553
554 return false;
555}
556
557void
559{
560 if (new_inst->isFloating()) {
561 iqIOStats.fpInstQueueWrites++;
562 } else if (new_inst->isVector()) {
563 iqIOStats.vecInstQueueWrites++;
564 } else {
565 iqIOStats.intInstQueueWrites++;
566 }
567 // Make sure the instruction is valid
568 assert(new_inst);
569
570 DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n",
571 new_inst->seqNum, new_inst->pcState());
572
573 assert(freeEntries != 0);
574
575 instList[new_inst->threadNumber].push_back(new_inst);
576
577 --freeEntries;
578
579 new_inst->setInIQ();
580
581 // Look through its source registers (physical regs), and mark any
582 // dependencies.
583 addToDependents(new_inst);
584
585 // Have this instruction set itself as the producer of its destination
586 // register(s).
587 addToProducers(new_inst);
588
589 if (new_inst->isMemRef()) {
590 memDepUnit[new_inst->threadNumber].insert(new_inst);
591 } else {
592 addIfReady(new_inst);
593 }
594
595 ++iqStats.instsAdded;
596
597 count[new_inst->threadNumber]++;
598
599 assert(freeEntries == (numEntries - countInsts()));
600}
601
602void
604{
605 // @todo: Clean up this code; can do it by setting inst as unable
606 // to issue, then calling normal insert on the inst.
607 if (new_inst->isFloating()) {
608 iqIOStats.fpInstQueueWrites++;
609 } else if (new_inst->isVector()) {
610 iqIOStats.vecInstQueueWrites++;
611 } else {
612 iqIOStats.intInstQueueWrites++;
613 }
614
615 assert(new_inst);
616
617 nonSpecInsts[new_inst->seqNum] = new_inst;
618
619 DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s "
620 "to the IQ.\n",
621 new_inst->seqNum, new_inst->pcState());
622
623 assert(freeEntries != 0);
624
625 instList[new_inst->threadNumber].push_back(new_inst);
626
627 --freeEntries;
628
629 new_inst->setInIQ();
630
631 // Have this instruction set itself as the producer of its destination
632 // register(s).
633 addToProducers(new_inst);
634
635 // If it's a memory instruction, add it to the memory dependency
636 // unit.
637 if (new_inst->isMemRef()) {
638 memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
639 }
640
641 ++iqStats.nonSpecInstsAdded;
642
643 count[new_inst->threadNumber]++;
644
645 assert(freeEntries == (numEntries - countInsts()));
646}
647
648void
650{
651 memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
652
653 insertNonSpec(barr_inst);
654}
655
658{
659 assert(!instsToExecute.empty());
660 DynInstPtr inst = std::move(instsToExecute.front());
661 instsToExecute.pop_front();
662 if (inst->isFloating()) {
663 iqIOStats.fpInstQueueReads++;
664 } else if (inst->isVector()) {
665 iqIOStats.vecInstQueueReads++;
666 } else {
667 iqIOStats.intInstQueueReads++;
668 }
669 return inst;
670}
671
672void
674{
675 assert(!readyInsts[op_class].empty());
676
677 ListOrderEntry queue_entry;
678
679 queue_entry.queueType = op_class;
680
681 queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
682
683 ListOrderIt list_it = listOrder.begin();
684 ListOrderIt list_end_it = listOrder.end();
685
686 while (list_it != list_end_it) {
687 if ((*list_it).oldestInst > queue_entry.oldestInst) {
688 break;
689 }
690
691 list_it++;
692 }
693
694 readyIt[op_class] = listOrder.insert(list_it, queue_entry);
695 queueOnList[op_class] = true;
696}
697
698void
700{
701 // Get iterator of next item on the list
702 // Delete the original iterator
703 // Determine if the next item is either the end of the list or younger
704 // than the new instruction. If so, then add in a new iterator right here.
705 // If not, then move along.
706 ListOrderEntry queue_entry;
707 OpClass op_class = (*list_order_it).queueType;
708 ListOrderIt next_it = list_order_it;
709
710 ++next_it;
711
712 queue_entry.queueType = op_class;
713 queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
714
715 while (next_it != listOrder.end() &&
716 (*next_it).oldestInst < queue_entry.oldestInst) {
717 ++next_it;
718 }
719
720 readyIt[op_class] = listOrder.insert(next_it, queue_entry);
721}
722
723void
725{
726 DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
727 assert(!cpu->switchedOut());
728 // The CPU could have been sleeping until this op completed (*extremely*
729 // long latency op). Wake it if it was. This may be overkill.
731 iewStage->wakeCPU();
732
733 if (fu_idx > -1)
734 fuPool->freeUnitNextCycle(fu_idx);
735
736 // @todo: Ensure that these FU Completions happen at the beginning
737 // of a cycle, otherwise they could add too many instructions to
738 // the queue.
739 issueToExecuteQueue->access(-1)->size++;
740 instsToExecute.push_back(inst);
741}
742
743// @todo: Figure out a better way to remove the squashed items from the
744// lists. Checking the top item of each list to see if it's squashed
745// wastes time and forces jumps.
746void
748{
749 DPRINTF(IQ, "Attempting to schedule ready instructions from "
750 "the IQ.\n");
751
752 IssueStruct *i2e_info = issueToExecuteQueue->access(0);
753
754 DynInstPtr mem_inst;
755 while ((mem_inst = getDeferredMemInstToExecute())) {
756 addReadyMemInst(mem_inst);
757 }
758
759 // See if any cache blocked instructions are able to be executed
760 while ((mem_inst = getBlockedMemInstToExecute())) {
761 addReadyMemInst(mem_inst);
762 }
763
764 // Have iterator to head of the list
765 // While I haven't exceeded bandwidth or reached the end of the list,
766 // Try to get a FU that can do what this op needs.
767 // If successful, change the oldestInst to the new top of the list, put
768 // the queue in the proper place in the list.
769 // Increment the iterator.
770 // This will avoid trying to schedule a certain op class if there are no
771 // FUs that handle it.
772 int total_issued = 0;
773 ListOrderIt order_it = listOrder.begin();
774 ListOrderIt order_end_it = listOrder.end();
775
776 while (total_issued < totalWidth && order_it != order_end_it) {
777 OpClass op_class = (*order_it).queueType;
778
779 assert(!readyInsts[op_class].empty());
780
781 DynInstPtr issuing_inst = readyInsts[op_class].top();
782
783 if (issuing_inst->isFloating()) {
784 iqIOStats.fpInstQueueReads++;
785 } else if (issuing_inst->isVector()) {
786 iqIOStats.vecInstQueueReads++;
787 } else {
788 iqIOStats.intInstQueueReads++;
789 }
790
791 assert(issuing_inst->seqNum == (*order_it).oldestInst);
792
793 if (issuing_inst->isSquashed()) {
794 readyInsts[op_class].pop();
795
796 if (!readyInsts[op_class].empty()) {
797 moveToYoungerInst(order_it);
798 } else {
799 readyIt[op_class] = listOrder.end();
800 queueOnList[op_class] = false;
801 }
802
803 listOrder.erase(order_it++);
804
805 ++iqStats.squashedInstsIssued;
806
807 continue;
808 }
809
810 int idx = FUPool::NoNeedFU;
811 Cycles op_latency = Cycles(1);
812 ThreadID tid = issuing_inst->threadNumber;
813
814 if (op_class != No_OpClass) {
815 idx = fuPool->getUnit(op_class);
816 if (issuing_inst->isFloating()) {
817 iqIOStats.fpAluAccesses++;
818 } else if (issuing_inst->isVector()) {
819 iqIOStats.vecAluAccesses++;
820 } else {
821 iqIOStats.intAluAccesses++;
822 }
823 if (idx > FUPool::NoFreeFU) {
824 op_latency = fuPool->getOpLatency(op_class);
825 }
826 }
827
828 // If we have an instruction that doesn't require a FU, or a
829 // valid FU, then schedule for execution.
830 if (idx > FUPool::NoFreeFU || idx == FUPool::NoNeedFU ||
831 idx == FUPool::NoCapableFU) {
832 if (op_latency == Cycles(1)) {
833 i2e_info->size++;
834 instsToExecute.push_back(issuing_inst);
835
836 // Add the FU onto the list of FU's to be freed next
837 // cycle if we used one.
838 if (idx >= 0)
839 fuPool->freeUnitNextCycle(idx);
840
841 // CPU has no capable FU for the instruction
842 // but this may be OK if the instruction gets
843 // squashed. Remember this and give IEW
844 // the opportunity to trigger a fault
845 // if the instruction is unsupported.
846 // Otherwise, commit will panic.
847 if (idx == FUPool::NoCapableFU)
848 issuing_inst->setNoCapableFU();
849 } else {
850 assert(idx != FUPool::NoCapableFU);
851 bool pipelined = fuPool->isPipelined(op_class);
852 // Generate completion event for the FU
854 FUCompletion *execution = new FUCompletion(issuing_inst,
855 idx, this);
856
857 cpu->schedule(execution,
858 cpu->clockEdge(Cycles(op_latency - 1)));
859
860 if (!pipelined) {
861 // If FU isn't pipelined, then it must be freed
862 // upon the execution completing.
863 execution->setFreeFU();
864 } else {
865 // Add the FU onto the list of FU's to be freed next cycle.
866 fuPool->freeUnitNextCycle(idx);
867 }
868 }
869
870 DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
871 "[sn:%llu]\n",
872 tid, issuing_inst->pcState(),
873 issuing_inst->seqNum);
874
875 readyInsts[op_class].pop();
876
877 if (!readyInsts[op_class].empty()) {
878 moveToYoungerInst(order_it);
879 } else {
880 readyIt[op_class] = listOrder.end();
881 queueOnList[op_class] = false;
882 }
883
884 issuing_inst->setIssued();
885 ++total_issued;
886
887#if TRACING_ON
888 issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
889#endif
890
891 if (issuing_inst->firstIssue == -1)
892 issuing_inst->firstIssue = curTick();
893
894 if (!issuing_inst->isMemRef()) {
895 // Memory instructions can not be freed from the IQ until they
896 // complete.
897 ++freeEntries;
898 count[tid]--;
899 issuing_inst->clearInIQ();
900 } else {
901 memDepUnit[tid].issue(issuing_inst);
902 }
903
904 listOrder.erase(order_it++);
905 iqStats.statIssuedInstType[tid][op_class]++;
906 } else {
907 assert(idx == FUPool::NoFreeFU);
908 iqStats.statFuBusy[op_class]++;
909 iqStats.fuBusy[tid]++;
910 ++order_it;
911 }
912 }
913
914 iqStats.numIssuedDist.sample(total_issued);
915 iqStats.instsIssued+= total_issued;
916
917 // If we issued any instructions, tell the CPU we had activity.
 918 // @todo If the way deferred memory instructions are handled due to
919 // translation changes then the deferredMemInsts condition should be
920 // removed from the code below.
921 if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
922 cpu->activityThisCycle();
923 } else {
924 DPRINTF(IQ, "Not able to schedule any instructions.\n");
925 }
926}
927
928void
930{
931 DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready "
932 "to execute.\n", inst);
933
934 NonSpecMapIt inst_it = nonSpecInsts.find(inst);
935
936 assert(inst_it != nonSpecInsts.end());
937
938 ThreadID tid = (*inst_it).second->threadNumber;
939
940 (*inst_it).second->setAtCommit();
941
942 (*inst_it).second->setCanIssue();
943
944 if (!(*inst_it).second->isMemRef()) {
945 addIfReady((*inst_it).second);
946 } else {
947 memDepUnit[tid].nonSpecInstReady((*inst_it).second);
948 }
949
950 (*inst_it).second = NULL;
951
952 nonSpecInsts.erase(inst_it);
953}
954
955void
957{
958 DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n",
959 tid,inst);
960
961 ListIt iq_it = instList[tid].begin();
962
963 while (iq_it != instList[tid].end() &&
964 (*iq_it)->seqNum <= inst) {
965 ++iq_it;
966 instList[tid].pop_front();
967 }
968
969 assert(freeEntries == (numEntries - countInsts()));
970}
971
972int
974{
975 int dependents = 0;
976
977 // The instruction queue here takes care of both floating and int ops
978 if (completed_inst->isFloating()) {
979 iqIOStats.fpInstQueueWakeupAccesses++;
980 } else if (completed_inst->isVector()) {
981 iqIOStats.vecInstQueueWakeupAccesses++;
982 } else {
983 iqIOStats.intInstQueueWakeupAccesses++;
984 }
985
986 completed_inst->lastWakeDependents = curTick();
987
988 DPRINTF(IQ, "Waking dependents of completed instruction.\n");
989
990 assert(!completed_inst->isSquashed());
991
992 // Tell the memory dependence unit to wake any dependents on this
993 // instruction if it is a memory instruction. Also complete the memory
994 // instruction at this point since we know it executed without issues.
995 ThreadID tid = completed_inst->threadNumber;
996 if (completed_inst->isMemRef()) {
997 memDepUnit[tid].completeInst(completed_inst);
998
999 DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
1000 completed_inst->pcState(), completed_inst->seqNum);
1001
1002 ++freeEntries;
1003 completed_inst->memOpDone(true);
1004 count[tid]--;
1005 } else if (completed_inst->isReadBarrier() ||
1006 completed_inst->isWriteBarrier()) {
1007 // Completes a non mem ref barrier
1008 memDepUnit[tid].completeInst(completed_inst);
1009 }
1010
1011 for (int dest_reg_idx = 0;
1012 dest_reg_idx < completed_inst->numDestRegs();
1013 dest_reg_idx++)
1014 {
1015 PhysRegIdPtr dest_reg =
1016 completed_inst->renamedDestIdx(dest_reg_idx);
1017
 1018 // Special case of unique or control registers. They are not
1019 // handled by the IQ and thus have no dependency graph entry.
1020 if (dest_reg->isFixedMapping()) {
1021 DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n",
1022 dest_reg->index(), dest_reg->className());
1023 continue;
1024 }
1025
1026 // Avoid waking up dependents if the register is pinned
1028 if (dest_reg->isPinned())
1029 completed_inst->setPinnedRegsWritten();
1030
1031 if (dest_reg->getNumPinnedWritesToComplete() != 0) {
1032 DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n",
1033 dest_reg->index(), dest_reg->className());
1034 continue;
1035 }
1036
1037 DPRINTF(IQ, "Waking any dependents on register %i (%s).\n",
1038 dest_reg->index(),
1039 dest_reg->className());
1040
1041 //Go through the dependency chain, marking the registers as
1042 //ready within the waiting instructions.
1043 DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
1044
1045 while (dep_inst) {
1046 DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
1047 "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
1048
1049 // Might want to give more information to the instruction
1050 // so that it knows which of its source registers is
1051 // ready. However that would mean that the dependency
1052 // graph entries would need to hold the src_reg_idx.
1053 dep_inst->markSrcRegReady();
1054
1055 addIfReady(dep_inst);
1056
1057 dep_inst = dependGraph.pop(dest_reg->flatIndex());
1058
1059 ++dependents;
1060 }
1061
1062 // Reset the head node now that all of its dependents have
1063 // been woken up.
1064 assert(dependGraph.empty(dest_reg->flatIndex()));
1065 dependGraph.clearInst(dest_reg->flatIndex());
1066
1067 // Mark the scoreboard as having that register ready.
1068 regScoreboard[dest_reg->flatIndex()] = true;
1069 }
1070 return dependents;
1071}
1072
1073void
1075{
1076 OpClass op_class = ready_inst->opClass();
1077
1078 readyInsts[op_class].push(ready_inst);
1079
1080 // Will need to reorder the list if either a queue is not on the list,
1081 // or it has an older instruction than last time.
1082 if (!queueOnList[op_class]) {
1083 addToOrderList(op_class);
1084 } else if (readyInsts[op_class].top()->seqNum <
1085 (*readyIt[op_class]).oldestInst) {
1086 listOrder.erase(readyIt[op_class]);
1087 addToOrderList(op_class);
1088 }
1089
1090 DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1091 "the ready list, PC %s opclass:%i [sn:%llu].\n",
1092 ready_inst->pcState(), op_class, ready_inst->seqNum);
1093}
1094
1095void
1097{
1098 DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
1099
1100 // Reset DTB translation state
1101 resched_inst->translationStarted(false);
1102 resched_inst->translationCompleted(false);
1103
1104 resched_inst->clearCanIssue();
1105 memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
1106}
1107
1108void
1110{
1111 memDepUnit[replay_inst->threadNumber].replay();
1112}
1113
1114void
1116{
1117 deferredMemInsts.push_back(deferred_inst);
1118}
1119
1120void
1122{
1123 blocked_inst->clearIssued();
1124 blocked_inst->clearCanIssue();
1125 blockedMemInsts.push_back(blocked_inst);
1126 DPRINTF(IQ, "Memory inst [sn:%llu] PC %s is blocked, will be "
1127 "reissued later\n", blocked_inst->seqNum,
1128 blocked_inst->pcState());
1129}
1130
1131void
1133{
1134 retryMemInsts.push_back(retry_inst);
1135}
1136
1137void
1139{
1140 DPRINTF(IQ, "Cache is unblocked, rescheduling blocked memory "
1141 "instructions\n");
1143 // Get the CPU ticking again
1144 cpu->wakeCPU();
1145}
1146
1149{
1150 for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
1151 ++it) {
1152 if ((*it)->translationCompleted() || (*it)->isSquashed()) {
1153 DynInstPtr mem_inst = std::move(*it);
1154 deferredMemInsts.erase(it);
1155 return mem_inst;
1156 }
1157 }
1158 return nullptr;
1159}
1160
1163{
1164 if (retryMemInsts.empty()) {
1165 return nullptr;
1166 } else {
1167 DynInstPtr mem_inst = std::move(retryMemInsts.front());
1168 retryMemInsts.pop_front();
1169 return mem_inst;
1170 }
1171}
1172
1173void
1175 const DynInstPtr &faulting_load)
1176{
1177 iqIOStats.intInstQueueWrites++;
1178 memDepUnit[store->threadNumber].violation(store, faulting_load);
1179}
1180
1181void
1183{
1184 DPRINTF(IQ, "[tid:%i] Starting to squash instructions in "
1185 "the IQ.\n", tid);
1186
1187 // Read instruction sequence number of last instruction out of the
1188 // time buffer.
1189 squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
1190
1191 doSquash(tid);
1192
1193 // Also tell the memory dependence unit to squash.
1194 memDepUnit[tid].squash(squashedSeqNum[tid], tid);
1195}
1196
1197void
1199{
1200 // Start at the tail.
1201 ListIt squash_it = instList[tid].end();
1202 --squash_it;
1203
1204 DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n",
1205 tid, squashedSeqNum[tid]);
1206
1207 // Squash any instructions younger than the squashed sequence number
1208 // given.
1209 while (squash_it != instList[tid].end() &&
1210 (*squash_it)->seqNum > squashedSeqNum[tid]) {
1211
1212 DynInstPtr squashed_inst = (*squash_it);
1213 if (squashed_inst->isFloating()) {
1214 iqIOStats.fpInstQueueWrites++;
1215 } else if (squashed_inst->isVector()) {
1216 iqIOStats.vecInstQueueWrites++;
1217 } else {
1218 iqIOStats.intInstQueueWrites++;
1219 }
1220
1221 // Only handle the instruction if it actually is in the IQ and
1222 // hasn't already been squashed in the IQ.
1223 if (squashed_inst->threadNumber != tid ||
1224 squashed_inst->isSquashedInIQ()) {
1225 --squash_it;
1226 continue;
1227 }
1228
1229 if (!squashed_inst->isIssued() ||
1230 (squashed_inst->isMemRef() &&
1231 !squashed_inst->memOpDone())) {
1232
1233 DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n",
1234 tid, squashed_inst->seqNum, squashed_inst->pcState());
1235
1236 bool is_acq_rel = squashed_inst->isFullMemBarrier() &&
1237 (squashed_inst->isLoad() ||
1238 (squashed_inst->isStore() &&
1239 !squashed_inst->isStoreConditional()));
1240
1241 // Remove the instruction from the dependency list.
1242 if (is_acq_rel ||
1243 (!squashed_inst->isNonSpeculative() &&
1244 !squashed_inst->isStoreConditional() &&
1245 !squashed_inst->isAtomic() &&
1246 !squashed_inst->isReadBarrier() &&
1247 !squashed_inst->isWriteBarrier())) {
1248
1249 for (int src_reg_idx = 0;
1250 src_reg_idx < squashed_inst->numSrcRegs();
1251 src_reg_idx++)
1252 {
1253 PhysRegIdPtr src_reg =
1254 squashed_inst->renamedSrcIdx(src_reg_idx);
1255
1256 // Only remove it from the dependency graph if it
1257 // was placed there in the first place.
1258
1259 // Instead of doing a linked list traversal, we
1260 // can just remove these squashed instructions
1261 // either at issue time, or when the register is
1262 // overwritten. The only downside to this is it
1263 // leaves more room for error.
1264
1265 if (!squashed_inst->readySrcIdx(src_reg_idx) &&
1266 !src_reg->isFixedMapping()) {
1267 dependGraph.remove(src_reg->flatIndex(),
1268 squashed_inst);
1269 }
1270
1271 ++iqStats.squashedOperandsExamined;
1272 }
1273
1274 } else if (!squashed_inst->isStoreConditional() ||
1275 !squashed_inst->isCompleted()) {
1276 NonSpecMapIt ns_inst_it =
1277 nonSpecInsts.find(squashed_inst->seqNum);
1278
1279 // we remove non-speculative instructions from
1280 // nonSpecInsts already when they are ready, and so we
1281 // cannot always expect to find them
1282 if (ns_inst_it == nonSpecInsts.end()) {
1283 // loads that became ready but stalled on a
1284 // blocked cache are alreayd removed from
1285 // nonSpecInsts, and have not faulted
1286 assert(squashed_inst->getFault() != NoFault ||
1287 squashed_inst->isMemRef());
1288 } else {
1289
1290 (*ns_inst_it).second = NULL;
1291
1292 nonSpecInsts.erase(ns_inst_it);
1293
1294 ++iqStats.squashedNonSpecRemoved;
1295 }
1296 }
1297
1298 // Might want to also clear out the head of the dependency graph.
1299
1300 // Mark it as squashed within the IQ.
1301 squashed_inst->setSquashedInIQ();
1302
1303 // @todo: Remove this hack where several statuses are set so the
1304 // inst will flow through the rest of the pipeline.
1305 squashed_inst->setIssued();
1306 squashed_inst->setCanCommit();
1307 squashed_inst->clearInIQ();
1308
1309 //Update Thread IQ Count
1310 count[squashed_inst->threadNumber]--;
1311
1312 ++freeEntries;
1313 }
1314
1315 // IQ clears out the heads of the dependency graph only when
1316 // instructions reach writeback stage. If an instruction is squashed
1317 // before writeback stage, its head of dependency graph would not be
1318 // cleared out; it holds the instruction's DynInstPtr. This
1319 // prevents freeing the squashed instruction's DynInst.
1320 // Thus, we need to manually clear out the squashed instructions'
1321 // heads of dependency graph.
1322 for (int dest_reg_idx = 0;
1323 dest_reg_idx < squashed_inst->numDestRegs();
1324 dest_reg_idx++)
1325 {
1326 PhysRegIdPtr dest_reg =
1327 squashed_inst->renamedDestIdx(dest_reg_idx);
1328 if (dest_reg->isFixedMapping()){
1329 continue;
1330 }
1331 assert(dependGraph.empty(dest_reg->flatIndex()));
1332 dependGraph.clearInst(dest_reg->flatIndex());
1333 }
1334 instList[tid].erase(squash_it--);
1335 ++iqStats.squashedInstsExamined;
1336 }
1337}
1338
1339bool
1341 const DynInstPtr &lhs, const DynInstPtr &rhs) const
1342{
1343 return lhs->seqNum > rhs->seqNum;
1344}
1345
1346bool
1348{
1349 // Loop through the instruction's source registers, adding
1350 // them to the dependency list if they are not ready.
1351 int8_t total_src_regs = new_inst->numSrcRegs();
1352 bool return_val = false;
1353
1354 for (int src_reg_idx = 0;
1355 src_reg_idx < total_src_regs;
1356 src_reg_idx++)
1357 {
1358 // Only add it to the dependency graph if it's not ready.
1359 if (!new_inst->readySrcIdx(src_reg_idx)) {
1360 PhysRegIdPtr src_reg = new_inst->renamedSrcIdx(src_reg_idx);
1361
1362 // Check the IQ's scoreboard to make sure the register
1363 // hasn't become ready while the instruction was in flight
1364 // between stages. Only if it really isn't ready should
1365 // it be added to the dependency graph.
1366 if (src_reg->isFixedMapping()) {
1367 continue;
1368 } else if (!regScoreboard[src_reg->flatIndex()]) {
1369 DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1370 "is being added to the dependency chain.\n",
1371 new_inst->pcState(), src_reg->index(),
1372 src_reg->className());
1373
1374 dependGraph.insert(src_reg->flatIndex(), new_inst);
1375
1376 // Change the return value to indicate that something
1377 // was added to the dependency graph.
1378 return_val = true;
1379 } else {
1380 DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1381 "became ready before it reached the IQ.\n",
1382 new_inst->pcState(), src_reg->index(),
1383 src_reg->className());
1384 // Mark a register ready within the instruction.
1385 new_inst->markSrcRegReady(src_reg_idx);
1386 }
1387 }
1388 }
1389
1390 return return_val;
1391}
1392
1393void
1395{
1396 // Nothing really needs to be marked when an instruction becomes
1397 // the producer of a register's value, but for convenience a ptr
1398 // to the producing instruction will be placed in the head node of
1399 // the dependency links.
1400 int8_t total_dest_regs = new_inst->numDestRegs();
1401
1402 for (int dest_reg_idx = 0;
1403 dest_reg_idx < total_dest_regs;
1404 dest_reg_idx++)
1405 {
1406 PhysRegIdPtr dest_reg = new_inst->renamedDestIdx(dest_reg_idx);
1407
1408 // Some registers have fixed mapping, and there is no need to track
1409 // dependencies as these instructions must be executed at commit.
1410 if (dest_reg->isFixedMapping()) {
1411 continue;
1412 }
1413
1414 if (!dependGraph.empty(dest_reg->flatIndex())) {
1415 dependGraph.dump();
1416 panic("Dependency graph %i (%s) (flat: %i) not empty!",
1417 dest_reg->index(), dest_reg->className(),
1418 dest_reg->flatIndex());
1419 }
1420
1421 dependGraph.setInst(dest_reg->flatIndex(), new_inst);
1422
1423 // Mark the scoreboard to say it's not yet ready.
1424 regScoreboard[dest_reg->flatIndex()] = false;
1425 }
1426}
1427
1428void
1430{
1431 // If the instruction now has all of its source registers
1432 // available, then add it to the list of ready instructions.
1433 if (inst->readyToIssue()) {
1434
1435 //Add the instruction to the proper ready list.
1436 if (inst->isMemRef()) {
1437
1438 DPRINTF(IQ, "Checking if memory instruction can issue.\n");
1439
1440 // Message to the mem dependence unit that this instruction has
1441 // its registers ready.
1442 memDepUnit[inst->threadNumber].regsReady(inst);
1443
1444 return;
1445 }
1446
1447 OpClass op_class = inst->opClass();
1448
1449 DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1450 "the ready list, PC %s opclass:%i [sn:%llu].\n",
1451 inst->pcState(), op_class, inst->seqNum);
1452
1453 readyInsts[op_class].push(inst);
1454
1455 // Will need to reorder the list if either a queue is not on the list,
1456 // or it has an older instruction than last time.
1457 if (!queueOnList[op_class]) {
1458 addToOrderList(op_class);
1459 } else if (readyInsts[op_class].top()->seqNum <
1460 (*readyIt[op_class]).oldestInst) {
1461 listOrder.erase(readyIt[op_class]);
1462 addToOrderList(op_class);
1463 }
1464 }
1465}
1466
1467int
1472
1473void
1475{
1476 for (int i = 0; i < Num_OpClasses; ++i) {
1477 cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
1478
1479 cprintf("\n");
1480 }
1481
1482 cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
1483
1484 NonSpecMapIt non_spec_it = nonSpecInsts.begin();
1485 NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
1486
1487 cprintf("Non speculative list: ");
1488
1489 while (non_spec_it != non_spec_end_it) {
1490 cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(),
1491 (*non_spec_it).second->seqNum);
1492 ++non_spec_it;
1493 }
1494
1495 cprintf("\n");
1496
1497 ListOrderIt list_order_it = listOrder.begin();
1498 ListOrderIt list_order_end_it = listOrder.end();
1499 int i = 1;
1500
1501 cprintf("List order: ");
1502
1503 while (list_order_it != list_order_end_it) {
1504 cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType,
1505 (*list_order_it).oldestInst);
1506
1507 ++list_order_it;
1508 ++i;
1509 }
1510
1511 cprintf("\n");
1512}
1513
1514
1515void
1517{
1518 for (ThreadID tid = 0; tid < numThreads; ++tid) {
1519 int num = 0;
1520 int valid_num = 0;
1521 ListIt inst_list_it = instList[tid].begin();
1522
1523 while (inst_list_it != instList[tid].end()) {
1524 cprintf("Instruction:%i\n", num);
1525 if (!(*inst_list_it)->isSquashed()) {
1526 if (!(*inst_list_it)->isIssued()) {
1527 ++valid_num;
1528 cprintf("Count:%i\n", valid_num);
1529 } else if ((*inst_list_it)->isMemRef() &&
1530 !(*inst_list_it)->memOpDone()) {
1531 // Loads that have not been marked as executed
1532 // still count towards the total instructions.
1533 ++valid_num;
1534 cprintf("Count:%i\n", valid_num);
1535 }
1536 }
1537
1538 cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1539 "Issued:%i\nSquashed:%i\n",
1540 (*inst_list_it)->pcState(),
1541 (*inst_list_it)->seqNum,
1542 (*inst_list_it)->threadNumber,
1543 (*inst_list_it)->isIssued(),
1544 (*inst_list_it)->isSquashed());
1545
1546 if ((*inst_list_it)->isMemRef()) {
1547 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1548 }
1549
1550 cprintf("\n");
1551
1552 inst_list_it++;
1553 ++num;
1554 }
1555 }
1556
1557 cprintf("Insts to Execute list:\n");
1558
1559 int num = 0;
1560 int valid_num = 0;
1561 ListIt inst_list_it = instsToExecute.begin();
1562
1563 while (inst_list_it != instsToExecute.end())
1564 {
1565 cprintf("Instruction:%i\n",
1566 num);
1567 if (!(*inst_list_it)->isSquashed()) {
1568 if (!(*inst_list_it)->isIssued()) {
1569 ++valid_num;
1570 cprintf("Count:%i\n", valid_num);
1571 } else if ((*inst_list_it)->isMemRef() &&
1572 !(*inst_list_it)->memOpDone()) {
1573 // Loads that have not been marked as executed
1574 // still count towards the total instructions.
1575 ++valid_num;
1576 cprintf("Count:%i\n", valid_num);
1577 }
1578 }
1579
1580 cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1581 "Issued:%i\nSquashed:%i\n",
1582 (*inst_list_it)->pcState(),
1583 (*inst_list_it)->seqNum,
1584 (*inst_list_it)->threadNumber,
1585 (*inst_list_it)->isIssued(),
1586 (*inst_list_it)->isSquashed());
1587
1588 if ((*inst_list_it)->isMemRef()) {
1589 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1590 }
1591
1592 cprintf("\n");
1593
1594 inst_list_it++;
1595 ++num;
1596 }
1597}
1598
1599} // namespace o3
1600} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
static const FlagsType AutoDelete
Definition eventq.hh:110
Event(Priority p=Default_Pri, Flags f=0)
Definition eventq.hh:407
const RegIndex & flatIndex() const
Flat index accessor.
Definition reg_class.hh:472
constexpr RegIndex index() const
Visible RegId methods.
Definition reg_class.hh:151
int getNumPinnedWritesToComplete() const
Definition reg_class.hh:495
void decrNumPinnedWritesToComplete()
Definition reg_class.hh:506
bool isPinned() const
Definition reg_class.hh:492
constexpr const char * className() const
Return a const char* with the register class name.
Definition reg_class.hh:281
bool isFixedMapping() const
Returns true if this register is always associated to the same architectural register.
Definition reg_class.hh:469
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
static constexpr auto NoCapableFU
Instruction asked for a FU but this FUPool does not have a FU for this instruction op type.
Definition fu_pool.hh:156
static constexpr auto NoFreeFU
Instruction asked for a FU but all FU for this op type have already been allocated to other instructi...
Definition fu_pool.hh:163
static constexpr auto NoNeedFU
Named constants to differentiate cases where an instruction asked the FUPool for a free FU but did no...
Definition fu_pool.hh:150
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition iew.hh:88
bool freeFU
Should the FU be added to the list to be freed upon completing this event.
FUCompletion(const DynInstPtr &_inst, int fu_idx, InstructionQueue *iq_ptr)
Construct a FU completion event.
Definition inst_queue.cc:66
DynInstPtr inst
Executing instruction.
int fuIdx
Index of the FU used for executing.
InstructionQueue * iqPtr
Pointer back to the instruction queue.
virtual const char * description() const
Return a C string describing the event.
Definition inst_queue.cc:82
std::string name() const
Returns the name of the IQ.
void commit(const InstSeqNum &inst, ThreadID tid=0)
Commits all instructions up to and including the given sequence number, for a specific thread.
gem5::o3::InstructionQueue::IQStats iqStats
void processFUCompletion(const DynInstPtr &inst, int fu_idx)
Process FU completion event.
DynInstPtr getBlockedMemInstToExecute()
Gets a memory instruction that was blocked on the cache.
std::list< DynInstPtr > instList[MaxThreads]
List of all the instructions in the IQ (some of which may be issued).
void retryMemInst(const DynInstPtr &retry_inst)
Retries a memory instruction in the next cycle.
std::list< DynInstPtr > retryMemInsts
List of instructions that were cache blocked, but a retry has been seen since, so they can now be ret...
void deferMemInst(const DynInstPtr &deferred_inst)
Defers a memory instruction when its DTB translation incurs a hw page table walk.
ReadyInstQueue readyInsts[Num_OpClasses]
List of ready instructions, per op class.
unsigned totalWidth
The total number of instructions that can be issued in one cycle.
void addIfReady(const DynInstPtr &inst)
Moves an instruction to the ready queue if it is ready.
unsigned numEntries
The number of entries in the instruction queue.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a memory or write barrier into the IQ to make sure loads and stores are ordered properly.
bool queueOnList[Num_OpClasses]
Tracks if each ready queue is on the age order list.
FUPool * fuPool
Function unit pool.
int wakeDependents(const DynInstPtr &completed_inst)
Wakes all dependents of a completed instruction.
std::list< DynInstPtr > deferredMemInsts
List of instructions waiting for their DTB translation to complete (hw page table walk in progress).
TimeBuffer< IssueStruct > * issueToExecuteQueue
The queue to the execute stage.
std::list< DynInstPtr > instsToExecute
List of instructions that are ready to be executed.
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the global time buffer.
unsigned numFreeEntries()
Returns total number of free entries.
std::list< DynInstPtr > blockedMemInsts
List of instructions that have been cache blocked.
void rescheduleMemInst(const DynInstPtr &resched_inst)
Reschedules a memory instruction.
TimeBuffer< TimeStruct >::wire fromCommit
Wire to read information from timebuffer.
void insertNonSpec(const DynInstPtr &new_inst)
Inserts a new, non-speculative instruction into the IQ.
void addReadyMemInst(const DynInstPtr &ready_inst)
Adds a ready memory instruction to the ready list.
void replayMemInst(const DynInstPtr &replay_inst)
Replays a memory instruction.
void resetState()
Resets all instruction queue state.
bool isDrained() const
Determine if we are drained.
unsigned count[MaxThreads]
Per Thread IQ count.
void cacheUnblocked()
Notify instruction queue that a previous blockage has resolved.
std::map< InstSeqNum, DynInstPtr > nonSpecInsts
List of non-speculative instructions that will be scheduled once the IQ gets a signal from commit.
unsigned freeEntries
Number of free IQ entries left.
MemDepUnit memDepUnit[MaxThreads]
The memory dependence unit, which tracks/predicts memory dependences between instructions.
void dumpLists()
Debugging function to dump all the list sizes, as well as print out the list of nonspeculative instru...
void blockMemInst(const DynInstPtr &blocked_inst)
Defers a memory instruction when it is cache blocked.
void drainSanityCheck() const
Perform sanity checks after a drain.
unsigned numPhysRegs
The number of physical registers in the CPU.
DynInstPtr getDeferredMemInstToExecute()
Gets a memory instruction that was referred due to a delayed DTB translation if it is now ready to ex...
void dumpInsts()
Debugging function to dump out all instructions that are in the IQ.
void takeOverFrom()
Takes over execution from another CPU's thread.
SMTQueuePolicy iqPolicy
IQ sharing policy for SMT.
gem5::o3::InstructionQueue::IQIOStats iqIOStats
void moveToYoungerInst(ListOrderIt age_order_it)
Called when the oldest instruction has been removed from a ready queue; this places that ready queue ...
InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
Constructs an IQ.
Definition inst_queue.cc:87
InstSeqNum squashedSeqNum[MaxThreads]
The sequence number of the squashed instruction.
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load)
Indicates an ordering violation between a store and a load.
std::list< DynInstPtr >::iterator ListIt
bool hasReadyInsts()
Returns if there are any ready instructions in the IQ.
Cycles commitToIEWDelay
Delay between commit stage and the IQ.
void resetEntries()
Resets max entries for all threads.
int countInsts()
Debugging function to count how many entries are in the IQ.
std::list< ThreadID > * activeThreads
Pointer to list of active threads.
std::list< ListOrderEntry >::iterator ListOrderIt
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets active threads list.
void addToOrderList(OpClass op_class)
Add an op class to the age order list.
ThreadID numThreads
Number of Total Threads.
TimeBuffer< TimeStruct > * timeBuffer
The backwards time buffer.
void scheduleNonSpec(const InstSeqNum &inst)
Schedules a single specific non-speculative instruction.
std::vector< bool > regScoreboard
A cache of the recently woken registers.
void scheduleReadyInsts()
Schedules ready instructions, adding the ready ones (oldest first) to the queue to execute.
bool isFull()
Returns whether or not the IQ is full.
void squash(ThreadID tid)
Squashes instructions for a thread.
IEW * iewStage
Pointer to IEW stage.
std::list< ListOrderEntry > listOrder
List that contains the age order of the oldest instruction of each ready queue.
~InstructionQueue()
Destructs the IQ.
void doSquash(ThreadID tid)
Does the actual squashing.
void setIssueToExecuteQueue(TimeBuffer< IssueStruct > *i2eQueue)
Sets the timer buffer between issue and execute.
int wbOutstanding
Number of instructions currently in flight to FUs.
void insert(const DynInstPtr &new_inst)
Inserts a new instruction into the IQ.
unsigned maxEntries[MaxThreads]
Max IQ Entries Per Thread.
CPU * cpu
Pointer to the CPU.
bool addToDependents(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a consumer.
int entryAmount(ThreadID num_threads)
Number of entries needed for given amount of threads.
DynInstPtr getInstToExecute()
Returns the oldest scheduled instruction, and removes it from the list of instructions waiting to exe...
DependencyGraph< DynInstPtr > dependGraph
ListOrderIt readyIt[Num_OpClasses]
Iterators of each ready queue.
void addToProducers(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a producer.
std::map< InstSeqNum, DynInstPtr >::iterator NonSpecMapIt
Statistics container.
Definition group.hh:93
STL list class.
Definition stl.hh:51
Definition test.h:63
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
static const Priority Stat_Event_Pri
Statistics events (dump, reset, etc.) come after everything else, but before exit.
Definition eventq.hh:222
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
Bitfield< 7 > i
Definition misc_types.hh:67
static constexpr int MaxThreads
Definition limits.hh:38
RefCountingPtr< DynInst > DynInstPtr
Units for Stats.
Definition units.hh:113
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition info.hh:61
const FlagsType total
Print the total.
Definition info.hh:59
const FlagsType dist
Print the distribution.
Definition info.hh:65
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
static const OpClass Num_OpClasses
Definition op_class.hh:135
void cprintf(const char *format, const Args &...args)
Definition cprintf.hh:155
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
PhysRegId * PhysRegIdPtr
Definition reg_class.hh:510
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
@ VecRegClass
Vector Register.
Definition reg_class.hh:64
@ VecElemClass
Vector Register Native Elem lane.
Definition reg_class.hh:66
statistics::Scalar vecInstQueueWakeupAccesses
statistics::Scalar intInstQueueWakeupAccesses
IQIOStats(statistics::Group *parent)
statistics::Scalar fpInstQueueWakeupAccesses
statistics::Vector2d statIssuedInstType
Stat for total number issued for each instruction type.
statistics::Scalar floatInstsIssued
Stat for number of floating point instructions issued.
statistics::Vector fuBusy
Number of times the FU was busy.
statistics::Scalar instsAdded
Stat for number of instructions added.
statistics::Distribution numIssuedDist
Distribution of number of instructions in the queue.
statistics::Scalar nonSpecInstsAdded
Stat for number of non-speculative instructions added.
statistics::Scalar squashedInstsExamined
Stat for number of squashed instructions examined when squashing.
statistics::Scalar miscInstsIssued
Stat for number of miscellaneous instructions issued.
statistics::Scalar branchInstsIssued
Stat for number of branch instructions issued.
statistics::Formula fuBusyRate
Number of times the FU was busy per instruction issued.
statistics::Scalar memInstsIssued
Stat for number of memory instructions issued.
statistics::Scalar intInstsIssued
Stat for number of integer instructions issued.
statistics::Formula issueRate
Number of instructions issued per cycle.
IQStats(CPU *cpu, const unsigned &total_width)
statistics::Scalar squashedOperandsExamined
Stat for number of squashed instruction operands examined when squashing.
statistics::Scalar squashedInstsIssued
Stat for number of squashed instructions that were ready to issue.
statistics::Vector statFuBusy
Distribution of the cycles it takes to issue an instruction.
statistics::Scalar squashedNonSpecRemoved
Stat for number of non-speculative instructions removed due to a squash.
Entry for the list age ordering by op class.
bool operator()(const DynInstPtr &lhs, const DynInstPtr &rhs) const

Generated on Mon May 26 2025 09:19:08 for gem5 by doxygen 1.13.2