gem5 [DEVELOP-FOR-25.1]
Loading...
Searching...
No Matches
inst_queue.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2014, 2017-2020 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/inst_queue.hh"
43
44#include <limits>
45#include <vector>
46
47#include "base/logging.hh"
48#include "cpu/o3/dyn_inst.hh"
49#include "cpu/o3/fu_pool.hh"
50#include "cpu/o3/limits.hh"
51#include "debug/IQ.hh"
52#include "enums/OpClass.hh"
53#include "params/BaseO3CPU.hh"
54#include "sim/core.hh"
55
56// clang complains about std::set being overloaded with Packet::set if
57// we open up the entire namespace std
58using std::list;
59
60namespace gem5
61{
62
63namespace o3
64{
65
67 int fu_idx, InstructionQueue *iq_ptr)
69 inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
70{
71}
72
73void
75{
76 iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
77 inst = NULL;
78}
79
80
81const char *
83{
84 return "Functional unit completion";
85}
86
88 const BaseO3CPUParams &params)
89 : cpu(cpu_ptr),
90 iewStage(iew_ptr),
91 fuPool(params.fuPool),
92 iqPolicy(params.smtIQPolicy),
93 numThreads(params.numThreads),
94 numEntries(params.numIQEntries),
95 totalWidth(params.issueWidth),
99{
100 assert(fuPool);
101
102 const auto &reg_classes = params.isa[0]->regClasses();
103 // Set the number of total physical registers
104 // As the vector registers have two addressing modes, they are added twice
105 numPhysRegs = params.numPhysIntRegs + params.numPhysFloatRegs +
106 params.numPhysVecRegs +
107 params.numPhysVecRegs * (
108 reg_classes.at(VecElemClass)->numRegs() /
109 reg_classes.at(VecRegClass)->numRegs()) +
110 params.numPhysVecPredRegs +
111 params.numPhysMatRegs +
112 params.numPhysCCRegs;
113
114 //Create an entry for each physical register within the
115 //dependency graph.
116 dependGraph.resize(numPhysRegs);
117
118 // Resize the register scoreboard.
120
121 //Initialize Mem Dependence Units
122 for (ThreadID tid = 0; tid < MaxThreads; tid++) {
123 memDepUnit[tid].init(params, tid, cpu_ptr);
124 memDepUnit[tid].setIQ(this);
125 }
126
127 resetState();
128
129 //Figure out resource sharing policy
130 if (iqPolicy == SMTQueuePolicy::Dynamic) {
131 //Set Max Entries to Total ROB Capacity
132 for (ThreadID tid = 0; tid < numThreads; tid++) {
133 maxEntries[tid] = numEntries;
134 }
135
136 } else if (iqPolicy == SMTQueuePolicy::Partitioned) {
137 //@todo:make work if part_amt doesnt divide evenly.
138 int part_amt = numEntries / numThreads;
139
140 //Divide ROB up evenly
141 for (ThreadID tid = 0; tid < numThreads; tid++) {
142 maxEntries[tid] = part_amt;
143 }
144
145 DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
146 "%i entries per thread.\n",part_amt);
147 } else if (iqPolicy == SMTQueuePolicy::Threshold) {
148 double threshold = (double)params.smtIQThreshold / 100;
149
150 int thresholdIQ = (int)((double)threshold * numEntries);
151
152 //Divide up by threshold amount
153 for (ThreadID tid = 0; tid < numThreads; tid++) {
154 maxEntries[tid] = thresholdIQ;
155 }
156
157 DPRINTF(IQ, "IQ sharing policy set to Threshold:"
158 "%i entries per thread.\n",thresholdIQ);
159 }
160 for (ThreadID tid = numThreads; tid < MaxThreads; tid++) {
161 maxEntries[tid] = 0;
162 }
163}
164
166{
167 dependGraph.reset();
168#ifdef GEM5_DEBUG
169 cprintf("Nodes traversed: %i, removed: %i\n",
170 dependGraph.nodesTraversed, dependGraph.nodesRemoved);
171#endif
172}
173
174std::string
176{
177 return cpu->name() + ".iq";
178}
179
180InstructionQueue::IQStats::IQStats(CPU *cpu, const unsigned &total_width)
181 : statistics::Group(cpu),
182 ADD_STAT(instsAdded, statistics::units::Count::get(),
183 "Number of instructions added to the IQ (excludes non-spec)"),
185 "Number of non-speculative instructions added to the IQ"),
186 ADD_STAT(instsIssued, statistics::units::Count::get(),
187 "Number of instructions issued"),
188 ADD_STAT(intInstsIssued, statistics::units::Count::get(),
189 "Number of integer instructions issued"),
191 "Number of float instructions issued"),
193 "Number of branch instructions issued"),
194 ADD_STAT(memInstsIssued, statistics::units::Count::get(),
195 "Number of memory instructions issued"),
197 "Number of miscellaneous instructions issued"),
199 "Number of squashed instructions issued"),
201 "Number of squashed instructions iterated over during squash; "
202 "mainly for profiling"),
204 "Number of squashed operands that are examined and possibly "
205 "removed from graph"),
207 "Number of squashed non-spec instructions that were removed"),
208 ADD_STAT(numIssuedDist, statistics::units::Count::get(),
209 "Number of insts issued each cycle"),
210 ADD_STAT(statFuBusy, statistics::units::Count::get(),
211 "attempts to use FU when none available"),
212 ADD_STAT(issuedInstType, statistics::units::Count::get(),
213 "Number of instructions issued per FU type, per thread"),
215 statistics::units::Rate<statistics::units::Count,
216 statistics::units::Cycle>::get(),
217 "Inst issue rate", instsIssued / cpu->baseStats.numCycles),
218 ADD_STAT(fuBusy, statistics::units::Count::get(),
219 "FU busy when requested"),
221 statistics::units::Rate<statistics::units::Count,
222 statistics::units::Count>::get(),
223 "FU busy rate (busy events/executed inst)")
224{
226 .prereq(instsAdded);
227
229 .prereq(nonSpecInstsAdded);
230
232 .prereq(instsIssued);
233
235 .prereq(intInstsIssued);
236
238 .prereq(floatInstsIssued);
239
241 .prereq(branchInstsIssued);
242
244 .prereq(memInstsIssued);
245
247 .prereq(miscInstsIssued);
248
250 .prereq(squashedInstsIssued);
251
253 .prereq(squashedInstsExamined);
254
257
259 .prereq(squashedNonSpecRemoved);
260/*
261 queueResDist
262 .init(Num_OpClasses, 0, 99, 2)
263 .name(name() + ".IQ:residence:")
264 .desc("cycles from dispatch to issue")
265 .flags(total | pdf | cdf )
266 ;
267 for (int i = 0; i < Num_OpClasses; ++i) {
268 queueResDist.subname(i, opClassStrings[i]);
269 }
270*/
272 .init(0,total_width,1)
273 .flags(statistics::pdf)
274 ;
275/*
276 dist_unissued
277 .init(Num_OpClasses+2)
278 .name(name() + ".unissued_cause")
279 .desc("Reason ready instruction not issued")
280 .flags(pdf | dist)
281 ;
282 for (int i=0; i < (Num_OpClasses + 2); ++i) {
283 dist_unissued.subname(i, unissued_names[i]);
284 }
285*/
286 issuedInstType.init(cpu->numThreads, enums::Num_OpClass)
288 issuedInstType.ysubnames(enums::OpClassStrings);
289
290 //
291 // How long did instructions for a particular FU type wait prior to issue
292 //
293/*
294 issueDelayDist
295 .init(Num_OpClasses,0,99,2)
296 .name(name() + ".")
297 .desc("cycles from operands ready to issue")
298 .flags(pdf | cdf)
299 ;
300 for (int i=0; i<Num_OpClasses; ++i) {
301 std::stringstream subname;
302 subname << opClassStrings[i] << "_delay";
303 issueDelayDist.subname(i, subname.str());
304 }
305*/
307 .flags(statistics::total)
308 ;
309
311 .init(Num_OpClasses)
313 ;
314 for (int i=0; i < Num_OpClasses; ++i) {
315 statFuBusy.subname(i, enums::OpClassStrings[i]);
316 }
317
318 fuBusy
319 .init(cpu->numThreads)
320 .flags(statistics::total)
321 ;
322
324 .flags(statistics::total)
325 ;
327}
328
330 : statistics::Group(parent),
332 "Number of integer instruction queue reads"),
334 "Number of integer instruction queue writes"),
336 "Number of integer instruction queue wakeup accesses"),
338 "Number of floating instruction queue reads"),
340 "Number of floating instruction queue writes"),
342 "Number of floating instruction queue wakeup accesses"),
344 "Number of vector instruction queue reads"),
346 "Number of vector instruction queue writes"),
348 "Number of vector instruction queue wakeup accesses"),
349 ADD_STAT(intAluAccesses, statistics::units::Count::get(),
350 "Number of integer alu accesses"),
351 ADD_STAT(fpAluAccesses, statistics::units::Count::get(),
352 "Number of floating point alu accesses"),
353 ADD_STAT(vecAluAccesses, statistics::units::Count::get(),
354 "Number of vector alu accesses")
355{
356 using namespace statistics;
358 .flags(total);
359
361 .flags(total);
362
364 .flags(total);
365
367 .flags(total);
368
370 .flags(total);
371
373 .flags(total);
374
376 .flags(total);
377
379 .flags(total);
380
382 .flags(total);
383
385 .flags(total);
386
388 .flags(total);
389
391 .flags(total);
392}
393
394void
396{
397 //Initialize thread IQ counts
398 for (ThreadID tid = 0; tid < MaxThreads; tid++) {
399 count[tid] = 0;
400 instList[tid].clear();
401 }
402
403 // Initialize the number of free IQ entries.
405
406 // Note that in actuality, the registers corresponding to the logical
407 // registers start off as ready. However this doesn't matter for the
408 // IQ as the instruction should have been correctly told if those
409 // registers are ready in rename. Thus it can all be initialized as
410 // unready.
411 for (int i = 0; i < numPhysRegs; ++i) {
412 regScoreboard[i] = false;
413 }
414
415 for (ThreadID tid = 0; tid < MaxThreads; ++tid) {
416 squashedSeqNum[tid] = 0;
417 }
418
419 for (int i = 0; i < Num_OpClasses; ++i) {
420 while (!readyInsts[i].empty())
421 readyInsts[i].pop();
422 queueOnList[i] = false;
423 readyIt[i] = listOrder.end();
424 }
425 nonSpecInsts.clear();
426 listOrder.clear();
427 deferredMemInsts.clear();
428 blockedMemInsts.clear();
429 retryMemInsts.clear();
430 wbOutstanding = 0;
431}
432
433void
438
439void
444
445void
452
453bool
455{
456 bool drained = dependGraph.empty() &&
457 instsToExecute.empty() &&
458 wbOutstanding == 0;
459 for (ThreadID tid = 0; tid < numThreads; ++tid)
460 drained = drained && memDepUnit[tid].isDrained();
461
462 return drained;
463}
464
465void
467{
468 assert(dependGraph.empty());
469 assert(instsToExecute.empty());
470 for (ThreadID tid = 0; tid < numThreads; ++tid)
472}
473
474void
479
480int
482{
483 if (iqPolicy == SMTQueuePolicy::Partitioned) {
484 return numEntries / num_threads;
485 } else {
486 return 0;
487 }
488}
489
490
491void
493{
494 if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
495 int active_threads = activeThreads->size();
496
497 for (ThreadID tid : *activeThreads) {
498 if (iqPolicy == SMTQueuePolicy::Partitioned) {
499 maxEntries[tid] = numEntries / active_threads;
500 } else if (iqPolicy == SMTQueuePolicy::Threshold &&
501 active_threads == 1) {
502 maxEntries[tid] = numEntries;
503 }
504 }
505 }
506}
507
508unsigned
513
514unsigned
516{
517 return maxEntries[tid] - count[tid];
518}
519
520// Might want to do something more complex if it knows how many instructions
521// will be issued this cycle.
522bool
524{
525 if (freeEntries == 0) {
526 return(true);
527 } else {
528 return(false);
529 }
530}
531
532bool
534{
535 if (numFreeEntries(tid) == 0) {
536 return(true);
537 } else {
538 return(false);
539 }
540}
541
542bool
544{
545 if (!listOrder.empty()) {
546 return true;
547 }
548
549 for (int i = 0; i < Num_OpClasses; ++i) {
550 if (!readyInsts[i].empty()) {
551 return true;
552 }
553 }
554
555 return false;
556}
557
558void
560{
561 if (new_inst->isFloating()) {
562 iqIOStats.fpInstQueueWrites++;
563 } else if (new_inst->isVector()) {
564 iqIOStats.vecInstQueueWrites++;
565 } else {
566 iqIOStats.intInstQueueWrites++;
567 }
568 // Make sure the instruction is valid
569 assert(new_inst);
570
571 DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n",
572 new_inst->seqNum, new_inst->pcState());
573
574 assert(freeEntries != 0);
575
576 instList[new_inst->threadNumber].push_back(new_inst);
577
578 --freeEntries;
579
580 new_inst->setInIQ();
581
582 // Look through its source registers (physical regs), and mark any
583 // dependencies.
584 addToDependents(new_inst);
585
586 // Have this instruction set itself as the producer of its destination
587 // register(s).
588 addToProducers(new_inst);
589
590 if (new_inst->isMemRef()) {
591 memDepUnit[new_inst->threadNumber].insert(new_inst);
592 } else {
593 addIfReady(new_inst);
594 }
595
596 ++iqStats.instsAdded;
597
598 count[new_inst->threadNumber]++;
599
600 assert(freeEntries == (numEntries - countInsts()));
601}
602
603void
605{
606 // @todo: Clean up this code; can do it by setting inst as unable
607 // to issue, then calling normal insert on the inst.
608 if (new_inst->isFloating()) {
609 iqIOStats.fpInstQueueWrites++;
610 } else if (new_inst->isVector()) {
611 iqIOStats.vecInstQueueWrites++;
612 } else {
613 iqIOStats.intInstQueueWrites++;
614 }
615
616 assert(new_inst);
617
618 nonSpecInsts[new_inst->seqNum] = new_inst;
619
620 DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s "
621 "to the IQ.\n",
622 new_inst->seqNum, new_inst->pcState());
623
624 assert(freeEntries != 0);
625
626 instList[new_inst->threadNumber].push_back(new_inst);
627
628 --freeEntries;
629
630 new_inst->setInIQ();
631
632 // Have this instruction set itself as the producer of its destination
633 // register(s).
634 addToProducers(new_inst);
635
636 // If it's a memory instruction, add it to the memory dependency
637 // unit.
638 if (new_inst->isMemRef()) {
639 memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
640 }
641
642 ++iqStats.nonSpecInstsAdded;
643
644 count[new_inst->threadNumber]++;
645
646 assert(freeEntries == (numEntries - countInsts()));
647}
648
649void
651{
652 memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
653
654 insertNonSpec(barr_inst);
655}
656
659{
660 assert(!instsToExecute.empty());
661 DynInstPtr inst = std::move(instsToExecute.front());
662 instsToExecute.pop_front();
663 if (inst->isFloating()) {
664 iqIOStats.fpInstQueueReads++;
665 } else if (inst->isVector()) {
666 iqIOStats.vecInstQueueReads++;
667 } else {
668 iqIOStats.intInstQueueReads++;
669 }
670 return inst;
671}
672
673void
675{
676 assert(!readyInsts[op_class].empty());
677
678 ListOrderEntry queue_entry;
679
680 queue_entry.queueType = op_class;
681
682 queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
683
684 ListOrderIt list_it = listOrder.begin();
685 ListOrderIt list_end_it = listOrder.end();
686
687 while (list_it != list_end_it) {
688 if ((*list_it).oldestInst > queue_entry.oldestInst) {
689 break;
690 }
691
692 list_it++;
693 }
694
695 readyIt[op_class] = listOrder.insert(list_it, queue_entry);
696 queueOnList[op_class] = true;
697}
698
699void
701{
702 // Get iterator of next item on the list
703 // Delete the original iterator
704 // Determine if the next item is either the end of the list or younger
705 // than the new instruction. If so, then add in a new iterator right here.
706 // If not, then move along.
707 ListOrderEntry queue_entry;
708 OpClass op_class = (*list_order_it).queueType;
709 ListOrderIt next_it = list_order_it;
710
711 ++next_it;
712
713 queue_entry.queueType = op_class;
714 queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
715
716 while (next_it != listOrder.end() &&
717 (*next_it).oldestInst < queue_entry.oldestInst) {
718 ++next_it;
719 }
720
721 readyIt[op_class] = listOrder.insert(next_it, queue_entry);
722}
723
724void
726{
727 DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
728 assert(!cpu->switchedOut());
729 // The CPU could have been sleeping until this op completed (*extremely*
730 // long latency op). Wake it if it was. This may be overkill.
732 iewStage->wakeCPU();
733
734 if (fu_idx > -1)
735 fuPool->freeUnitNextCycle(fu_idx);
736
737 // @todo: Ensure that these FU Completions happen at the beginning
738 // of a cycle, otherwise they could add too many instructions to
739 // the queue.
740 issueToExecuteQueue->access(-1)->size++;
741 instsToExecute.push_back(inst);
742}
743
744// @todo: Figure out a better way to remove the squashed items from the
745// lists. Checking the top item of each list to see if it's squashed
746// wastes time and forces jumps.
747void
749{
750 DPRINTF(IQ, "Attempting to schedule ready instructions from "
751 "the IQ.\n");
752
753 IssueStruct *i2e_info = issueToExecuteQueue->access(0);
754
755 DynInstPtr mem_inst;
756 while ((mem_inst = getDeferredMemInstToExecute())) {
757 addReadyMemInst(mem_inst);
758 }
759
760 // See if any cache blocked instructions are able to be executed
761 while ((mem_inst = getBlockedMemInstToExecute())) {
762 addReadyMemInst(mem_inst);
763 }
764
765 // Have iterator to head of the list
766 // While I haven't exceeded bandwidth or reached the end of the list,
767 // Try to get a FU that can do what this op needs.
768 // If successful, change the oldestInst to the new top of the list, put
769 // the queue in the proper place in the list.
770 // Increment the iterator.
771 // This will avoid trying to schedule a certain op class if there are no
772 // FUs that handle it.
773 int total_issued = 0;
774 ListOrderIt order_it = listOrder.begin();
775 ListOrderIt order_end_it = listOrder.end();
776
777 while (total_issued < totalWidth && order_it != order_end_it) {
778 OpClass op_class = (*order_it).queueType;
779
780 assert(!readyInsts[op_class].empty());
781
782 DynInstPtr issuing_inst = readyInsts[op_class].top();
783
784 if (issuing_inst->isFloating()) {
785 iqIOStats.fpInstQueueReads++;
786 } else if (issuing_inst->isVector()) {
787 iqIOStats.vecInstQueueReads++;
788 } else {
789 iqIOStats.intInstQueueReads++;
790 }
791
792 assert(issuing_inst->seqNum == (*order_it).oldestInst);
793
794 if (issuing_inst->isSquashed()) {
795 readyInsts[op_class].pop();
796
797 if (!readyInsts[op_class].empty()) {
798 moveToYoungerInst(order_it);
799 } else {
800 readyIt[op_class] = listOrder.end();
801 queueOnList[op_class] = false;
802 }
803
804 listOrder.erase(order_it++);
805
806 ++iqStats.squashedInstsIssued;
807
808 continue;
809 }
810
811 int idx = FUPool::NoNeedFU;
812 Cycles op_latency = Cycles(1);
813 ThreadID tid = issuing_inst->threadNumber;
814
815 if (op_class != No_OpClass) {
816 idx = fuPool->getUnit(op_class);
817 if (issuing_inst->isFloating()) {
818 iqIOStats.fpAluAccesses++;
819 } else if (issuing_inst->isVector()) {
820 iqIOStats.vecAluAccesses++;
821 } else {
822 iqIOStats.intAluAccesses++;
823 }
824 if (idx > FUPool::NoFreeFU) {
825 op_latency = fuPool->getOpLatency(op_class);
826 }
827 }
828
829 // If we have an instruction that doesn't require a FU, or a
830 // valid FU, then schedule for execution.
831 if (idx > FUPool::NoFreeFU || idx == FUPool::NoNeedFU ||
832 idx == FUPool::NoCapableFU) {
833 if (op_latency == Cycles(1)) {
834 i2e_info->size++;
835 instsToExecute.push_back(issuing_inst);
836
837 // Add the FU onto the list of FU's to be freed next
838 // cycle if we used one.
839 if (idx >= 0)
840 fuPool->freeUnitNextCycle(idx);
841
842 // CPU has no capable FU for the instruction
843 // but this may be OK if the instruction gets
844 // squashed. Remember this and give IEW
845 // the opportunity to trigger a fault
846 // if the instruction is unsupported.
847 // Otherwise, commit will panic.
848 if (idx == FUPool::NoCapableFU)
849 issuing_inst->setNoCapableFU();
850 } else {
851 assert(idx != FUPool::NoCapableFU);
852 bool pipelined = fuPool->isPipelined(op_class);
853 // Generate completion event for the FU
855 FUCompletion *execution = new FUCompletion(issuing_inst,
856 idx, this);
857
858 cpu->schedule(execution,
859 cpu->clockEdge(Cycles(op_latency - 1)));
860
861 if (!pipelined) {
862 // If FU isn't pipelined, then it must be freed
863 // upon the execution completing.
864 execution->setFreeFU();
865 } else {
866 // Add the FU onto the list of FU's to be freed next cycle.
867 fuPool->freeUnitNextCycle(idx);
868 }
869 }
870
871 DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
872 "[sn:%llu]\n",
873 tid, issuing_inst->pcState(),
874 issuing_inst->seqNum);
875
876 readyInsts[op_class].pop();
877
878 if (!readyInsts[op_class].empty()) {
879 moveToYoungerInst(order_it);
880 } else {
881 readyIt[op_class] = listOrder.end();
882 queueOnList[op_class] = false;
883 }
884
885 issuing_inst->setIssued();
886 ++total_issued;
887
888#if TRACING_ON
889 issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
890#endif
891
892 if (issuing_inst->firstIssue == -1)
893 issuing_inst->firstIssue = curTick();
894
895 if (!issuing_inst->isMemRef()) {
896 // Memory instructions can not be freed from the IQ until they
897 // complete.
898 ++freeEntries;
899 count[tid]--;
900 issuing_inst->clearInIQ();
901 } else {
902 memDepUnit[tid].issue(issuing_inst);
903 }
904
905 listOrder.erase(order_it++);
906 iqStats.issuedInstType[tid][op_class]++;
907 } else {
908 assert(idx == FUPool::NoFreeFU);
909 iqStats.statFuBusy[op_class]++;
910 iqStats.fuBusy[tid]++;
911 ++order_it;
912 }
913 }
914
915 iqStats.numIssuedDist.sample(total_issued);
916 iqStats.instsIssued+= total_issued;
917
918 // If we issued any instructions, tell the CPU we had activity.
919 // @todo If the way deferred memory instructions are handeled due to
920 // translation changes then the deferredMemInsts condition should be
921 // removed from the code below.
922 if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
923 cpu->activityThisCycle();
924 } else {
925 DPRINTF(IQ, "Not able to schedule any instructions.\n");
926 }
927}
928
929void
931{
932 DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready "
933 "to execute.\n", inst);
934
935 NonSpecMapIt inst_it = nonSpecInsts.find(inst);
936
937 assert(inst_it != nonSpecInsts.end());
938
939 ThreadID tid = (*inst_it).second->threadNumber;
940
941 (*inst_it).second->setAtCommit();
942
943 (*inst_it).second->setCanIssue();
944
945 if (!(*inst_it).second->isMemRef()) {
946 addIfReady((*inst_it).second);
947 } else {
948 memDepUnit[tid].nonSpecInstReady((*inst_it).second);
949 }
950
951 (*inst_it).second = NULL;
952
953 nonSpecInsts.erase(inst_it);
954}
955
956void
958{
959 DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n",
960 tid,inst);
961
962 ListIt iq_it = instList[tid].begin();
963
964 while (iq_it != instList[tid].end() &&
965 (*iq_it)->seqNum <= inst) {
966 ++iq_it;
967 instList[tid].pop_front();
968 }
969
970 assert(freeEntries == (numEntries - countInsts()));
971}
972
973int
975{
976 int dependents = 0;
977
978 // The instruction queue here takes care of both floating and int ops
979 if (completed_inst->isFloating()) {
980 iqIOStats.fpInstQueueWakeupAccesses++;
981 } else if (completed_inst->isVector()) {
982 iqIOStats.vecInstQueueWakeupAccesses++;
983 } else {
984 iqIOStats.intInstQueueWakeupAccesses++;
985 }
986
987 completed_inst->lastWakeDependents = curTick();
988
989 DPRINTF(IQ, "Waking dependents of completed instruction.\n");
990
991 assert(!completed_inst->isSquashed());
992
993 // Tell the memory dependence unit to wake any dependents on this
994 // instruction if it is a memory instruction. Also complete the memory
995 // instruction at this point since we know it executed without issues.
996 ThreadID tid = completed_inst->threadNumber;
997 if (completed_inst->isMemRef()) {
998 memDepUnit[tid].completeInst(completed_inst);
999
1000 DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
1001 completed_inst->pcState(), completed_inst->seqNum);
1002
1003 ++freeEntries;
1004 completed_inst->memOpDone(true);
1005 count[tid]--;
1006 } else if (completed_inst->isReadBarrier() ||
1007 completed_inst->isWriteBarrier()) {
1008 // Completes a non mem ref barrier
1009 memDepUnit[tid].completeInst(completed_inst);
1010 }
1011
1012 for (int dest_reg_idx = 0;
1013 dest_reg_idx < completed_inst->numDestRegs();
1014 dest_reg_idx++)
1015 {
1016 PhysRegIdPtr dest_reg =
1017 completed_inst->renamedDestIdx(dest_reg_idx);
1018
1019 // Special case of uniq or control registers. They are not
1020 // handled by the IQ and thus have no dependency graph entry.
1021 if (dest_reg->isFixedMapping()) {
1022 DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n",
1023 dest_reg->index(), dest_reg->className());
1024 continue;
1025 }
1026
1027 // Avoid waking up dependents if the register is pinned
1029 if (dest_reg->isPinned())
1030 completed_inst->setPinnedRegsWritten();
1031
1032 if (dest_reg->getNumPinnedWritesToComplete() != 0) {
1033 DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n",
1034 dest_reg->index(), dest_reg->className());
1035 continue;
1036 }
1037
1038 DPRINTF(IQ, "Waking any dependents on register %i (%s).\n",
1039 dest_reg->index(),
1040 dest_reg->className());
1041
1042 //Go through the dependency chain, marking the registers as
1043 //ready within the waiting instructions.
1044 DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
1045
1046 while (dep_inst) {
1047 DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
1048 "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
1049
1050 // Might want to give more information to the instruction
1051 // so that it knows which of its source registers is
1052 // ready. However that would mean that the dependency
1053 // graph entries would need to hold the src_reg_idx.
1054 dep_inst->markSrcRegReady();
1055
1056 addIfReady(dep_inst);
1057
1058 dep_inst = dependGraph.pop(dest_reg->flatIndex());
1059
1060 ++dependents;
1061 }
1062
1063 // Reset the head node now that all of its dependents have
1064 // been woken up.
1065 assert(dependGraph.empty(dest_reg->flatIndex()));
1066 dependGraph.clearInst(dest_reg->flatIndex());
1067
1068 // Mark the scoreboard as having that register ready.
1069 regScoreboard[dest_reg->flatIndex()] = true;
1070 }
1071 return dependents;
1072}
1073
1074void
1076{
1077 OpClass op_class = ready_inst->opClass();
1078
1079 assert(op_class < Num_OpClasses);
1080
1081 readyInsts[op_class].push(ready_inst);
1082
1083 // Will need to reorder the list if either a queue is not on the list,
1084 // or it has an older instruction than last time.
1085 if (!queueOnList[op_class]) {
1086 addToOrderList(op_class);
1087 } else if (readyInsts[op_class].top()->seqNum <
1088 (*readyIt[op_class]).oldestInst) {
1089 listOrder.erase(readyIt[op_class]);
1090 addToOrderList(op_class);
1091 }
1092
1093 DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1094 "the ready list, PC %s opclass:%i [sn:%llu].\n",
1095 ready_inst->pcState(), op_class, ready_inst->seqNum);
1096}
1097
1098void
1100{
1101 DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
1102
1103 // Reset DTB translation state
1104 resched_inst->translationStarted(false);
1105 resched_inst->translationCompleted(false);
1106
1107 resched_inst->clearCanIssue();
1108 memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
1109}
1110
1111void
1113{
1114 memDepUnit[replay_inst->threadNumber].replay();
1115}
1116
1117void
1119{
1120 deferredMemInsts.push_back(deferred_inst);
1121}
1122
1123void
1125{
1126 blocked_inst->clearIssued();
1127 blocked_inst->clearCanIssue();
1128 blockedMemInsts.push_back(blocked_inst);
1129 DPRINTF(IQ, "Memory inst [sn:%llu] PC %s is blocked, will be "
1130 "reissued later\n", blocked_inst->seqNum,
1131 blocked_inst->pcState());
1132}
1133
1134void
1136{
1137 retryMemInsts.push_back(retry_inst);
1138}
1139
1140void
1142{
1143 DPRINTF(IQ, "Cache is unblocked, rescheduling blocked memory "
1144 "instructions\n");
1146 // Get the CPU ticking again
1147 cpu->wakeCPU();
1148}
1149
1152{
1153 for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
1154 ++it) {
1155 if ((*it)->translationCompleted() || (*it)->isSquashed()) {
1156 DynInstPtr mem_inst = std::move(*it);
1157 deferredMemInsts.erase(it);
1158 return mem_inst;
1159 }
1160 }
1161 return nullptr;
1162}
1163
1166{
1167 if (retryMemInsts.empty()) {
1168 return nullptr;
1169 } else {
1170 DynInstPtr mem_inst = std::move(retryMemInsts.front());
1171 retryMemInsts.pop_front();
1172 return mem_inst;
1173 }
1174}
1175
1176void
1178 const DynInstPtr &faulting_load)
1179{
1180 iqIOStats.intInstQueueWrites++;
1181 memDepUnit[store->threadNumber].violation(store, faulting_load);
1182}
1183
1184void
1186{
1187 DPRINTF(IQ, "[tid:%i] Starting to squash instructions in "
1188 "the IQ.\n", tid);
1189
1190 // Read instruction sequence number of last instruction out of the
1191 // time buffer.
1192 squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
1193
1194 doSquash(tid);
1195
1196 // Also tell the memory dependence unit to squash.
1197 memDepUnit[tid].squash(squashedSeqNum[tid], tid);
1198}
1199
1200void
1202{
    // Walk instList[tid] from the tail (youngest) toward the head,
    // squashing every instruction whose seqNum is greater than
    // squashedSeqNum[tid] (recorded by squash() from the commit stage).
1203 // Start at the tail.
1204 ListIt squash_it = instList[tid].end();
1205 --squash_it;
1206
1207 DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n",
1208 tid, squashedSeqNum[tid]);
1209
1210 // Squash any instructions younger than the squashed sequence number
1211 // given.
1212 while (squash_it != instList[tid].end() &&
1213 (*squash_it)->seqNum > squashedSeqNum[tid]) {
1214
1215 DynInstPtr squashed_inst = (*squash_it);
        // Account an IQ write for the squash bookkeeping, bucketed by
        // instruction class (FP / vector / everything else).
1216 if (squashed_inst->isFloating()) {
1217 iqIOStats.fpInstQueueWrites++;
1218 } else if (squashed_inst->isVector()) {
1219 iqIOStats.vecInstQueueWrites++;
1220 } else {
1221 iqIOStats.intInstQueueWrites++;
1222 }
1223
1224 // Only handle the instruction if it actually is in the IQ and
1225 // hasn't already been squashed in the IQ.
1226 if (squashed_inst->threadNumber != tid ||
1227 squashed_inst->isSquashedInIQ()) {
1228 --squash_it;
1229 continue;
1230 }
1231
1232 if (!squashed_inst->isIssued() ||
1233 (squashed_inst->isMemRef() &&
1234 !squashed_inst->memOpDone())) {
1235
1236 DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n",
1237 tid, squashed_inst->seqNum, squashed_inst->pcState());
1238
            // Acquire/release full barriers attached to a load, or to a
            // store that is not a store-conditional, have their source
            // dependences tracked in the dependency graph (unlike other
            // barrier-like instructions), so they take the removal path
            // below.
1239 bool is_acq_rel = squashed_inst->isFullMemBarrier() &&
1240 (squashed_inst->isLoad() ||
1241 (squashed_inst->isStore() &&
1242 !squashed_inst->isStoreConditional()));
1243
1244 // Remove the instruction from the dependency list.
1245 if (is_acq_rel ||
1246 (!squashed_inst->isNonSpeculative() &&
1247 !squashed_inst->isStoreConditional() &&
1248 !squashed_inst->isAtomic() &&
1249 !squashed_inst->isReadBarrier() &&
1250 !squashed_inst->isWriteBarrier())) {
1251
1252 for (int src_reg_idx = 0;
1253 src_reg_idx < squashed_inst->numSrcRegs();
1254 src_reg_idx++)
1255 {
1256 PhysRegIdPtr src_reg =
1257 squashed_inst->renamedSrcIdx(src_reg_idx);
1258
1259 // Only remove it from the dependency graph if it
1260 // was placed there in the first place.
1261
1262 // Instead of doing a linked list traversal, we
1263 // can just remove these squashed instructions
1264 // either at issue time, or when the register is
1265 // overwritten. The only downside to this is it
1266 // leaves more room for error.
1267
1268 if (!squashed_inst->readySrcIdx(src_reg_idx) &&
1269 !src_reg->isFixedMapping()) {
1270 dependGraph.remove(src_reg->flatIndex(),
1271 squashed_inst);
1272 }
1273
1274 ++iqStats.squashedOperandsExamined;
1275 }
1276
1277 } else if (!squashed_inst->isStoreConditional() ||
1278 !squashed_inst->isCompleted()) {
1279 NonSpecMapIt ns_inst_it =
1280 nonSpecInsts.find(squashed_inst->seqNum);
1281
1282 // we remove non-speculative instructions from
1283 // nonSpecInsts already when they are ready, and so we
1284 // cannot always expect to find them
1285 if (ns_inst_it == nonSpecInsts.end()) {
1286 // loads that became ready but stalled on a
1287 // blocked cache are already removed from
1288 // nonSpecInsts, and have not faulted
1289 assert(squashed_inst->getFault() != NoFault ||
1290 squashed_inst->isMemRef());
1291 } else {
1292
1293 (*ns_inst_it).second = NULL;
1294
1295 nonSpecInsts.erase(ns_inst_it);
1296
1297 ++iqStats.squashedNonSpecRemoved;
1298 }
1299 }
1300
1301 // Might want to also clear out the head of the dependency graph.
1302
1303 // Mark it as squashed within the IQ.
1304 squashed_inst->setSquashedInIQ();
1305
1306 // @todo: Remove this hack where several statuses are set so the
1307 // inst will flow through the rest of the pipeline.
1308 squashed_inst->setIssued();
1309 squashed_inst->setCanCommit();
1310 squashed_inst->clearInIQ();
1311
1312 //Update Thread IQ Count
1313 count[squashed_inst->threadNumber]--;
1314
1315 ++freeEntries;
1316 }
1317
1318 // IQ clears out the heads of the dependency graph only when
1319 // instructions reach writeback stage. If an instruction is squashed
1320 // before writeback stage, its head of dependency graph would not be
1321 // cleared out; it holds the instruction's DynInstPtr. This
1322 // prevents freeing the squashed instruction's DynInst.
1323 // Thus, we need to manually clear out the squashed instructions'
1324 // heads of dependency graph.
1325 for (int dest_reg_idx = 0;
1326 dest_reg_idx < squashed_inst->numDestRegs();
1327 dest_reg_idx++)
1328 {
1329 PhysRegIdPtr dest_reg =
1330 squashed_inst->renamedDestIdx(dest_reg_idx);
1331 if (dest_reg->isFixedMapping()){
1332 continue;
1333 }
1334 assert(dependGraph.empty(dest_reg->flatIndex()));
1335 dependGraph.clearInst(dest_reg->flatIndex());
1336 }
        // Post-decrement on purpose: erase() invalidates squash_it, so
        // step the iterator to the next-older instruction before the
        // erase takes effect.
1337 instList[tid].erase(squash_it--);
1338 ++iqStats.squashedInstsExamined;
1339 }
1340}
1341
1342bool
1344 const DynInstPtr &lhs, const DynInstPtr &rhs) const
1345{
    // Comparator for age-ordered priority queues of instructions:
    // returns true when lhs is younger (larger sequence number) than
    // rhs, so a std::priority_queue using this comparator pops the
    // oldest instruction first.
1346 return lhs->seqNum > rhs->seqNum;
1347}
1348
1349bool
1351{
1352 // Loop through the instruction's source registers, adding
1353 // them to the dependency list if they are not ready.
1354 int8_t total_src_regs = new_inst->numSrcRegs();
1355 bool return_val = false;
1356
1357 for (int src_reg_idx = 0;
1358 src_reg_idx < total_src_regs;
1359 src_reg_idx++)
1360 {
1361 // Only add it to the dependency graph if it's not ready.
1362 if (!new_inst->readySrcIdx(src_reg_idx)) {
1363 PhysRegIdPtr src_reg = new_inst->renamedSrcIdx(src_reg_idx);
1364
1365 // Check the IQ's scoreboard to make sure the register
1366 // hasn't become ready while the instruction was in flight
1367 // between stages. Only if it really isn't ready should
1368 // it be added to the dependency graph.
1369 if (src_reg->isFixedMapping()) {
1370 continue;
1371 } else if (!regScoreboard[src_reg->flatIndex()]) {
1372 DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1373 "is being added to the dependency chain.\n",
1374 new_inst->pcState(), src_reg->index(),
1375 src_reg->className());
1376
1377 dependGraph.insert(src_reg->flatIndex(), new_inst);
1378
1379 // Change the return value to indicate that something
1380 // was added to the dependency graph.
1381 return_val = true;
1382 } else {
1383 DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1384 "became ready before it reached the IQ.\n",
1385 new_inst->pcState(), src_reg->index(),
1386 src_reg->className());
1387 // Mark a register ready within the instruction.
1388 new_inst->markSrcRegReady(src_reg_idx);
1389 }
1390 }
1391 }
1392
1393 return return_val;
1394}
1395
1396void
1398{
1399 // Nothing really needs to be marked when an instruction becomes
1400 // the producer of a register's value, but for convenience a ptr
1401 // to the producing instruction will be placed in the head node of
1402 // the dependency links.
1403 int8_t total_dest_regs = new_inst->numDestRegs();
1404
1405 for (int dest_reg_idx = 0;
1406 dest_reg_idx < total_dest_regs;
1407 dest_reg_idx++)
1408 {
1409 PhysRegIdPtr dest_reg = new_inst->renamedDestIdx(dest_reg_idx);
1410
1411 // Some registers have fixed mapping, and there is no need to track
1412 // dependencies as these instructions must be executed at commit.
1413 if (dest_reg->isFixedMapping()) {
1414 continue;
1415 }
1416
1417 if (!dependGraph.empty(dest_reg->flatIndex())) {
1418 dependGraph.dump();
1419 panic("Dependency graph %i (%s) (flat: %i) not empty!",
1420 dest_reg->index(), dest_reg->className(),
1421 dest_reg->flatIndex());
1422 }
1423
1424 dependGraph.setInst(dest_reg->flatIndex(), new_inst);
1425
1426 // Mark the scoreboard to say it's not yet ready.
1427 regScoreboard[dest_reg->flatIndex()] = false;
1428 }
1429}
1430
1431void
1433{
1434 // If the instruction now has all of its source registers
1435 // available, then add it to the list of ready instructions.
1436 if (inst->readyToIssue()) {
1437
1438 //Add the instruction to the proper ready list.
1439 if (inst->isMemRef()) {
1440
1441 DPRINTF(IQ, "Checking if memory instruction can issue.\n");
1442
1443 // Message to the mem dependence unit that this instruction has
1444 // its registers ready.
1445 memDepUnit[inst->threadNumber].regsReady(inst);
1446
1447 return;
1448 }
1449
1450 OpClass op_class = inst->opClass();
1451
1452 assert(op_class < Num_OpClasses);
1453
1454 DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1455 "the ready list, PC %s opclass:%i [sn:%llu].\n",
1456 inst->pcState(), op_class, inst->seqNum);
1457
1458 readyInsts[op_class].push(inst);
1459
1460 // Will need to reorder the list if either a queue is not on the list,
1461 // or it has an older instruction than last time.
1462 if (!queueOnList[op_class]) {
1463 addToOrderList(op_class);
1464 } else if (readyInsts[op_class].top()->seqNum <
1465 (*readyIt[op_class]).oldestInst) {
1466 listOrder.erase(readyIt[op_class]);
1467 addToOrderList(op_class);
1468 }
1469 }
1470}
1471
1472int
1477
1478void
1480{
1481 for (int i = 0; i < Num_OpClasses; ++i) {
1482 cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
1483
1484 cprintf("\n");
1485 }
1486
1487 cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
1488
1489 NonSpecMapIt non_spec_it = nonSpecInsts.begin();
1490 NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
1491
1492 cprintf("Non speculative list: ");
1493
1494 while (non_spec_it != non_spec_end_it) {
1495 cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(),
1496 (*non_spec_it).second->seqNum);
1497 ++non_spec_it;
1498 }
1499
1500 cprintf("\n");
1501
1502 ListOrderIt list_order_it = listOrder.begin();
1503 ListOrderIt list_order_end_it = listOrder.end();
1504 int i = 1;
1505
1506 cprintf("List order: ");
1507
1508 while (list_order_it != list_order_end_it) {
1509 cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType,
1510 (*list_order_it).oldestInst);
1511
1512 ++list_order_it;
1513 ++i;
1514 }
1515
1516 cprintf("\n");
1517}
1518
1519
1520void
1522{
1523 for (ThreadID tid = 0; tid < numThreads; ++tid) {
1524 int num = 0;
1525 int valid_num = 0;
1526 ListIt inst_list_it = instList[tid].begin();
1527
1528 while (inst_list_it != instList[tid].end()) {
1529 cprintf("Instruction:%i\n", num);
1530 if (!(*inst_list_it)->isSquashed()) {
1531 if (!(*inst_list_it)->isIssued()) {
1532 ++valid_num;
1533 cprintf("Count:%i\n", valid_num);
1534 } else if ((*inst_list_it)->isMemRef() &&
1535 !(*inst_list_it)->memOpDone()) {
1536 // Loads that have not been marked as executed
1537 // still count towards the total instructions.
1538 ++valid_num;
1539 cprintf("Count:%i\n", valid_num);
1540 }
1541 }
1542
1543 cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1544 "Issued:%i\nSquashed:%i\n",
1545 (*inst_list_it)->pcState(),
1546 (*inst_list_it)->seqNum,
1547 (*inst_list_it)->threadNumber,
1548 (*inst_list_it)->isIssued(),
1549 (*inst_list_it)->isSquashed());
1550
1551 if ((*inst_list_it)->isMemRef()) {
1552 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1553 }
1554
1555 cprintf("\n");
1556
1557 inst_list_it++;
1558 ++num;
1559 }
1560 }
1561
1562 cprintf("Insts to Execute list:\n");
1563
1564 int num = 0;
1565 int valid_num = 0;
1566 ListIt inst_list_it = instsToExecute.begin();
1567
1568 while (inst_list_it != instsToExecute.end())
1569 {
1570 cprintf("Instruction:%i\n",
1571 num);
1572 if (!(*inst_list_it)->isSquashed()) {
1573 if (!(*inst_list_it)->isIssued()) {
1574 ++valid_num;
1575 cprintf("Count:%i\n", valid_num);
1576 } else if ((*inst_list_it)->isMemRef() &&
1577 !(*inst_list_it)->memOpDone()) {
1578 // Loads that have not been marked as executed
1579 // still count towards the total instructions.
1580 ++valid_num;
1581 cprintf("Count:%i\n", valid_num);
1582 }
1583 }
1584
1585 cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1586 "Issued:%i\nSquashed:%i\n",
1587 (*inst_list_it)->pcState(),
1588 (*inst_list_it)->seqNum,
1589 (*inst_list_it)->threadNumber,
1590 (*inst_list_it)->isIssued(),
1591 (*inst_list_it)->isSquashed());
1592
1593 if ((*inst_list_it)->isMemRef()) {
1594 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1595 }
1596
1597 cprintf("\n");
1598
1599 inst_list_it++;
1600 ++num;
1601 }
1602}
1603
1604} // namespace o3
1605} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
static const FlagsType AutoDelete
Definition eventq.hh:110
Event(Priority p=Default_Pri, Flags f=0)
Definition eventq.hh:407
const RegIndex & flatIndex() const
Flat index accessor.
Definition reg_class.hh:473
constexpr RegIndex index() const
Visible RegId methods.
Definition reg_class.hh:151
int getNumPinnedWritesToComplete() const
Definition reg_class.hh:496
void decrNumPinnedWritesToComplete()
Definition reg_class.hh:507
bool isPinned() const
Definition reg_class.hh:493
constexpr const char * className() const
Return a const char* with the register class name.
Definition reg_class.hh:282
bool isFixedMapping() const
Returns true if this register is always associated to the same architectural register.
Definition reg_class.hh:470
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buffers between stages.
Definition cpu.hh:97
static constexpr auto NoCapableFU
Instruction asked for a FU but this FUPool does not have a FU for this instruction op type.
Definition fu_pool.hh:156
static constexpr auto NoFreeFU
Instruction asked for a FU but all FU for this op type have already been allocated to other instructi...
Definition fu_pool.hh:163
static constexpr auto NoNeedFU
Named constants to differentiate cases where an instruction asked the FUPool for a free FU but did no...
Definition fu_pool.hh:150
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition iew.hh:88
bool freeFU
Should the FU be added to the list to be freed upon completing this event.
FUCompletion(const DynInstPtr &_inst, int fu_idx, InstructionQueue *iq_ptr)
Construct a FU completion event.
Definition inst_queue.cc:66
DynInstPtr inst
Executing instruction.
int fuIdx
Index of the FU used for executing.
InstructionQueue * iqPtr
Pointer back to the instruction queue.
virtual const char * description() const
Return a C string describing the event.
Definition inst_queue.cc:82
std::string name() const
Returns the name of the IQ.
void commit(const InstSeqNum &inst, ThreadID tid=0)
Commits all instructions up to and including the given sequence number, for a specific thread.
gem5::o3::InstructionQueue::IQStats iqStats
void processFUCompletion(const DynInstPtr &inst, int fu_idx)
Process FU completion event.
DynInstPtr getBlockedMemInstToExecute()
Gets a memory instruction that was blocked on the cache.
std::list< DynInstPtr > instList[MaxThreads]
List of all the instructions in the IQ (some of which may be issued).
void retryMemInst(const DynInstPtr &retry_inst)
Retries a memory instruction in the next cycle.
std::list< DynInstPtr > retryMemInsts
List of instructions that were cache blocked, but a retry has been seen since, so they can now be retried.
void deferMemInst(const DynInstPtr &deferred_inst)
Defers a memory instruction when its DTB translation incurs a hw page table walk.
ReadyInstQueue readyInsts[Num_OpClasses]
List of ready instructions, per op class.
unsigned totalWidth
The total number of instructions that can be issued in one cycle.
void addIfReady(const DynInstPtr &inst)
Moves an instruction to the ready queue if it is ready.
unsigned numEntries
The number of entries in the instruction queue.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a memory or write barrier into the IQ to make sure loads and stores are ordered properly.
bool queueOnList[Num_OpClasses]
Tracks if each ready queue is on the age order list.
FUPool * fuPool
Function unit pool.
int wakeDependents(const DynInstPtr &completed_inst)
Wakes all dependents of a completed instruction.
std::list< DynInstPtr > deferredMemInsts
List of instructions waiting for their DTB translation to complete (hw page table walk in progress).
TimeBuffer< IssueStruct > * issueToExecuteQueue
The queue to the execute stage.
std::list< DynInstPtr > instsToExecute
List of instructions that are ready to be executed.
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the global time buffer.
unsigned numFreeEntries()
Returns total number of free entries.
std::list< DynInstPtr > blockedMemInsts
List of instructions that have been cache blocked.
void rescheduleMemInst(const DynInstPtr &resched_inst)
Reschedules a memory instruction.
TimeBuffer< TimeStruct >::wire fromCommit
Wire to read information from timebuffer.
void insertNonSpec(const DynInstPtr &new_inst)
Inserts a new, non-speculative instruction into the IQ.
void addReadyMemInst(const DynInstPtr &ready_inst)
Adds a ready memory instruction to the ready list.
void replayMemInst(const DynInstPtr &replay_inst)
Replays a memory instruction.
void resetState()
Resets all instruction queue state.
bool isDrained() const
Determine if we are drained.
unsigned count[MaxThreads]
Per Thread IQ count.
void cacheUnblocked()
Notify instruction queue that a previous blockage has resolved.
std::map< InstSeqNum, DynInstPtr > nonSpecInsts
List of non-speculative instructions that will be scheduled once the IQ gets a signal from commit.
unsigned freeEntries
Number of free IQ entries left.
MemDepUnit memDepUnit[MaxThreads]
The memory dependence unit, which tracks/predicts memory dependences between instructions.
void dumpLists()
Debugging function to dump all the list sizes, as well as print out the list of nonspeculative instru...
void blockMemInst(const DynInstPtr &blocked_inst)
Defers a memory instruction when it is cache blocked.
void drainSanityCheck() const
Perform sanity checks after a drain.
unsigned numPhysRegs
The number of physical registers in the CPU.
DynInstPtr getDeferredMemInstToExecute()
Gets a memory instruction that was referred due to a delayed DTB translation if it is now ready to ex...
void dumpInsts()
Debugging function to dump out all instructions that are in the IQ.
void takeOverFrom()
Takes over execution from another CPU's thread.
SMTQueuePolicy iqPolicy
IQ sharing policy for SMT.
gem5::o3::InstructionQueue::IQIOStats iqIOStats
void moveToYoungerInst(ListOrderIt age_order_it)
Called when the oldest instruction has been removed from a ready queue; this places that ready queue ...
InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
Constructs an IQ.
Definition inst_queue.cc:87
InstSeqNum squashedSeqNum[MaxThreads]
The sequence number of the squashed instruction.
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load)
Indicates an ordering violation between a store and a load.
std::list< DynInstPtr >::iterator ListIt
bool hasReadyInsts()
Returns if there are any ready instructions in the IQ.
Cycles commitToIEWDelay
Delay between commit stage and the IQ.
void resetEntries()
Resets max entries for all threads.
int countInsts()
Debugging function to count how many entries are in the IQ.
std::list< ThreadID > * activeThreads
Pointer to list of active threads.
std::list< ListOrderEntry >::iterator ListOrderIt
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets active threads list.
void addToOrderList(OpClass op_class)
Add an op class to the age order list.
ThreadID numThreads
Number of Total Threads.
TimeBuffer< TimeStruct > * timeBuffer
The backwards time buffer.
void scheduleNonSpec(const InstSeqNum &inst)
Schedules a single specific non-speculative instruction.
std::vector< bool > regScoreboard
A cache of the recently woken registers.
void scheduleReadyInsts()
Schedules ready instructions, adding the ready ones (oldest first) to the queue to execute.
bool isFull()
Returns whether or not the IQ is full.
void squash(ThreadID tid)
Squashes instructions for a thread.
IEW * iewStage
Pointer to IEW stage.
std::list< ListOrderEntry > listOrder
List that contains the age order of the oldest instruction of each ready queue.
~InstructionQueue()
Destructs the IQ.
void doSquash(ThreadID tid)
Does the actual squashing.
void setIssueToExecuteQueue(TimeBuffer< IssueStruct > *i2eQueue)
Sets the timer buffer between issue and execute.
int wbOutstanding
Number of instructions currently in flight to FUs.
void insert(const DynInstPtr &new_inst)
Inserts a new instruction into the IQ.
unsigned maxEntries[MaxThreads]
Max IQ Entries Per Thread.
CPU * cpu
Pointer to the CPU.
bool addToDependents(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a consumer.
int entryAmount(ThreadID num_threads)
Number of entries needed for given amount of threads.
DynInstPtr getInstToExecute()
Returns the oldest scheduled instruction, and removes it from the list of instructions waiting to exe...
DependencyGraph< DynInstPtr > dependGraph
ListOrderIt readyIt[Num_OpClasses]
Iterators of each ready queue.
void addToProducers(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a producer.
std::map< InstSeqNum, DynInstPtr >::iterator NonSpecMapIt
Statistics container.
Definition group.hh:93
STL list class.
Definition stl.hh:51
Definition test.h:63
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
static const Priority Stat_Event_Pri
Statistics events (dump, reset, etc.) come after everything else, but before exit.
Definition eventq.hh:222
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
Bitfield< 7 > i
Definition misc_types.hh:67
static constexpr int MaxThreads
Definition limits.hh:38
RefCountingPtr< DynInst > DynInstPtr
Units for Stats.
Definition units.hh:113
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition info.hh:61
const FlagsType total
Print the total.
Definition info.hh:59
const FlagsType dist
Print the distribution.
Definition info.hh:65
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
static const OpClass Num_OpClasses
Definition op_class.hh:149
void cprintf(const char *format, const Args &...args)
Definition cprintf.hh:155
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
PhysRegId * PhysRegIdPtr
Definition reg_class.hh:511
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
@ VecRegClass
Vector Register.
Definition reg_class.hh:64
@ VecElemClass
Vector Register Native Elem lane.
Definition reg_class.hh:66
statistics::Scalar vecInstQueueWakeupAccesses
statistics::Scalar intInstQueueWakeupAccesses
IQIOStats(statistics::Group *parent)
statistics::Scalar fpInstQueueWakeupAccesses
statistics::Scalar floatInstsIssued
Stat for number of floating point instructions issued.
statistics::Vector fuBusy
Number of times the FU was busy.
statistics::Scalar instsAdded
Stat for number of instructions added.
statistics::Distribution numIssuedDist
Distribution of number of instructions in the queue.
statistics::Scalar nonSpecInstsAdded
Stat for number of non-speculative instructions added.
statistics::Scalar squashedInstsExamined
Stat for number of squashed instructions examined when squashing.
statistics::Vector2d issuedInstType
Stat for total number issued for each instruction type.
statistics::Scalar miscInstsIssued
Stat for number of miscellaneous instructions issued.
statistics::Scalar branchInstsIssued
Stat for number of branch instructions issued.
statistics::Formula fuBusyRate
Number of times the FU was busy per instruction issued.
statistics::Scalar memInstsIssued
Stat for number of memory instructions issued.
statistics::Scalar intInstsIssued
Stat for number of integer instructions issued.
statistics::Formula issueRate
Number of instructions issued per cycle.
IQStats(CPU *cpu, const unsigned &total_width)
statistics::Scalar squashedOperandsExamined
Stat for number of squashed instruction operands examined when squashing.
statistics::Scalar squashedInstsIssued
Stat for number of squashed instructions that were ready to issue.
statistics::Vector statFuBusy
Distribution of the cycles it takes to issue an instruction.
statistics::Scalar squashedNonSpecRemoved
Stat for number of non-speculative instructions removed due to a squash.
Entry for the list age ordering by op class.
bool operator()(const DynInstPtr &lhs, const DynInstPtr &rhs) const

Generated on Mon Oct 27 2025 04:13:00 for gem5 by doxygen 1.14.0