gem5 v24.1.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
inst_queue.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2014, 2017-2020 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/inst_queue.hh"
43
44#include <limits>
45#include <vector>
46
47#include "base/logging.hh"
48#include "cpu/o3/dyn_inst.hh"
49#include "cpu/o3/fu_pool.hh"
50#include "cpu/o3/limits.hh"
51#include "debug/IQ.hh"
52#include "enums/OpClass.hh"
53#include "params/BaseO3CPU.hh"
54#include "sim/core.hh"
55
56// clang complains about std::set being overloaded with Packet::set if
57// we open up the entire namespace std
58using std::list;
59
60namespace gem5
61{
62
63namespace o3
64{
65
// Constructor (tail) of the functional-unit completion event.
// NOTE(review): listing line 66, which carried the class-qualified name and
// the first parameter (_inst), is missing from this extract; presumably this
// is FUCompletion::FUCompletion (the scheduler later does
// `new FUCompletion(issuing_inst, idx, this)`) -- confirm against the
// original file.
// Stores the instruction, the FU index and the owning instruction queue;
// freeFU starts out false. The base Event is built with Stat_Event_Pri and
// the AutoDelete flag.
67 int fu_idx, InstructionQueue *iq_ptr)
68 : Event(Stat_Event_Pri, AutoDelete),
69 inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
70{
71}
72
// Hands the completed instruction back to the instruction queue.
// NOTE(review): the line naming this function (listing line 74) is missing
// from this extract; presumably it is the event's process() callback.
73void
75{
 // Report the FU index only when freeFU is set; otherwise pass -1.
76 iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
 // Clear the stored instruction pointer once it has been forwarded.
77 inst = NULL;
78}
79
80
// Returns a fixed human-readable label for this event.
// NOTE(review): the line naming this function (listing line 82) is missing
// from this extract.
81const char *
83{
84 return "Functional unit completion";
85}
86
// Constructor (tail) of the instruction queue.
// NOTE(review): several listing lines are missing from this extract:
//  - line 87: the class-qualified name and the leading parameters (the
//    initializers below use cpu_ptr and iew_ptr);
//  - lines 96-98: the remainder of the member-initializer list;
//  - line 119: the statement announced by the "Resize the register
//    scoreboard" comment.
//
// Visible behaviour: copies sizing and policy settings from params, computes
// the total number of physical registers, sizes the dependency graph,
// initialises one memory dependence unit per possible thread, resets the
// queue state, and finally derives the per-thread entry limit (maxEntries)
// from the configured SMT sharing policy.
88 const BaseO3CPUParams &params)
89 : cpu(cpu_ptr),
90 iewStage(iew_ptr),
91 fuPool(params.fuPool),
92 iqPolicy(params.smtIQPolicy),
93 numThreads(params.numThreads),
94 numEntries(params.numIQEntries),
95 totalWidth(params.issueWidth),
99{
100 assert(fuPool);
101
102 const auto &reg_classes = params.isa[0]->regClasses();
103 // Set the number of total physical registers
104 // As the vector registers have two addressing modes, they are added twice
 // The second vector term scales numPhysVecRegs by the integer ratio of
 // VecElemClass to VecRegClass register counts (presumably the number of
 // elements per vector register -- confirm).
105 numPhysRegs = params.numPhysIntRegs + params.numPhysFloatRegs +
106 params.numPhysVecRegs +
107 params.numPhysVecRegs * (
108 reg_classes.at(VecElemClass)->numRegs() /
109 reg_classes.at(VecRegClass)->numRegs()) +
110 params.numPhysVecPredRegs +
111 params.numPhysMatRegs +
112 params.numPhysCCRegs;
113
114 //Create an entry for each physical register within the
115 //dependency graph.
116 dependGraph.resize(numPhysRegs);
117
118 // Resize the register scoreboard.
120
121 //Initialize Mem Dependence Units
 // Note: this loop covers all MaxThreads slots, not just numThreads.
122 for (ThreadID tid = 0; tid < MaxThreads; tid++) {
123 memDepUnit[tid].init(params, tid, cpu_ptr);
124 memDepUnit[tid].setIQ(this);
125 }
126
127 resetState();
128
129 //Figure out resource sharing policy
130 if (iqPolicy == SMTQueuePolicy::Dynamic) {
131 //Set Max Entries to Total IQ Capacity
132 for (ThreadID tid = 0; tid < numThreads; tid++) {
133 maxEntries[tid] = numEntries;
134 }
135
136 } else if (iqPolicy == SMTQueuePolicy::Partitioned) {
137 //@todo:make work if part_amt doesnt divide evenly.
 // Integer division: any remainder entries are not assigned to a thread.
138 int part_amt = numEntries / numThreads;
139
140 //Divide IQ up evenly
141 for (ThreadID tid = 0; tid < numThreads; tid++) {
142 maxEntries[tid] = part_amt;
143 }
144
145 DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
146 "%i entries per thread.\n",part_amt);
147 } else if (iqPolicy == SMTQueuePolicy::Threshold) {
 // smtIQThreshold is treated as a percentage of numEntries; the product
 // is truncated by the int cast.
148 double threshold = (double)params.smtIQThreshold / 100;
149
150 int thresholdIQ = (int)((double)threshold * numEntries);
151
152 //Divide up by threshold amount
153 for (ThreadID tid = 0; tid < numThreads; tid++) {
154 maxEntries[tid] = thresholdIQ;
155 }
156
157 DPRINTF(IQ, "IQ sharing policy set to Threshold:"
158 "%i entries per thread.\n",thresholdIQ);
159 }
 // Thread slots beyond the configured thread count get no entries.
160 for (ThreadID tid = numThreads; tid < MaxThreads; tid++) {
161 maxEntries[tid] = 0;
162 }
163}
164
// Tears down the dependency graph and, in GEM5_DEBUG builds, prints the
// graph's traversal/removal counters.
// NOTE(review): the line naming this function (listing line 165) is missing
// from this extract; presumably the destructor -- confirm.
166{
167 dependGraph.reset();
168#ifdef GEM5_DEBUG
169 cprintf("Nodes traversed: %i, removed: %i\n",
170 dependGraph.nodesTraversed, dependGraph.nodesRemoved);
171#endif
172}
173
// Returns the queue's name: the owning CPU's name with ".iq" appended.
// NOTE(review): the line naming this function (listing line 175) is missing
// from this extract.
174std::string
176{
177 return cpu->name() + ".iq";
178}
179
// Statistics group for the instruction queue, registered under the CPU's
// statistics group. Each ADD_STAT below declares one statistic together with
// its unit and description; issueRate is additionally given the formula
// instsIssued / cpu->baseStats.numCycles.
//
// NOTE(review): inside the constructor body most of the statements that
// configure the individual statistics are missing from this extract (the
// listing line numbers jump, e.g. 221 -> 224 -> 227), so only fragments such
// as a bare `.init(...)` or `;` remain. Restore them from the original file
// before attempting to compile.
180InstructionQueue::IQStats::IQStats(CPU *cpu, const unsigned &total_width)
181 : statistics::Group(cpu),
182 ADD_STAT(instsAdded, statistics::units::Count::get(),
183 "Number of instructions added to the IQ (excludes non-spec)"),
184 ADD_STAT(nonSpecInstsAdded, statistics::units::Count::get(),
185 "Number of non-speculative instructions added to the IQ"),
186 ADD_STAT(instsIssued, statistics::units::Count::get(),
187 "Number of instructions issued"),
188 ADD_STAT(intInstsIssued, statistics::units::Count::get(),
189 "Number of integer instructions issued"),
190 ADD_STAT(floatInstsIssued, statistics::units::Count::get(),
191 "Number of float instructions issued"),
192 ADD_STAT(branchInstsIssued, statistics::units::Count::get(),
193 "Number of branch instructions issued"),
194 ADD_STAT(memInstsIssued, statistics::units::Count::get(),
195 "Number of memory instructions issued"),
196 ADD_STAT(miscInstsIssued, statistics::units::Count::get(),
197 "Number of miscellaneous instructions issued"),
198 ADD_STAT(squashedInstsIssued, statistics::units::Count::get(),
199 "Number of squashed instructions issued"),
200 ADD_STAT(squashedInstsExamined, statistics::units::Count::get(),
201 "Number of squashed instructions iterated over during squash; "
202 "mainly for profiling"),
203 ADD_STAT(squashedOperandsExamined, statistics::units::Count::get(),
204 "Number of squashed operands that are examined and possibly "
205 "removed from graph"),
206 ADD_STAT(squashedNonSpecRemoved, statistics::units::Count::get(),
207 "Number of squashed non-spec instructions that were removed"),
208 ADD_STAT(numIssuedDist, statistics::units::Count::get(),
209 "Number of insts issued each cycle"),
210 ADD_STAT(statFuBusy, statistics::units::Count::get(),
211 "attempts to use FU when none available"),
212 ADD_STAT(statIssuedInstType, statistics::units::Count::get(),
213 "Number of instructions issued per FU type, per thread"),
214 ADD_STAT(issueRate, statistics::units::Rate<
215 statistics::units::Count, statistics::units::Cycle>::get(),
216 "Inst issue rate", instsIssued / cpu->baseStats.numCycles),
217 ADD_STAT(fuBusy, statistics::units::Count::get(), "FU busy when requested"),
218 ADD_STAT(fuBusyRate, statistics::units::Rate<
219 statistics::units::Count, statistics::units::Count>::get(),
220 "FU busy rate (busy events/executed inst)")
221{
224
227
230
233
236
239
242
245
248
251
254
257/*
258 queueResDist
259 .init(Num_OpClasses, 0, 99, 2)
260 .name(name() + ".IQ:residence:")
261 .desc("cycles from dispatch to issue")
262 .flags(total | pdf | cdf )
263 ;
264 for (int i = 0; i < Num_OpClasses; ++i) {
265 queueResDist.subname(i, opClassStrings[i]);
266 }
267*/
 // NOTE(review): the stat this .init() belongs to (listing line 268) is
 // missing; the bucket range runs from 0 to total_width in steps of 1.
269 .init(0,total_width,1)
271 ;
272/*
273 dist_unissued
274 .init(Num_OpClasses+2)
275 .name(name() + ".unissued_cause")
276 .desc("Reason ready instruction not issued")
277 .flags(pdf | dist)
278 ;
279 for (int i=0; i < (Num_OpClasses + 2); ++i) {
280 dist_unissued.subname(i, unissued_names[i]);
281 }
282*/
 // NOTE(review): receiver line (listing line 283) is missing; the shape is
 // (number of CPU threads) x (number of op classes).
284 .init(cpu->numThreads,enums::Num_OpClass)
286 ;
 // Label the second dimension of statIssuedInstType with op-class names.
287 statIssuedInstType.ysubnames(enums::OpClassStrings);
288
289 //
290 // How long did instructions for a particular FU type wait prior to issue
291 //
292/*
293 issueDelayDist
294 .init(Num_OpClasses,0,99,2)
295 .name(name() + ".")
296 .desc("cycles from operands ready to issue")
297 .flags(pdf | cdf)
298 ;
299 for (int i=0; i<Num_OpClasses; ++i) {
300 std::stringstream subname;
301 subname << opClassStrings[i] << "_delay";
302 issueDelayDist.subname(i, subname.str());
303 }
304*/
307 ;
308
312 ;
 // Name each statFuBusy bucket after its op class.
313 for (int i=0; i < Num_OpClasses; ++i) {
314 statFuBusy.subname(i, enums::OpClassStrings[i]);
315 }
316
317 fuBusy
320 ;
321
324 ;
326}
327
// Statistics group counting instruction-queue reads, writes and wakeup
// accesses, plus ALU accesses, split into integer, floating-point and vector
// categories. Registered under the statistics group passed as `parent`.
//
// NOTE(review): the constructor's signature line (listing line 328) and, in
// the body, every line naming the stat that each `.flags(total)` applies to
// are missing from this extract. Restore them from the original file before
// attempting to compile.
329 : statistics::Group(parent),
330 ADD_STAT(intInstQueueReads, statistics::units::Count::get(),
331 "Number of integer instruction queue reads"),
332 ADD_STAT(intInstQueueWrites, statistics::units::Count::get(),
333 "Number of integer instruction queue writes"),
334 ADD_STAT(intInstQueueWakeupAccesses, statistics::units::Count::get(),
335 "Number of integer instruction queue wakeup accesses"),
336 ADD_STAT(fpInstQueueReads, statistics::units::Count::get(),
337 "Number of floating instruction queue reads"),
338 ADD_STAT(fpInstQueueWrites, statistics::units::Count::get(),
339 "Number of floating instruction queue writes"),
340 ADD_STAT(fpInstQueueWakeupAccesses, statistics::units::Count::get(),
341 "Number of floating instruction queue wakeup accesses"),
342 ADD_STAT(vecInstQueueReads, statistics::units::Count::get(),
343 "Number of vector instruction queue reads"),
344 ADD_STAT(vecInstQueueWrites, statistics::units::Count::get(),
345 "Number of vector instruction queue writes"),
346 ADD_STAT(vecInstQueueWakeupAccesses, statistics::units::Count::get(),
347 "Number of vector instruction queue wakeup accesses"),
348 ADD_STAT(intAluAccesses, statistics::units::Count::get(),
349 "Number of integer alu accesses"),
350 ADD_STAT(fpAluAccesses, statistics::units::Count::get(),
351 "Number of floating point alu accesses"),
352 ADD_STAT(vecAluAccesses, statistics::units::Count::get(),
353 "Number of vector alu accesses")
354{
355 using namespace statistics;
 // Twelve `.flags(total)` calls follow, matching the twelve stats declared
 // above; their receivers are the missing lines.
357 .flags(total);
358
360 .flags(total);
361
363 .flags(total);
364
366 .flags(total);
367
369 .flags(total);
370
372 .flags(total);
373
375 .flags(total);
376
378 .flags(total);
379
381 .flags(total);
382
384 .flags(total);
385
387 .flags(total);
388
390 .flags(total);
391}
392
// Returns the queue to its empty state: per-thread counts and instruction
// lists, the register scoreboard, squash sequence numbers, every per-op-class
// ready queue, the ordering list and all memory-instruction side lists.
// NOTE(review): the line naming this function (listing line 394) and the
// statement under "Initialize the number of free IQ entries" (listing line
// 403) are missing from this extract.
393void
395{
396 //Initialize thread IQ counts
397 for (ThreadID tid = 0; tid < MaxThreads; tid++) {
398 count[tid] = 0;
399 instList[tid].clear();
400 }
401
402 // Initialize the number of free IQ entries.
404
405 // Note that in actuality, the registers corresponding to the logical
406 // registers start off as ready. However this doesn't matter for the
407 // IQ as the instruction should have been correctly told if those
408 // registers are ready in rename. Thus it can all be initialized as
409 // unready.
410 for (int i = 0; i < numPhysRegs; ++i) {
411 regScoreboard[i] = false;
412 }
413
414 for (ThreadID tid = 0; tid < MaxThreads; ++tid) {
415 squashedSeqNum[tid] = 0;
416 }
417
 // Drain every ready queue by popping until empty, and mark each op class
 // as not present on the ordering list.
418 for (int i = 0; i < Num_OpClasses; ++i) {
419 while (!readyInsts[i].empty())
420 readyInsts[i].pop();
421 queueOnList[i] = false;
422 readyIt[i] = listOrder.end();
423 }
424 nonSpecInsts.clear();
425 listOrder.clear();
426 deferredMemInsts.clear();
427 blockedMemInsts.clear();
428 retryMemInsts.clear();
429 wbOutstanding = 0;
430}
431
432void
437
438void
443
444void
451
// Reports whether the queue holds no outstanding work: the dependency graph
// and the to-execute list are empty, no writebacks are outstanding, and the
// memory dependence unit of every configured thread reports drained.
// NOTE(review): the line naming this function (listing line 453) is missing
// from this extract.
452bool
454{
455 bool drained = dependGraph.empty() &&
456 instsToExecute.empty() &&
457 wbOutstanding == 0;
458 for (ThreadID tid = 0; tid < numThreads; ++tid)
459 drained = drained && memDepUnit[tid].isDrained();
460
461 return drained;
462}
463
// Asserts that the dependency graph and the to-execute list are empty.
// NOTE(review): the line naming this function (listing line 465) and the body
// of the per-thread loop (listing line 470) are missing from this extract; as
// written the `for` has no statement before the closing brace.
464void
466{
467 assert(dependGraph.empty());
468 assert(instsToExecute.empty());
469 for (ThreadID tid = 0; tid < numThreads; ++tid)
471}
472
473void
478
// Returns the per-thread share of IQ entries for num_threads threads: the
// integer quotient numEntries / num_threads under the Partitioned policy,
// and 0 under any other policy.
// NOTE(review): the line naming this function and declaring num_threads
// (listing line 480) is missing from this extract.
479int
481{
482 if (iqPolicy == SMTQueuePolicy::Partitioned) {
483 return numEntries / num_threads;
484 } else {
485 return 0;
486 }
487}
488
489
// Recomputes maxEntries for every currently active thread.
// Under Partitioned the entries are split evenly (integer division) among the
// active threads; under Threshold a thread gets the whole queue only when it
// is the sole active thread. Other combinations leave maxEntries untouched.
// NOTE(review): the line naming this function (listing line 491) and the
// declaration of `end` used by the loop (listing line 497) are missing from
// this extract.
490void
492{
493 if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
494 int active_threads = activeThreads->size();
495
496 list<ThreadID>::iterator threads = activeThreads->begin();
498
499 while (threads != end) {
500 ThreadID tid = *threads++;
501
502 if (iqPolicy == SMTQueuePolicy::Partitioned) {
503 maxEntries[tid] = numEntries / active_threads;
504 } else if (iqPolicy == SMTQueuePolicy::Threshold &&
505 active_threads == 1) {
506 maxEntries[tid] = numEntries;
507 }
508 }
509 }
510}
511
512unsigned
517
// Returns how many more entries thread `tid` may occupy: its limit
// (maxEntries) minus the number it currently holds (count).
// NOTE(review): the line naming this function and declaring tid (listing
// line 519) is missing from this extract.
518unsigned
520{
521 return maxEntries[tid] - count[tid];
522}
523
524// Might want to do something more complex if it knows how many instructions
525// will be issued this cycle.
// Returns true when the queue as a whole has no free entries left.
// NOTE(review): the line naming this function (listing line 527) is missing
// from this extract.
526bool
528{
529 if (freeEntries == 0) {
530 return(true);
531 } else {
532 return(false);
533 }
534}
535
// Per-thread variant: returns true when numFreeEntries(tid) is zero, i.e. the
// thread has used up its own entry limit.
// NOTE(review): the line naming this function and declaring tid (listing
// line 537) is missing from this extract.
536bool
538{
539 if (numFreeEntries(tid) == 0) {
540 return(true);
541 } else {
542 return(false);
543 }
544}
545
// Returns true if anything is waiting to issue: either the ordering list is
// non-empty or at least one per-op-class ready queue holds an instruction.
// NOTE(review): the line naming this function (listing line 547) is missing
// from this extract.
546bool
548{
549 if (!listOrder.empty()) {
550 return true;
551 }
552
553 for (int i = 0; i < Num_OpClasses; ++i) {
554 if (!readyInsts[i].empty()) {
555 return true;
556 }
557 }
558
559 return false;
560}
561
// Adds a (speculative) instruction to the queue: appends it to its thread's
// instruction list, consumes one free entry, registers its source and
// destination register dependencies, and either passes it to the memory
// dependence unit (memory references) or tries to mark it ready.
// NOTE(review): the line naming this function and declaring new_inst (listing
// line 563), the three statements inside the isFloating/isVector/else
// branches (listing lines 566, 568, 570) and listing line 600 are missing
// from this extract.
// NOTE(review): new_inst is dereferenced by isFloating() before
// assert(new_inst) runs, so the assert cannot catch a null pointer here.
562void
564{
565 if (new_inst->isFloating()) {
567 } else if (new_inst->isVector()) {
569 } else {
571 }
572 // Make sure the instruction is valid
573 assert(new_inst);
574
575 DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n",
576 new_inst->seqNum, new_inst->pcState());
577
 // Callers are expected to have checked for space beforehand.
578 assert(freeEntries != 0);
579
580 instList[new_inst->threadNumber].push_back(new_inst);
581
582 --freeEntries;
583
584 new_inst->setInIQ();
585
586 // Look through its source registers (physical regs), and mark any
587 // dependencies.
588 addToDependents(new_inst);
589
590 // Have this instruction set itself as the producer of its destination
591 // register(s).
592 addToProducers(new_inst);
593
594 if (new_inst->isMemRef()) {
595 memDepUnit[new_inst->threadNumber].insert(new_inst);
596 } else {
597 addIfReady(new_inst);
598 }
599
601
602 count[new_inst->threadNumber]++;
603
 // Invariant: free entries plus occupied entries equals the queue size.
604 assert(freeEntries == (numEntries - countInsts()));
605}
606
// Adds a non-speculative instruction to the queue. Unlike the regular insert
// path it records the instruction in nonSpecInsts (keyed by sequence number),
// does not call addToDependents, and does not try to mark the instruction
// ready; memory references go to the memory dependence unit's non-spec path.
// NOTE(review): the line naming this function and declaring new_inst (listing
// line 608), the statements inside the isFloating/isVector/else branches
// (listing lines 613, 615, 617) and listing line 646 are missing from this
// extract.
// NOTE(review): new_inst is dereferenced by isFloating() before
// assert(new_inst) runs, so the assert cannot catch a null pointer here.
607void
609{
610 // @todo: Clean up this code; can do it by setting inst as unable
611 // to issue, then calling normal insert on the inst.
612 if (new_inst->isFloating()) {
614 } else if (new_inst->isVector()) {
616 } else {
618 }
619
620 assert(new_inst);
621
622 nonSpecInsts[new_inst->seqNum] = new_inst;
623
624 DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s "
625 "to the IQ.\n",
626 new_inst->seqNum, new_inst->pcState());
627
628 assert(freeEntries != 0);
629
630 instList[new_inst->threadNumber].push_back(new_inst);
631
632 --freeEntries;
633
634 new_inst->setInIQ();
635
636 // Have this instruction set itself as the producer of its destination
637 // register(s).
638 addToProducers(new_inst);
639
640 // If it's a memory instruction, add it to the memory dependency
641 // unit.
642 if (new_inst->isMemRef()) {
643 memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
644 }
645
647
648 count[new_inst->threadNumber]++;
649
 // Invariant: free entries plus occupied entries equals the queue size.
650 assert(freeEntries == (numEntries - countInsts()));
651}
652
// Registers a barrier with its thread's memory dependence unit and then
// inserts it into the queue through the non-speculative path.
// NOTE(review): the line naming this function and declaring barr_inst
// (listing line 654) is missing from this extract.
653void
655{
656 memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
657
658 insertNonSpec(barr_inst);
659}
660
// Removes and returns the instruction at the front of instsToExecute.
// The list must be non-empty (asserted).
// NOTE(review): the return type and the line naming this function (listing
// lines 661-662) and the statements inside the isFloating/isVector/else
// branches (listing lines 668, 670, 672) are missing from this extract.
663{
664 assert(!instsToExecute.empty());
 // Move the pointer out of the list before popping the now-empty slot.
665 DynInstPtr inst = std::move(instsToExecute.front());
666 instsToExecute.pop_front();
667 if (inst->isFloating()) {
669 } else if (inst->isVector()) {
671 } else {
673 }
674 return inst;
675}
676
// Inserts an entry for op_class into listOrder, keyed by the sequence number
// of the instruction at the top of that class's ready queue. The entry is
// placed in front of the first existing entry whose oldestInst is greater, so
// the list stays sorted by ascending sequence number. Records the new
// position in readyIt and marks the class as being on the list.
// The class's ready queue must be non-empty (asserted).
// NOTE(review): the line naming this function and declaring op_class (listing
// line 678) is missing from this extract.
677void
679{
680 assert(!readyInsts[op_class].empty());
681
682 ListOrderEntry queue_entry;
683
684 queue_entry.queueType = op_class;
685
686 queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
687
688 ListOrderIt list_it = listOrder.begin();
689 ListOrderIt list_end_it = listOrder.end();
690
 // Linear scan for the insertion point.
691 while (list_it != list_end_it) {
692 if ((*list_it).oldestInst > queue_entry.oldestInst) {
693 break;
694 }
695
696 list_it++;
697 }
698
699 readyIt[op_class] = listOrder.insert(list_it, queue_entry);
700 queueOnList[op_class] = true;
701}
702
// Inserts a fresh listOrder entry for the op class referenced by
// list_order_it, keyed by the current top of that class's ready queue. The
// search for the insertion point starts just after list_order_it and skips
// entries with a smaller oldestInst. The original entry is NOT erased here;
// the visible callers erase it themselves afterwards.
// NOTE(review): the line naming this function and declaring list_order_it
// (listing line 704) is missing from this extract.
703void
705{
706 // Get iterator of next item on the list
707 // Delete the original iterator
708 // Determine if the next item is either the end of the list or younger
709 // than the new instruction. If so, then add in a new iterator right here.
710 // If not, then move along.
711 ListOrderEntry queue_entry;
712 OpClass op_class = (*list_order_it).queueType;
713 ListOrderIt next_it = list_order_it;
714
715 ++next_it;
716
717 queue_entry.queueType = op_class;
718 queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
719
720 while (next_it != listOrder.end() &&
721 (*next_it).oldestInst < queue_entry.oldestInst) {
722 ++next_it;
723 }
724
725 readyIt[op_class] = listOrder.insert(next_it, queue_entry);
726}
727
// Called when a functional unit finishes an instruction: wakes the CPU via
// the IEW stage, frees the FU for the next cycle when fu_idx is non-negative,
// and queues the instruction for execution.
// NOTE(review): the line naming this function and declaring inst / fu_idx
// (listing line 729) and listing line 735 are missing from this extract.
728void
730{
731 DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
732 assert(!cpu->switchedOut());
733 // The CPU could have been sleeping until this op completed (*extremely*
734 // long latency op). Wake it if it was. This may be overkill.
736 iewStage->wakeCPU();
737
 // A negative fu_idx means there is no FU to release here.
738 if (fu_idx > -1)
739 fuPool->freeUnitNextCycle(fu_idx);
740
741 // @todo: Ensure that these FU Completions happen at the beginning
742 // of a cycle, otherwise they could add too many instructions to
743 // the queue.
 // Bump the size field of the issue-to-execute entry at offset -1
 // (presumably the previous cycle's slot -- confirm).
744 issueToExecuteQueue->access(-1)->size++;
745 instsToExecute.push_back(inst);
746}
747
748// @todo: Figure out a better way to remove the squashed items from the
749// lists. Checking the top item of each list to see if it's squashed
750// wastes time and forces jumps.
// Issues ready instructions for this cycle. Deferred and blocked memory
// instructions that can now proceed are first moved onto the ready lists;
// then listOrder is walked from its head, issuing the top instruction of each
// op class until totalWidth instructions have been issued or the list ends.
// NOTE(review): the following listing lines are missing from this extract:
// 752 (function name), 789/791/793 and 822/824/826 (statements inside the
// isFloating/isVector/else branches), 810, 844 (the statement controlled by
// `if (idx >= 0)`), 858, 871 (the statement in the pipelined branch) and 927
// (the statement in the "had activity" branch). As written, `if (idx >= 0)`
// would bind to the following `if (idx == FUPool::NoCapableFU)` statement.
751void
753{
754 DPRINTF(IQ, "Attempting to schedule ready instructions from "
755 "the IQ.\n");
756
757 IssueStruct *i2e_info = issueToExecuteQueue->access(0);
758
759 DynInstPtr mem_inst;
760 while ((mem_inst = getDeferredMemInstToExecute())) {
761 addReadyMemInst(mem_inst);
762 }
763
764 // See if any cache blocked instructions are able to be executed
765 while ((mem_inst = getBlockedMemInstToExecute())) {
766 addReadyMemInst(mem_inst);
767 }
768
769 // Have iterator to head of the list
770 // While I haven't exceeded bandwidth or reached the end of the list,
771 // Try to get a FU that can do what this op needs.
772 // If successful, change the oldestInst to the new top of the list, put
773 // the queue in the proper place in the list.
774 // Increment the iterator.
775 // This will avoid trying to schedule a certain op class if there are no
776 // FUs that handle it.
777 int total_issued = 0;
778 ListOrderIt order_it = listOrder.begin();
779 ListOrderIt order_end_it = listOrder.end();
780
781 while (total_issued < totalWidth && order_it != order_end_it) {
782 OpClass op_class = (*order_it).queueType;
783
784 assert(!readyInsts[op_class].empty());
785
786 DynInstPtr issuing_inst = readyInsts[op_class].top();
787
788 if (issuing_inst->isFloating()) {
790 } else if (issuing_inst->isVector()) {
792 } else {
794 }
795
796 assert(issuing_inst->seqNum == (*order_it).oldestInst);
797
 // Squashed instructions are dropped from the ready queue without
 // issuing; the op class is re-listed under its next instruction, if any.
798 if (issuing_inst->isSquashed()) {
799 readyInsts[op_class].pop();
800
801 if (!readyInsts[op_class].empty()) {
802 moveToYoungerInst(order_it);
803 } else {
804 readyIt[op_class] = listOrder.end();
805 queueOnList[op_class] = false;
806 }
807
 // Post-increment keeps order_it valid across the erase.
808 listOrder.erase(order_it++);
809
811
812 continue;
813 }
814
815 int idx = FUPool::NoNeedFU;
816 Cycles op_latency = Cycles(1);
817 ThreadID tid = issuing_inst->threadNumber;
818
 // Only instructions with a real op class request a functional unit.
819 if (op_class != No_OpClass) {
820 idx = fuPool->getUnit(op_class);
821 if (issuing_inst->isFloating()) {
823 } else if (issuing_inst->isVector()) {
825 } else {
827 }
828 if (idx > FUPool::NoFreeFU) {
829 op_latency = fuPool->getOpLatency(op_class);
830 }
831 }
832
833 // If we have an instruction that doesn't require a FU, or a
834 // valid FU, then schedule for execution.
835 if (idx > FUPool::NoFreeFU || idx == FUPool::NoNeedFU ||
836 idx == FUPool::NoCapableFU) {
 // Single-cycle ops go straight onto the execute list; longer ops
 // are delivered later by a scheduled FUCompletion event.
837 if (op_latency == Cycles(1)) {
838 i2e_info->size++;
839 instsToExecute.push_back(issuing_inst);
840
841 // Add the FU onto the list of FU's to be freed next
842 // cycle if we used one.
843 if (idx >= 0)
845
846 // CPU has no capable FU for the instruction
847 // but this may be OK if the instruction gets
848 // squashed. Remember this and give IEW
849 // the opportunity to trigger a fault
850 // if the instruction is unsupported.
851 // Otherwise, commit will panic.
852 if (idx == FUPool::NoCapableFU)
853 issuing_inst->setNoCapableFU();
854 } else {
855 assert(idx != FUPool::NoCapableFU);
856 bool pipelined = fuPool->isPipelined(op_class);
857 // Generate completion event for the FU
859 FUCompletion *execution = new FUCompletion(issuing_inst,
860 idx, this);
861
 // The event fires op_latency - 1 cycles from now.
862 cpu->schedule(execution,
863 cpu->clockEdge(Cycles(op_latency - 1)));
864
865 if (!pipelined) {
866 // If FU isn't pipelined, then it must be freed
867 // upon the execution completing.
868 execution->setFreeFU();
869 } else {
870 // Add the FU onto the list of FU's to be freed next cycle.
872 }
873 }
874
875 DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
876 "[sn:%llu]\n",
877 tid, issuing_inst->pcState(),
878 issuing_inst->seqNum);
879
880 readyInsts[op_class].pop();
881
882 if (!readyInsts[op_class].empty()) {
883 moveToYoungerInst(order_it);
884 } else {
885 readyIt[op_class] = listOrder.end();
886 queueOnList[op_class] = false;
887 }
888
889 issuing_inst->setIssued();
890 ++total_issued;
891
892#if TRACING_ON
893 issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
894#endif
895
 // Record the tick of the first issue only (-1 means "not yet set").
896 if (issuing_inst->firstIssue == -1)
897 issuing_inst->firstIssue = curTick();
898
899 if (!issuing_inst->isMemRef()) {
900 // Memory instructions can not be freed from the IQ until they
901 // complete.
902 ++freeEntries;
903 count[tid]--;
904 issuing_inst->clearInIQ();
905 } else {
906 memDepUnit[tid].issue(issuing_inst);
907 }
908
 // Post-increment keeps order_it valid across the erase.
909 listOrder.erase(order_it++);
910 iqStats.statIssuedInstType[tid][op_class]++;
911 } else {
 // No free FU this cycle: count the stall and try the next class.
912 assert(idx == FUPool::NoFreeFU);
913 iqStats.statFuBusy[op_class]++;
914 iqStats.fuBusy[tid]++;
915 ++order_it;
916 }
917 }
918
919 iqStats.numIssuedDist.sample(total_issued);
920 iqStats.instsIssued+= total_issued;
921
922 // If we issued any instructions, tell the CPU we had activity.
923 // @todo If the way deferred memory instructions are handled due to
924 // translation changes then the deferredMemInsts condition should be
925 // removed from the code below.
926 if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
928 } else {
929 DPRINTF(IQ, "Not able to schedule any instructions.\n");
930 }
931}
932
// Marks the non-speculative instruction with sequence number `inst` as
// allowed to issue. The instruction must be present in nonSpecInsts
// (asserted). Non-memory instructions are offered to the ready list; memory
// references are reported to the thread's memory dependence unit. The map
// entry is removed afterwards.
// NOTE(review): the line naming this function and declaring inst (listing
// line 934) is missing from this extract; inst is used here as a sequence
// number (it is the key for nonSpecInsts.find).
933void
935{
936 DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready "
937 "to execute.\n", inst);
938
939 NonSpecMapIt inst_it = nonSpecInsts.find(inst);
940
941 assert(inst_it != nonSpecInsts.end());
942
943 ThreadID tid = (*inst_it).second->threadNumber;
944
945 (*inst_it).second->setAtCommit();
946
947 (*inst_it).second->setCanIssue();
948
949 if (!(*inst_it).second->isMemRef()) {
950 addIfReady((*inst_it).second);
951 } else {
952 memDepUnit[tid].nonSpecInstReady((*inst_it).second);
953 }
954
 // Clear the stored pointer before erasing the map entry.
955 (*inst_it).second = NULL;
956
957 nonSpecInsts.erase(inst_it);
958}
959
// Removes from the front of thread tid's instruction list every instruction
// whose sequence number is less than or equal to `inst`.
// NOTE(review): the line naming this function and declaring inst / tid
// (listing line 961) is missing from this extract.
960void
962{
963 DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n",
964 tid,inst);
965
966 ListIt iq_it = instList[tid].begin();
967
968 while (iq_it != instList[tid].end() &&
969 (*iq_it)->seqNum <= inst) {
 // Advance first so the iterator no longer refers to the element
 // that pop_front() removes.
970 ++iq_it;
971 instList[tid].pop_front();
972 }
973
974 assert(freeEntries == (numEntries - countInsts()));
975}
976
// Wakes the instructions waiting on the destination registers of
// completed_inst and returns how many dependents were woken.
// For memory references this also notifies the memory dependence unit and
// releases the instruction's IQ entry. For each destination register that is
// neither fixed-mapping nor still awaiting pinned writes, every instruction
// on the register's dependency chain gets one source marked ready and is
// offered to the ready list; the register is then marked ready in the
// scoreboard. completed_inst must not be squashed (asserted).
// NOTE(review): the line naming this function and declaring completed_inst
// (listing line 978), the statements inside the isFloating/isVector/else
// branches (listing lines 984, 986, 988) and listing line 1032 are missing
// from this extract.
977int
979{
980 int dependents = 0;
981
982 // The instruction queue here takes care of both floating and int ops
983 if (completed_inst->isFloating()) {
985 } else if (completed_inst->isVector()) {
987 } else {
989 }
990
991 completed_inst->lastWakeDependents = curTick();
992
993 DPRINTF(IQ, "Waking dependents of completed instruction.\n");
994
995 assert(!completed_inst->isSquashed());
996
997 // Tell the memory dependence unit to wake any dependents on this
998 // instruction if it is a memory instruction. Also complete the memory
999 // instruction at this point since we know it executed without issues.
1000 ThreadID tid = completed_inst->threadNumber;
1001 if (completed_inst->isMemRef()) {
1002 memDepUnit[tid].completeInst(completed_inst);
1003
1004 DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
1005 completed_inst->pcState(), completed_inst->seqNum);
1006
 // Memory instructions keep their IQ entry until this point.
1007 ++freeEntries;
1008 completed_inst->memOpDone(true);
1009 count[tid]--;
1010 } else if (completed_inst->isReadBarrier() ||
1011 completed_inst->isWriteBarrier()) {
1012 // Completes a non mem ref barrier
1013 memDepUnit[tid].completeInst(completed_inst);
1014 }
1015
1016 for (int dest_reg_idx = 0;
1017 dest_reg_idx < completed_inst->numDestRegs();
1018 dest_reg_idx++)
1019 {
1020 PhysRegIdPtr dest_reg =
1021 completed_inst->renamedDestIdx(dest_reg_idx);
1022
1023 // Special case of uniq or control registers. They are not
1024 // handled by the IQ and thus have no dependency graph entry.
1025 if (dest_reg->isFixedMapping()) {
1026 DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n",
1027 dest_reg->index(), dest_reg->className());
1028 continue;
1029 }
1030
1031 // Avoid waking up dependents if the register is pinned
1033 if (dest_reg->isPinned())
1034 completed_inst->setPinnedRegsWritten();
1035
1036 if (dest_reg->getNumPinnedWritesToComplete() != 0) {
1037 DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n",
1038 dest_reg->index(), dest_reg->className());
1039 continue;
1040 }
1041
1042 DPRINTF(IQ, "Waking any dependents on register %i (%s).\n",
1043 dest_reg->index(),
1044 dest_reg->className());
1045
1046 //Go through the dependency chain, marking the registers as
1047 //ready within the waiting instructions.
1048 DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
1049
 // pop() yields a null pointer once the chain is exhausted, which ends
 // the loop.
1050 while (dep_inst) {
1051 DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
1052 "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
1053
1054 // Might want to give more information to the instruction
1055 // so that it knows which of its source registers is
1056 // ready. However that would mean that the dependency
1057 // graph entries would need to hold the src_reg_idx.
1058 dep_inst->markSrcRegReady();
1059
1060 addIfReady(dep_inst);
1061
1062 dep_inst = dependGraph.pop(dest_reg->flatIndex());
1063
1064 ++dependents;
1065 }
1066
1067 // Reset the head node now that all of its dependents have
1068 // been woken up.
1069 assert(dependGraph.empty(dest_reg->flatIndex()));
1070 dependGraph.clearInst(dest_reg->flatIndex());
1071
1072 // Mark the scoreboard as having that register ready.
1073 regScoreboard[dest_reg->flatIndex()] = true;
1074 }
1075 return dependents;
1076}
1077
// Puts a memory instruction on the ready queue for its op class and keeps
// listOrder consistent: the class is added to the list if absent, or
// re-inserted when the newly pushed instruction makes the queue's top older
// than the entry currently recorded for that class.
// NOTE(review): the line naming this function and declaring ready_inst
// (listing line 1079) is missing from this extract.
1078void
1080{
1081 OpClass op_class = ready_inst->opClass();
1082
1083 readyInsts[op_class].push(ready_inst);
1084
1085 // Will need to reorder the list if either a queue is not on the list,
1086 // or it has an older instruction than last time.
1087 if (!queueOnList[op_class]) {
1088 addToOrderList(op_class);
1089 } else if (readyInsts[op_class].top()->seqNum <
1090 (*readyIt[op_class]).oldestInst) {
 // Drop the stale entry, then insert a new one at the right position.
1091 listOrder.erase(readyIt[op_class]);
1092 addToOrderList(op_class);
1093 }
1094
1095 DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1096 "the ready list, PC %s opclass:%i [sn:%llu].\n",
1097 ready_inst->pcState(), op_class, ready_inst->seqNum);
1098}
1099
// Prepares a memory instruction to be issued again: clears its translation
// started/completed flags and its can-issue flag, then asks the thread's
// memory dependence unit to reschedule it.
// NOTE(review): the line naming this function and declaring resched_inst
// (listing line 1101) is missing from this extract.
1100void
1102{
1103 DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
1104
1105 // Reset DTB translation state
1106 resched_inst->translationStarted(false);
1107 resched_inst->translationCompleted(false);
1108
1109 resched_inst->clearCanIssue();
1110 memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
1111}
1112
// Asks the memory dependence unit of replay_inst's thread to replay.
// replay_inst is only used to select the thread; it is not passed on.
// NOTE(review): the line naming this function and declaring replay_inst
// (listing line 1114) is missing from this extract.
1113void
1115{
1116 memDepUnit[replay_inst->threadNumber].replay();
1117}
1118
// Appends deferred_inst to the list of deferred memory instructions.
// NOTE(review): the line naming this function and declaring deferred_inst
// (listing line 1120) is missing from this extract.
1119void
1121{
1122 deferredMemInsts.push_back(deferred_inst);
1123}
1124
// Parks a memory instruction that could not proceed: clears its issued and
// can-issue flags and appends it to blockedMemInsts.
// NOTE(review): the line naming this function and declaring blocked_inst
// (listing line 1126) is missing from this extract.
1125void
1127{
1128 blocked_inst->clearIssued();
1129 blocked_inst->clearCanIssue();
1130 blockedMemInsts.push_back(blocked_inst);
1131 DPRINTF(IQ, "Memory inst [sn:%llu] PC %s is blocked, will be "
1132 "reissued later\n", blocked_inst->seqNum,
1133 blocked_inst->pcState());
1134}
1135
// Logs that the cache is unblocked and wakes the CPU.
// NOTE(review): the line naming this function (listing line 1137) and
// listing line 1141 are missing from this extract; the latter presumably
// moves the blocked instructions to the retry list -- confirm against the
// original file.
1136void
1138{
1139 DPRINTF(IQ, "Cache is unblocked, rescheduling blocked memory "
1140 "instructions\n");
1142 // Get the CPU ticking again
1143 cpu->wakeCPU();
1144}
1145
// Returns the first deferred memory instruction whose translation has
// completed or which has been squashed, removing it from deferredMemInsts;
// returns nullptr when no such instruction exists.
// NOTE(review): the return type and the line naming this function (listing
// lines 1146-1147) are missing from this extract.
1148{
1149 for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
1150 ++it) {
1151 if ((*it)->translationCompleted() || (*it)->isSquashed()) {
 // Move the pointer out, erase the slot, and return immediately so
 // the erased iterator is never used again.
1152 DynInstPtr mem_inst = std::move(*it);
1153 deferredMemInsts.erase(it);
1154 return mem_inst;
1155 }
1156 }
1157 return nullptr;
1158}
1159
// Removes and returns the instruction at the front of retryMemInsts, or
// returns nullptr when that list is empty.
// NOTE(review): the return type and the line naming this function (listing
// lines 1160-1161) are missing from this extract.
1162{
1163 if (retryMemInsts.empty()) {
1164 return nullptr;
1165 } else {
1166 DynInstPtr mem_inst = std::move(retryMemInsts.front());
1167 retryMemInsts.pop_front();
1168 return mem_inst;
1169 }
1170}
1171
// Reports a memory-ordering violation between `store` and `faulting_load` to
// the memory dependence unit of the store's thread.
// NOTE(review): the line naming this function and declaring `store` (listing
// line 1173) and listing line 1176 are missing from this extract.
1172void
1174 const DynInstPtr &faulting_load)
1175{
1177 memDepUnit[store->threadNumber].violation(store, faulting_load);
1178}
1179
// Starts a squash for thread tid: latches the squash sequence number from the
// commit stage's time-buffer entry, squashes the affected instructions held
// in the queue, and then squashes the thread's memory dependence unit with
// the same sequence number.
// NOTE(review): the line naming this function and declaring tid (listing
// line 1181) is missing from this extract.
1180void
1182{
1183 DPRINTF(IQ, "[tid:%i] Starting to squash instructions in "
1184 "the IQ.\n", tid);
1185
1186 // Read instruction sequence number of last instruction out of the
1187 // time buffer.
1188 squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
1189
1190 doSquash(tid);
1191
1192 // Also tell the memory dependence unit to squash.
1193 memDepUnit[tid].squash(squashedSeqNum[tid], tid);
1194}
1195
// doSquash(ThreadID tid) -- the line carrying the function name is absent
// from this listing; the name and parameter are taken from the
// cross-reference index at the end of the page.
//
// Walks instList[tid] backwards from its last element and handles every
// instruction whose sequence number is greater than squashedSeqNum[tid].
// Instructions that belong to another thread or are already marked
// squashed-in-IQ are stepped over. For every other instruction it:
//   1. if the instruction has not issued, or is a memory reference whose
//      memory op is not done: unlinks it from the dependency graph or from
//      nonSpecInsts, sets its squashed/issued/can-commit flags, clears its
//      in-IQ flag, and gives its entry back (count[] decremented,
//      freeEntries incremented);
//   2. clears the dependency-graph head of each destination register that
//      is not a fixed mapping;
//   3. erases it from instList[tid].
//
// NOTE(review): listing numbers 1213, 1215, 1217, 1270, 1293 and 1334 are
// skipped in this listing, so statements on those lines are not visible
// here.
1196void
1198{
1199 // Start at the tail.
// NOTE(review): the iterator is decremented from end() unconditionally, and
// the loop below iterates backwards while comparing against end(). Both the
// empty-list case and termination at the head of the list therefore appear
// to depend on how the list implementation treats decrementing past
// begin() -- confirm.
1200 ListIt squash_it = instList[tid].end();
1201 --squash_it;
1202
1203 DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n",
1204 tid, squashedSeqNum[tid]);
1205
1206 // Squash any instructions younger than the squashed sequence number
1207 // given.
1208 while (squash_it != instList[tid].end() &&
1209 (*squash_it)->seqNum > squashedSeqNum[tid]) {
1210
1211 DynInstPtr squashed_inst = (*squash_it);
// The three branch bodies below are empty in this listing (their lines were
// dropped), so what each branch does cannot be determined from here.
1212 if (squashed_inst->isFloating()) {
1214 } else if (squashed_inst->isVector()) {
1216 } else {
1218 }
1219
1220 // Only handle the instruction if it actually is in the IQ and
1221 // hasn't already been squashed in the IQ.
// Such instructions are stepped over and left in instList[tid].
1222 if (squashed_inst->threadNumber != tid ||
1223 squashed_inst->isSquashedInIQ()) {
1224 --squash_it;
1225 continue;
1226 }
1227
// Bookkeeping is undone only for instructions that have not issued yet, or
// for memory references whose memory operation has not completed.
1228 if (!squashed_inst->isIssued() ||
1229 (squashed_inst->isMemRef() &&
1230 !squashed_inst->memOpDone())) {
1231
1232 DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n",
1233 tid, squashed_inst->seqNum, squashed_inst->pcState());
1234
// True for a full memory barrier that is also a load, or a store that is
// not a store-conditional.
1235 bool is_acq_rel = squashed_inst->isFullMemBarrier() &&
1236 (squashed_inst->isLoad() ||
1237 (squashed_inst->isStore() &&
1238 !squashed_inst->isStoreConditional()));
1239
1240 // Remove the instruction from the dependency list.
// Taken for is_acq_rel instructions, and for instructions that are none of:
// non-speculative, store-conditional, atomic, read barrier, write barrier.
1241 if (is_acq_rel ||
1242 (!squashed_inst->isNonSpeculative() &&
1243 !squashed_inst->isStoreConditional() &&
1244 !squashed_inst->isAtomic() &&
1245 !squashed_inst->isReadBarrier() &&
1246 !squashed_inst->isWriteBarrier())) {
1247
1248 for (int src_reg_idx = 0;
1249 src_reg_idx < squashed_inst->numSrcRegs();
1250 src_reg_idx++)
1251 {
1252 PhysRegIdPtr src_reg =
1253 squashed_inst->renamedSrcIdx(src_reg_idx);
1254
1255 // Only remove it from the dependency graph if it
1256 // was placed there in the first place.
1257
1258 // Instead of doing a linked list traversal, we
1259 // can just remove these squashed instructions
1260 // either at issue time, or when the register is
1261 // overwritten. The only downside to this is it
1262 // leaves more room for error.
1263
1264 if (!squashed_inst->readySrcIdx(src_reg_idx) &&
1265 !src_reg->isFixedMapping()) {
1266 dependGraph.remove(src_reg->flatIndex(),
1267 squashed_inst);
1268 }
1269
1271 }
1272
// Otherwise the instruction is looked up in nonSpecInsts, unless it is a
// store-conditional that has already completed.
1273 } else if (!squashed_inst->isStoreConditional() ||
1274 !squashed_inst->isCompleted()) {
1275 NonSpecMapIt ns_inst_it =
1276 nonSpecInsts.find(squashed_inst->seqNum);
1277
1278 // we remove non-speculative instructions from
1279 // nonSpecInsts already when they are ready, and so we
1280 // cannot always expect to find them
1281 if (ns_inst_it == nonSpecInsts.end()) {
1282 // loads that became ready but stalled on a
1283 // blocked cache are already removed from
1284 // nonSpecInsts, and have not faulted
1285 assert(squashed_inst->getFault() != NoFault ||
1286 squashed_inst->isMemRef());
1287 } else {
1288
1289 (*ns_inst_it).second = NULL;
1290
1291 nonSpecInsts.erase(ns_inst_it);
1292
1294 }
1295 }
1296
1297 // Might want to also clear out the head of the dependency graph.
1298
1299 // Mark it as squashed within the IQ.
1300 squashed_inst->setSquashedInIQ();
1301
1302 // @todo: Remove this hack where several statuses are set so the
1303 // inst will flow through the rest of the pipeline.
1304 squashed_inst->setIssued();
1305 squashed_inst->setCanCommit();
1306 squashed_inst->clearInIQ();
1307
1308 //Update Thread IQ Count
1309 count[squashed_inst->threadNumber]--;
1310
1311 ++freeEntries;
1312 }
1313
1314 // IQ clears out the heads of the dependency graph only when
1315 // instructions reach writeback stage. If an instruction is squashed
1316 // before writeback stage, its head of dependency graph would not be
1317 // cleared out; it holds the instruction's DynInstPtr. This
1318 // prevents freeing the squashed instruction's DynInst.
1319 // Thus, we need to manually clear out the squashed instructions'
1320 // heads of dependency graph.
1321 for (int dest_reg_idx = 0;
1322 dest_reg_idx < squashed_inst->numDestRegs();
1323 dest_reg_idx++)
1324 {
1325 PhysRegIdPtr dest_reg =
1326 squashed_inst->renamedDestIdx(dest_reg_idx);
1327 if (dest_reg->isFixedMapping()){
1328 continue;
1329 }
// The register's dependency list is asserted to be empty before its head is
// cleared.
1330 assert(dependGraph.empty(dest_reg->flatIndex()));
1331 dependGraph.clearInst(dest_reg->flatIndex());
1332 }
// The post-decrement moves squash_it to the preceding element before the
// current element is erased, so the loop continues from a live iterator.
1333 instList[tid].erase(squash_it--);
1335 }
1336}
1337
// Comparison functor call operator (the line naming it is absent from this
// listing; the cross-reference index at the end of the page lists it as
// `bool operator()(const DynInstPtr &lhs, const DynInstPtr &rhs) const`).
//
// Returns true when lhs has a larger sequence number than rhs.
// NOTE(review): presumably this orders the ready queues so that the
// instruction with the smallest sequence number is on top -- confirm against
// the ReadyInstQueue definition.
1338bool
1340 const DynInstPtr &lhs, const DynInstPtr &rhs) const
1341{
1342 return lhs->seqNum > rhs->seqNum;
1343}
1344
// addToDependents(const DynInstPtr &new_inst) -- the line carrying the
// function name is absent from this listing; the name and parameter are
// taken from the cross-reference index at the end of the page.
//
// For each source register of new_inst that the instruction does not yet
// consider ready:
//   - fixed-mapping registers are skipped;
//   - if regScoreboard says the register is not ready, new_inst is inserted
//     into dependGraph under that register's flat index;
//   - otherwise the source is marked ready inside the instruction.
//
// Returns true if at least one source register was inserted into
// dependGraph, false otherwise.
1345bool
1347{
1348 // Loop through the instruction's source registers, adding
1349 // them to the dependency list if they are not ready.
// NOTE(review): the count is stored in an int8_t; this assumes numSrcRegs()
// never exceeds what int8_t can hold -- confirm.
1350 int8_t total_src_regs = new_inst->numSrcRegs();
1351 bool return_val = false;
1352
1353 for (int src_reg_idx = 0;
1354 src_reg_idx < total_src_regs;
1355 src_reg_idx++)
1356 {
1357 // Only add it to the dependency graph if it's not ready.
1358 if (!new_inst->readySrcIdx(src_reg_idx)) {
1359 PhysRegIdPtr src_reg = new_inst->renamedSrcIdx(src_reg_idx);
1360
1361 // Check the IQ's scoreboard to make sure the register
1362 // hasn't become ready while the instruction was in flight
1363 // between stages. Only if it really isn't ready should
1364 // it be added to the dependency graph.
1365 if (src_reg->isFixedMapping()) {
1366 continue;
1367 } else if (!regScoreboard[src_reg->flatIndex()]) {
1368 DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1369 "is being added to the dependency chain.\n",
1370 new_inst->pcState(), src_reg->index(),
1371 src_reg->className());
1372
1373 dependGraph.insert(src_reg->flatIndex(), new_inst);
1374
1375 // Change the return value to indicate that something
1376 // was added to the dependency graph.
1377 return_val = true;
1378 } else {
1379 DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1380 "became ready before it reached the IQ.\n",
1381 new_inst->pcState(), src_reg->index(),
1382 src_reg->className());
1383 // Mark a register ready within the instruction.
1384 new_inst->markSrcRegReady(src_reg_idx);
1385 }
1386 }
1387 }
1388
1389 return return_val;
1390}
1391
// addToProducers(const DynInstPtr &new_inst) -- the line carrying the
// function name is absent from this listing; the name and parameter are
// taken from the cross-reference index at the end of the page.
//
// For each destination register of new_inst that is not a fixed mapping:
// records new_inst as the head instruction of that register's dependGraph
// entry and sets the register's regScoreboard slot to false. Panics (after
// dumping the graph) if the register's dependency list is not empty.
1392void
1394{
1395 // Nothing really needs to be marked when an instruction becomes
1396 // the producer of a register's value, but for convenience a ptr
1397 // to the producing instruction will be placed in the head node of
1398 // the dependency links.
// NOTE(review): the count is stored in an int8_t; this assumes numDestRegs()
// never exceeds what int8_t can hold -- confirm.
1399 int8_t total_dest_regs = new_inst->numDestRegs();
1400
1401 for (int dest_reg_idx = 0;
1402 dest_reg_idx < total_dest_regs;
1403 dest_reg_idx++)
1404 {
1405 PhysRegIdPtr dest_reg = new_inst->renamedDestIdx(dest_reg_idx);
1406
1407 // Some registers have fixed mapping, and there is no need to track
1408 // dependencies as these instructions must be executed at commit.
1409 if (dest_reg->isFixedMapping()) {
1410 continue;
1411 }
1412
// A non-empty list for a register about to get a new producer is treated as
// fatal; the graph is dumped first to aid debugging.
1413 if (!dependGraph.empty(dest_reg->flatIndex())) {
1414 dependGraph.dump();
1415 panic("Dependency graph %i (%s) (flat: %i) not empty!",
1416 dest_reg->index(), dest_reg->className(),
1417 dest_reg->flatIndex());
1418 }
1419
1420 dependGraph.setInst(dest_reg->flatIndex(), new_inst);
1421
1422 // Mark the scoreboard to say it's not yet ready.
1423 regScoreboard[dest_reg->flatIndex()] = false;
1424 }
1425}
1426
// addIfReady(const DynInstPtr &inst) -- the line carrying the function name
// is absent from this listing; the name and parameter are taken from the
// cross-reference index at the end of the page.
//
// Does nothing unless inst->readyToIssue() is true. A ready memory reference
// is handed to the owning thread's memDepUnit via regsReady() and is not
// pushed on a ready queue here. Any other ready instruction is pushed onto
// readyInsts[] for its op class, and that op class's entry in listOrder is
// added or refreshed as needed.
1427void
1429{
1430 // If the instruction now has all of its source registers
1431 // available, then add it to the list of ready instructions.
1432 if (inst->readyToIssue()) {
1433
1434 //Add the instruction to the proper ready list.
1435 if (inst->isMemRef()) {
1436
1437 DPRINTF(IQ, "Checking if memory instruction can issue.\n");
1438
1439 // Message to the mem dependence unit that this instruction has
1440 // its registers ready.
1441 memDepUnit[inst->threadNumber].regsReady(inst);
1442
1443 return;
1444 }
1445
1446 OpClass op_class = inst->opClass();
1447
1448 DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1449 "the ready list, PC %s opclass:%i [sn:%llu].\n",
1450 inst->pcState(), op_class, inst->seqNum);
1451
1452 readyInsts[op_class].push(inst);
1453
1454 // Will need to reorder the list if either a queue is not on the list,
1455 // or it has an older instruction than last time.
// In the second case the existing listOrder entry is erased and re-added so
// that it reflects the queue's new top instruction.
1456 if (!queueOnList[op_class]) {
1457 addToOrderList(op_class);
1458 } else if (readyInsts[op_class].top()->seqNum <
1459 (*readyIt[op_class]).oldestInst) {
1460 listOrder.erase(readyIt[op_class]);
1461 addToOrderList(op_class);
1462 }
1463 }
1464}
1465
1466int
1471
// dumpLists() -- the line carrying the function name is absent from this
// listing; the name is taken from the cross-reference index at the end of
// the page.
//
// Debug helper that prints, via cprintf:
//   - the size of every per-op-class ready queue;
//   - the size and contents (PC and sequence number) of nonSpecInsts;
//   - every listOrder entry with its 1-based position, queue type and
//     oldest sequence number.
// It does not modify any of the containers it reads.
1472void
1474{
1475 for (int i = 0; i < Num_OpClasses; ++i) {
1476 cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
1477
1478 cprintf("\n");
1479 }
1480
1481 cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
1482
1483 NonSpecMapIt non_spec_it = nonSpecInsts.begin();
1484 NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
1485
1486 cprintf("Non speculative list: ");
1487
// Each mapped pointer is dereferenced without a null check.
1488 while (non_spec_it != non_spec_end_it) {
1489 cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(),
1490 (*non_spec_it).second->seqNum);
1491 ++non_spec_it;
1492 }
1493
1494 cprintf("\n");
1495
1496 ListOrderIt list_order_it = listOrder.begin();
1497 ListOrderIt list_order_end_it = listOrder.end();
// Position counter for the printout; starts at 1.
1498 int i = 1;
1499
1500 cprintf("List order: ");
1501
1502 while (list_order_it != list_order_end_it) {
1503 cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType,
1504 (*list_order_it).oldestInst);
1505
1506 ++list_order_it;
1507 ++i;
1508 }
1509
1510 cprintf("\n");
1511}
1512
1513
// dumpInsts() -- the line carrying the function name is absent from this
// listing; the name is taken from the cross-reference index at the end of
// the page.
//
// Debug helper that prints, via cprintf, one record per instruction: first
// for every entry of instList[tid] across all threads, then for every entry
// of instsToExecute. Each record shows the instruction's position, PC,
// sequence number, thread, issued flag and squashed flag, plus MemOpDone for
// memory references.
//
// A running "Count" is also printed for instructions that are not squashed
// and are either unissued or memory references whose memory op is not done.
// It restarts for each thread and again for instsToExecute.
1514void
1516{
1517 for (ThreadID tid = 0; tid < numThreads; ++tid) {
1518 int num = 0;
1519 int valid_num = 0;
1520 ListIt inst_list_it = instList[tid].begin();
1521
1522 while (inst_list_it != instList[tid].end()) {
1523 cprintf("Instruction:%i\n", num);
1524 if (!(*inst_list_it)->isSquashed()) {
1525 if (!(*inst_list_it)->isIssued()) {
1526 ++valid_num;
1527 cprintf("Count:%i\n", valid_num);
1528 } else if ((*inst_list_it)->isMemRef() &&
1529 !(*inst_list_it)->memOpDone()) {
1530 // Loads that have not been marked as executed
1531 // still count towards the total instructions.
1532 ++valid_num;
1533 cprintf("Count:%i\n", valid_num);
1534 }
1535 }
1536
1537 cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1538 "Issued:%i\nSquashed:%i\n",
1539 (*inst_list_it)->pcState(),
1540 (*inst_list_it)->seqNum,
1541 (*inst_list_it)->threadNumber,
1542 (*inst_list_it)->isIssued(),
1543 (*inst_list_it)->isSquashed());
1544
1545 if ((*inst_list_it)->isMemRef()) {
1546 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1547 }
1548
1549 cprintf("\n");
1550
1551 inst_list_it++;
1552 ++num;
1553 }
1554 }
1555
1556 cprintf("Insts to Execute list:\n");
1557
// Second pass: the same per-instruction report, this time over
// instsToExecute, with fresh counters.
1558 int num = 0;
1559 int valid_num = 0;
1560 ListIt inst_list_it = instsToExecute.begin();
1561
1562 while (inst_list_it != instsToExecute.end())
1563 {
1564 cprintf("Instruction:%i\n",
1565 num);
1566 if (!(*inst_list_it)->isSquashed()) {
1567 if (!(*inst_list_it)->isIssued()) {
1568 ++valid_num;
1569 cprintf("Count:%i\n", valid_num);
1570 } else if ((*inst_list_it)->isMemRef() &&
1571 !(*inst_list_it)->memOpDone()) {
1572 // Loads that have not been marked as executed
1573 // still count towards the total instructions.
1574 ++valid_num;
1575 cprintf("Count:%i\n", valid_num);
1576 }
1577 }
1578
1579 cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1580 "Issued:%i\nSquashed:%i\n",
1581 (*inst_list_it)->pcState(),
1582 (*inst_list_it)->seqNum,
1583 (*inst_list_it)->threadNumber,
1584 (*inst_list_it)->isIssued(),
1585 (*inst_list_it)->isSquashed());
1586
1587 if ((*inst_list_it)->isMemRef()) {
1588 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1589 }
1590
1591 cprintf("\n");
1592
1593 inst_list_it++;
1594 ++num;
1595 }
1596}
1597
1598} // namespace o3
1599} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
ThreadID numThreads
Number of threads we're actually simulating (<= SMT_MAX_THREADS).
Definition base.hh:417
bool switchedOut() const
Determine if the CPU is switched out.
Definition base.hh:400
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
virtual std::string name() const
Definition named.hh:47
Physical register ID.
Definition reg_class.hh:415
const RegIndex & flatIndex() const
Flat index accessor.
Definition reg_class.hh:472
constexpr RegIndex index() const
Visible RegId methods.
Definition reg_class.hh:151
int getNumPinnedWritesToComplete() const
Definition reg_class.hh:495
void decrNumPinnedWritesToComplete()
Definition reg_class.hh:506
bool isPinned() const
Definition reg_class.hh:492
constexpr const char * className() const
Return a const char* with the register class name.
Definition reg_class.hh:281
bool isFixedMapping() const
Returns true if this register is always associated to the same architectural register.
Definition reg_class.hh:469
wire getWire(int idx)
Definition timebuf.hh:232
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition cpu.hh:488
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition cpu.cc:1316
void freeUnitNextCycle(int fu_idx)
Frees a FU at the end of this cycle.
Definition fu_pool.cc:190
Cycles getOpLatency(OpClass capability)
Returns the operation execution latency of the given capability.
Definition fu_pool.hh:192
bool isPipelined(OpClass capability)
Returns whether the given capability is pipelined.
Definition fu_pool.hh:197
static constexpr auto NoCapableFU
Instruction asked for a FU but this FUPool does not have a FU for this instruction op type.
Definition fu_pool.hh:158
static constexpr auto NoFreeFU
Instruction asked for a FU but all FU for this op type have already been allocated to other instructi...
Definition fu_pool.hh:165
static constexpr auto NoNeedFU
Named constants to differentiate cases where an instruction asked the FUPool for a free FU but did no...
Definition fu_pool.hh:152
int getUnit(OpClass capability)
Gets a FU providing the requested capability.
Definition fu_pool.cc:162
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition iew.hh:88
void wakeCPU()
Tells the CPU to wakeup if it has descheduled itself due to no activity.
Definition iew.cc:765
FUCompletion(const DynInstPtr &_inst, int fu_idx, InstructionQueue *iq_ptr)
Construct a FU completion event.
Definition inst_queue.cc:66
virtual const char * description() const
Return a C string describing the event.
Definition inst_queue.cc:82
A standard instruction queue class.
Definition inst_queue.hh:99
std::string name() const
Returns the name of the IQ.
void commit(const InstSeqNum &inst, ThreadID tid=0)
Commits all instructions up to and including the given sequence number, for a specific thread.
gem5::o3::InstructionQueue::IQStats iqStats
void processFUCompletion(const DynInstPtr &inst, int fu_idx)
Process FU completion event.
DynInstPtr getBlockedMemInstToExecute()
Gets a memory instruction that was blocked on the cache.
std::list< DynInstPtr > instList[MaxThreads]
List of all the instructions in the IQ (some of which may be issued).
std::list< DynInstPtr > retryMemInsts
List of instructions that were cache blocked, but a retry has been seen since, so they can now be ret...
void deferMemInst(const DynInstPtr &deferred_inst)
Defers a memory instruction when its DTB translation incurs a hw page table walk.
ReadyInstQueue readyInsts[Num_OpClasses]
List of ready instructions, per op class.
unsigned totalWidth
The total number of instructions that can be issued in one cycle.
void addIfReady(const DynInstPtr &inst)
Moves an instruction to the ready queue if it is ready.
unsigned numEntries
The number of entries in the instruction queue.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a memory or write barrier into the IQ to make sure loads and stores are ordered properly.
bool queueOnList[Num_OpClasses]
Tracks if each ready queue is on the age order list.
FUPool * fuPool
Function unit pool.
int wakeDependents(const DynInstPtr &completed_inst)
Wakes all dependents of a completed instruction.
std::list< DynInstPtr > deferredMemInsts
List of instructions waiting for their DTB translation to complete (hw page table walk in progress).
TimeBuffer< IssueStruct > * issueToExecuteQueue
The queue to the execute stage.
std::list< DynInstPtr > instsToExecute
List of instructions that are ready to be executed.
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the global time buffer.
unsigned numFreeEntries()
Returns total number of free entries.
std::list< DynInstPtr > blockedMemInsts
List of instructions that have been cache blocked.
void rescheduleMemInst(const DynInstPtr &resched_inst)
Reschedules a memory instruction.
TimeBuffer< TimeStruct >::wire fromCommit
Wire to read information from timebuffer.
void insertNonSpec(const DynInstPtr &new_inst)
Inserts a new, non-speculative instruction into the IQ.
void addReadyMemInst(const DynInstPtr &ready_inst)
Adds a ready memory instruction to the ready list.
void replayMemInst(const DynInstPtr &replay_inst)
Replays a memory instruction.
void resetState()
Resets all instruction queue state.
bool isDrained() const
Determine if we are drained.
unsigned count[MaxThreads]
Per Thread IQ count.
void cacheUnblocked()
Notify instruction queue that a previous blockage has resolved.
std::map< InstSeqNum, DynInstPtr > nonSpecInsts
List of non-speculative instructions that will be scheduled once the IQ gets a signal from commit.
unsigned freeEntries
Number of free IQ entries left.
MemDepUnit memDepUnit[MaxThreads]
The memory dependence unit, which tracks/predicts memory dependences between instructions.
void dumpLists()
Debugging function to dump all the list sizes, as well as print out the list of nonspeculative instru...
void blockMemInst(const DynInstPtr &blocked_inst)
Defers a memory instruction when it is cache blocked.
void drainSanityCheck() const
Perform sanity checks after a drain.
unsigned numPhysRegs
The number of physical registers in the CPU.
DynInstPtr getDeferredMemInstToExecute()
Gets a memory instruction that was deferred due to a delayed DTB translation if it is now ready to ex...
void dumpInsts()
Debugging function to dump out all instructions that are in the IQ.
void takeOverFrom()
Takes over execution from another CPU's thread.
SMTQueuePolicy iqPolicy
IQ sharing policy for SMT.
gem5::o3::InstructionQueue::IQIOStats iqIOStats
void moveToYoungerInst(ListOrderIt age_order_it)
Called when the oldest instruction has been removed from a ready queue; this places that ready queue ...
InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
Constructs an IQ.
Definition inst_queue.cc:87
InstSeqNum squashedSeqNum[MaxThreads]
The sequence number of the squashed instruction.
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load)
Indicates an ordering violation between a store and a load.
std::list< DynInstPtr >::iterator ListIt
bool hasReadyInsts()
Returns if there are any ready instructions in the IQ.
Cycles commitToIEWDelay
Delay between commit stage and the IQ.
void resetEntries()
Resets max entries for all threads.
int countInsts()
Debugging function to count how many entries are in the IQ.
std::list< ThreadID > * activeThreads
Pointer to list of active threads.
std::list< ListOrderEntry >::iterator ListOrderIt
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets active threads list.
void addToOrderList(OpClass op_class)
Add an op class to the age order list.
ThreadID numThreads
Number of Total Threads.
TimeBuffer< TimeStruct > * timeBuffer
The backwards time buffer.
void scheduleNonSpec(const InstSeqNum &inst)
Schedules a single specific non-speculative instruction.
std::vector< bool > regScoreboard
A cache of the recently woken registers.
void scheduleReadyInsts()
Schedules ready instructions, adding the ready ones (oldest first) to the queue to execute.
bool isFull()
Returns whether or not the IQ is full.
void squash(ThreadID tid)
Squashes instructions for a thread.
IEW * iewStage
Pointer to IEW stage.
std::list< ListOrderEntry > listOrder
List that contains the age order of the oldest instruction of each ready queue.
~InstructionQueue()
Destructs the IQ.
void doSquash(ThreadID tid)
Does the actual squashing.
void setIssueToExecuteQueue(TimeBuffer< IssueStruct > *i2eQueue)
Sets the timer buffer between issue and execute.
int wbOutstanding
Number of instructions currently in flight to FUs.
void insert(const DynInstPtr &new_inst)
Inserts a new instruction into the IQ.
unsigned maxEntries[MaxThreads]
Max IQ Entries Per Thread.
CPU * cpu
Pointer to the CPU.
bool addToDependents(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a consumer.
int entryAmount(ThreadID num_threads)
Number of entries needed for given amount of threads.
DynInstPtr getInstToExecute()
Returns the oldest scheduled instruction, and removes it from the list of instructions waiting to exe...
DependencyGraph< DynInstPtr > dependGraph
ListOrderIt readyIt[Num_OpClasses]
Iterators of each ready queue.
void addToProducers(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a producer.
std::map< InstSeqNum, DynInstPtr >::iterator NonSpecMapIt
void completeInst(const DynInstPtr &inst)
Notifies completion of an instruction.
void nonSpecInstReady(const DynInstPtr &inst)
Indicate that a non-speculative instruction is ready.
void issue(const DynInstPtr &inst)
Issues the given instruction.
void insert(const DynInstPtr &inst)
Inserts a memory instruction.
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squashes all instructions up until a given sequence number for a specific thread.
void violation(const DynInstPtr &store_inst, const DynInstPtr &violating_load)
Indicates an ordering violation between a store and a younger load.
void replay()
Replays all instructions that have been rescheduled by moving them to the ready list.
void init(const BaseO3CPUParams &params, ThreadID tid, CPU *cpu)
Initializes the unit with parameters and a thread id.
void regsReady(const DynInstPtr &inst)
Indicate that an instruction has its registers ready.
void insertNonSpec(const DynInstPtr &inst)
Inserts a non-speculative memory instruction.
void reschedule(const DynInstPtr &inst)
Reschedules an instruction to be re-executed.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a barrier instruction.
void setIQ(InstructionQueue *iq_ptr)
Sets the pointer to the IQ.
Derived & ysubnames(const char **names)
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Derived & prereq(const Stat &prereq)
Set the prerequisite stat and marks this stat to print at the end of simulation.
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Statistics container.
Definition group.hh:93
Derived & init(size_type _x, size_type _y)
Derived & init(size_type size)
Set this vector to have the given size.
STL list class.
Definition stl.hh:51
Definition test.h:63
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
Bitfield< 7 > i
Definition misc_types.hh:67
static constexpr int MaxThreads
Definition limits.hh:38
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition info.hh:61
const FlagsType total
Print the total.
Definition info.hh:59
const FlagsType dist
Print the distribution.
Definition info.hh:65
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
static const OpClass Num_OpClasses
Definition op_class.hh:135
void cprintf(const char *format, const Args &...args)
Definition cprintf.hh:155
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
@ VecRegClass
Vector Register.
Definition reg_class.hh:64
@ VecElemClass
Vector Register Native Elem lane.
Definition reg_class.hh:66
statistics::Scalar vecInstQueueWakeupAccesses
statistics::Scalar intInstQueueWakeupAccesses
IQIOStats(statistics::Group *parent)
statistics::Scalar fpInstQueueWakeupAccesses
statistics::Vector2d statIssuedInstType
Stat for total number issued for each instruction type.
statistics::Scalar floatInstsIssued
Stat for number of floating point instructions issued.
statistics::Vector fuBusy
Number of times the FU was busy.
statistics::Scalar instsAdded
Stat for number of instructions added.
statistics::Distribution numIssuedDist
Distribution of number of instructions in the queue.
statistics::Scalar nonSpecInstsAdded
Stat for number of non-speculative instructions added.
statistics::Scalar squashedInstsExamined
Stat for number of squashed instructions examined when squashing.
statistics::Scalar miscInstsIssued
Stat for number of miscellaneous instructions issued.
statistics::Scalar branchInstsIssued
Stat for number of branch instructions issued.
statistics::Formula fuBusyRate
Number of times the FU was busy per instruction issued.
statistics::Scalar memInstsIssued
Stat for number of memory instructions issued.
statistics::Scalar intInstsIssued
Stat for number of integer instructions issued.
statistics::Formula issueRate
Number of instructions issued per cycle.
IQStats(CPU *cpu, const unsigned &total_width)
statistics::Scalar squashedOperandsExamined
Stat for number of squashed instruction operands examined when squashing.
statistics::Scalar squashedInstsIssued
Stat for number of squashed instructions that were ready to issue.
statistics::Vector statFuBusy
Distribution of the cycles it takes to issue an instruction.
statistics::Scalar squashedNonSpecRemoved
Stat for number of non-speculative instructions removed due to a squash.
Entry for the list age ordering by op class.
bool operator()(const DynInstPtr &lhs, const DynInstPtr &rhs) const

Generated on Mon Jan 13 2025 04:28:31 for gem5 by doxygen 1.9.8