gem5 v23.0.0.1
Loading...
Searching...
No Matches
inst_queue.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2014, 2017-2020 ARM Limited
3 * Copyright (c) 2013 Advanced Micro Devices, Inc.
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder. You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2004-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "cpu/o3/inst_queue.hh"
43
44#include <limits>
45#include <vector>
46
47#include "base/logging.hh"
48#include "cpu/o3/dyn_inst.hh"
49#include "cpu/o3/fu_pool.hh"
50#include "cpu/o3/limits.hh"
51#include "debug/IQ.hh"
52#include "enums/OpClass.hh"
53#include "params/BaseO3CPU.hh"
54#include "sim/core.hh"
55
56// clang complains about std::set being overloaded with Packet::set if
57// we open up the entire namespace std
58using std::list;
59
60namespace gem5
61{
62
63namespace o3
64{
65
// Constructor of the functional-unit completion event. It records the
// instruction, the FU index and the owning instruction queue, and starts
// with freeFU cleared. The event is created with priority Stat_Event_Pri
// and the AutoDelete flag.
// NOTE(review): the first line of this signature (original line 66) is
// missing from this extract; presumably
// InstructionQueue::FUCompletion::FUCompletion -- confirm against upstream.
67 int fu_idx, InstructionQueue *iq_ptr)
68 : Event(Stat_Event_Pri, AutoDelete),
69 inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
70{
71}
72
// Event handler: hands the instruction back to the IQ through
// processFUCompletion(). The FU index is passed only when freeFU is set;
// otherwise -1 is passed. Afterwards the event's own reference to the
// instruction is cleared.
// NOTE(review): the signature line (original line 74) is missing from this
// extract; presumably InstructionQueue::FUCompletion::process().
73void
75{
76 iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
77 inst = NULL;
78}
79
80
// Returns a fixed, human-readable description string for this event.
// NOTE(review): the signature line (original line 82) is missing from this
// extract; presumably InstructionQueue::FUCompletion::description() const.
81const char *
83{
84 return "Functional unit completion";
85}
86
// Constructs the instruction queue from the O3 CPU parameters: stores the
// CPU / IEW pointers and sizing parameters, computes the total number of
// physical registers, sizes the dependency graph, initialises one memory
// dependence unit per possible thread, resets the queue state and finally
// sets the per-thread entry limits according to the SMT sharing policy.
// NOTE(review): the first signature line (original line 87) and the end of
// the initializer list (original lines 96-98) are missing from this extract.
88 const BaseO3CPUParams &params)
89 : cpu(cpu_ptr),
90 iewStage(iew_ptr),
91 fuPool(params.fuPool),
92 iqPolicy(params.smtIQPolicy),
93 numThreads(params.numThreads),
94 numEntries(params.numIQEntries),
95 totalWidth(params.issueWidth),
99{
100 assert(fuPool);
101
102 const auto &reg_classes = params.isa[0]->regClasses();
103 // Set the number of total physical registers
104 // As the vector registers have two addressing modes, they are added twice
// The second vector term is scaled by the ratio of vector-element registers
// to vector registers reported by the ISA's register classes.
105 numPhysRegs = params.numPhysIntRegs + params.numPhysFloatRegs +
106 params.numPhysVecRegs +
107 params.numPhysVecRegs * (
108 reg_classes.at(VecElemClass)->numRegs() /
109 reg_classes.at(VecRegClass)->numRegs()) +
110 params.numPhysVecPredRegs +
111 params.numPhysMatRegs +
112 params.numPhysCCRegs;
113
114 //Create an entry for each physical register within the
115 //dependency graph.
116 dependGraph.resize(numPhysRegs);
117
118 // Resize the register scoreboard.
// NOTE(review): the statement that belongs to the comment above (original
// line 119) is missing from this extract.
120
121 //Initialize Mem Dependence Units
// Note: this loop covers all MaxThreads slots, not just numThreads.
122 for (ThreadID tid = 0; tid < MaxThreads; tid++) {
123 memDepUnit[tid].init(params, tid, cpu_ptr);
124 memDepUnit[tid].setIQ(this);
125 }
126
127 resetState();
128
129 //Figure out resource sharing policy
130 if (iqPolicy == SMTQueuePolicy::Dynamic) {
131 //Set Max Entries to Total IQ Capacity
132 for (ThreadID tid = 0; tid < numThreads; tid++) {
133 maxEntries[tid] = numEntries;
134 }
135
136 } else if (iqPolicy == SMTQueuePolicy::Partitioned) {
137 //@todo:make work if part_amt doesnt divide evenly.
// Integer division: any remainder entries are not assigned to a thread.
138 int part_amt = numEntries / numThreads;
139
140 //Divide IQ up evenly
141 for (ThreadID tid = 0; tid < numThreads; tid++) {
142 maxEntries[tid] = part_amt;
143 }
144
145 DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
146 "%i entries per thread.\n",part_amt);
147 } else if (iqPolicy == SMTQueuePolicy::Threshold) {
// smtIQThreshold is treated as a percentage of the total IQ entries.
148 double threshold = (double)params.smtIQThreshold / 100;
149
// Truncated towards zero by the cast to int.
150 int thresholdIQ = (int)((double)threshold * numEntries);
151
152 //Divide up by threshold amount
153 for (ThreadID tid = 0; tid < numThreads; tid++) {
154 maxEntries[tid] = thresholdIQ;
155 }
156
157 DPRINTF(IQ, "IQ sharing policy set to Threshold:"
158 "%i entries per thread.\n",thresholdIQ);
159 }
// Thread slots beyond numThreads get no entries.
160 for (ThreadID tid = numThreads; tid < MaxThreads; tid++) {
161 maxEntries[tid] = 0;
162 }
163}
164
// Resets the dependency graph and, in GEM5_DEBUG builds only, prints the
// graph's nodesTraversed / nodesRemoved counters.
// NOTE(review): the signature line (original line 165) is missing from this
// extract; presumably the InstructionQueue destructor.
166{
167 dependGraph.reset();
168#ifdef GEM5_DEBUG
169 cprintf("Nodes traversed: %i, removed: %i\n",
170 dependGraph.nodesTraversed, dependGraph.nodesRemoved);
171#endif
172}
173
// Returns this object's name: the owning CPU's name with ".iq" appended.
// NOTE(review): the signature line (original line 175) is missing from this
// extract; presumably InstructionQueue::name() const.
174std::string
176{
177 return cpu->name() + ".iq";
178}
179
// Constructor of the IQ statistics group. Registers every statistic with
// its unit and description under the CPU's statistics group, then
// configures the distribution / vector statistics in the body.
// issueRate is defined as the formula instsIssued / cpu->baseStats.numCycles.
// NOTE(review): many body lines (the stat names preceding the dangling
// ".init(...)" / ";" lines, and original lines 222-256) are missing from
// this extract; consult upstream before editing the body.
180InstructionQueue::IQStats::IQStats(CPU *cpu, const unsigned &total_width)
181 : statistics::Group(cpu),
182 ADD_STAT(instsAdded, statistics::units::Count::get(),
183 "Number of instructions added to the IQ (excludes non-spec)"),
184 ADD_STAT(nonSpecInstsAdded, statistics::units::Count::get(),
185 "Number of non-speculative instructions added to the IQ"),
186 ADD_STAT(instsIssued, statistics::units::Count::get(),
187 "Number of instructions issued"),
188 ADD_STAT(intInstsIssued, statistics::units::Count::get(),
189 "Number of integer instructions issued"),
190 ADD_STAT(floatInstsIssued, statistics::units::Count::get(),
191 "Number of float instructions issued"),
192 ADD_STAT(branchInstsIssued, statistics::units::Count::get(),
193 "Number of branch instructions issued"),
194 ADD_STAT(memInstsIssued, statistics::units::Count::get(),
195 "Number of memory instructions issued"),
196 ADD_STAT(miscInstsIssued, statistics::units::Count::get(),
197 "Number of miscellaneous instructions issued"),
198 ADD_STAT(squashedInstsIssued, statistics::units::Count::get(),
199 "Number of squashed instructions issued"),
200 ADD_STAT(squashedInstsExamined, statistics::units::Count::get(),
201 "Number of squashed instructions iterated over during squash; "
202 "mainly for profiling"),
203 ADD_STAT(squashedOperandsExamined, statistics::units::Count::get(),
204 "Number of squashed operands that are examined and possibly "
205 "removed from graph"),
206 ADD_STAT(squashedNonSpecRemoved, statistics::units::Count::get(),
207 "Number of squashed non-spec instructions that were removed"),
208 ADD_STAT(numIssuedDist, statistics::units::Count::get(),
209 "Number of insts issued each cycle"),
210 ADD_STAT(statFuBusy, statistics::units::Count::get(),
211 "attempts to use FU when none available"),
212 ADD_STAT(statIssuedInstType, statistics::units::Count::get(),
213 "Number of instructions issued per FU type, per thread"),
214 ADD_STAT(issueRate, statistics::units::Rate<
215 statistics::units::Count, statistics::units::Cycle>::get(),
216 "Inst issue rate", instsIssued / cpu->baseStats.numCycles),
217 ADD_STAT(fuBusy, statistics::units::Count::get(), "FU busy when requested"),
218 ADD_STAT(fuBusyRate, statistics::units::Rate<
219 statistics::units::Count, statistics::units::Count>::get(),
220 "FU busy rate (busy events/executed inst)")
221{
224
227
230
233
236
239
242
245
248
251
254
257/*
258 queueResDist
259 .init(Num_OpClasses, 0, 99, 2)
260 .name(name() + ".IQ:residence:")
261 .desc("cycles from dispatch to issue")
262 .flags(total | pdf | cdf )
263 ;
264 for (int i = 0; i < Num_OpClasses; ++i) {
265 queueResDist.subname(i, opClassStrings[i]);
266 }
267*/
// NOTE(review): the stat name this .init() applies to (original line 268)
// is missing from this extract; the arguments are 0, total_width and 1.
269 .init(0,total_width,1)
271 ;
272/*
273 dist_unissued
274 .init(Num_OpClasses+2)
275 .name(name() + ".unissued_cause")
276 .desc("Reason ready instruction not issued")
277 .flags(pdf | dist)
278 ;
279 for (int i=0; i < (Num_OpClasses + 2); ++i) {
280 dist_unissued.subname(i, unissued_names[i]);
281 }
282*/
// NOTE(review): the stat name this .init() applies to (original line 283)
// is missing from this extract; it is sized threads x op classes.
284 .init(cpu->numThreads,enums::Num_OpClass)
286 ;
// Label the second dimension of statIssuedInstType with op class names.
287 statIssuedInstType.ysubnames(enums::OpClassStrings);
288
289 //
290 // How long did instructions for a particular FU type wait prior to issue
291 //
292/*
293 issueDelayDist
294 .init(Num_OpClasses,0,99,2)
295 .name(name() + ".")
296 .desc("cycles from operands ready to issue")
297 .flags(pdf | cdf)
298 ;
299 for (int i=0; i<Num_OpClasses; ++i) {
300 std::stringstream subname;
301 subname << opClassStrings[i] << "_delay";
302 issueDelayDist.subname(i, subname.str());
303 }
304*/
307 ;
308
312 ;
// Give each statFuBusy element the name of its op class.
313 for (int i=0; i < Num_OpClasses; ++i) {
314 statFuBusy.subname(i, enums::OpClassStrings[i]);
315 }
316
317 fuBusy
320 ;
321
324 ;
326}
327
// Constructor of the IQ I/O statistics group. Registers read / write /
// wakeup-access counters for the integer, floating point and vector
// instruction queues plus the three ALU access counters, then applies the
// `total` flag in the body.
// NOTE(review): the signature line (original line 328) and the stat names
// preceding each ".flags(total);" line are missing from this extract.
329 : statistics::Group(parent),
330 ADD_STAT(intInstQueueReads, statistics::units::Count::get(),
331 "Number of integer instruction queue reads"),
332 ADD_STAT(intInstQueueWrites, statistics::units::Count::get(),
333 "Number of integer instruction queue writes"),
334 ADD_STAT(intInstQueueWakeupAccesses, statistics::units::Count::get(),
335 "Number of integer instruction queue wakeup accesses"),
336 ADD_STAT(fpInstQueueReads, statistics::units::Count::get(),
337 "Number of floating instruction queue reads"),
338 ADD_STAT(fpInstQueueWrites, statistics::units::Count::get(),
339 "Number of floating instruction queue writes"),
340 ADD_STAT(fpInstQueueWakeupAccesses, statistics::units::Count::get(),
341 "Number of floating instruction queue wakeup accesses"),
342 ADD_STAT(vecInstQueueReads, statistics::units::Count::get(),
343 "Number of vector instruction queue reads"),
344 ADD_STAT(vecInstQueueWrites, statistics::units::Count::get(),
345 "Number of vector instruction queue writes"),
346 ADD_STAT(vecInstQueueWakeupAccesses, statistics::units::Count::get(),
347 "Number of vector instruction queue wakeup accesses"),
348 ADD_STAT(intAluAccesses, statistics::units::Count::get(),
349 "Number of integer alu accesses"),
350 ADD_STAT(fpAluAccesses, statistics::units::Count::get(),
351 "Number of floating point alu accesses"),
352 ADD_STAT(vecAluAccesses, statistics::units::Count::get(),
353 "Number of vector alu accesses")
354{
// Brings the unqualified `total` flag used below into scope.
355 using namespace statistics;
357 .flags(total);
358
360 .flags(total);
361
363 .flags(total);
364
366 .flags(total);
367
369 .flags(total);
370
372 .flags(total);
373
375 .flags(total);
376
378 .flags(total);
379
381 .flags(total);
382
384 .flags(total);
385
387 .flags(total);
388
390 .flags(total);
391}
392
// Returns the queue to its empty state: zeroes per-thread counts and
// instruction lists, marks every physical register as not ready, clears the
// squash sequence numbers, drains every per-op-class ready queue and empties
// the non-speculative, ordering and memory-instruction lists.
// NOTE(review): the signature line (original line 394) is missing from this
// extract; presumably InstructionQueue::resetState().
393void
395{
396 //Initialize thread IQ counts
397 for (ThreadID tid = 0; tid < MaxThreads; tid++) {
398 count[tid] = 0;
399 instList[tid].clear();
400 }
401
402 // Initialize the number of free IQ entries.
// NOTE(review): the statement for the comment above (original line 403) is
// missing from this extract.
404
405 // Note that in actuality, the registers corresponding to the logical
406 // registers start off as ready. However this doesn't matter for the
407 // IQ as the instruction should have been correctly told if those
408 // registers are ready in rename. Thus it can all be initialized as
409 // unready.
410 for (int i = 0; i < numPhysRegs; ++i) {
411 regScoreboard[i] = false;
412 }
413
414 for (ThreadID tid = 0; tid < MaxThreads; ++tid) {
415 squashedSeqNum[tid] = 0;
416 }
417
// Empty each ready queue by popping, and detach every op class from the
// issue-order list.
418 for (int i = 0; i < Num_OpClasses; ++i) {
419 while (!readyInsts[i].empty())
420 readyInsts[i].pop();
421 queueOnList[i] = false;
422 readyIt[i] = listOrder.end();
423 }
424 nonSpecInsts.clear();
425 listOrder.clear();
426 deferredMemInsts.clear();
427 blockedMemInsts.clear();
428 retryMemInsts.clear();
429 wbOutstanding = 0;
430}
431
// Stores the pointer to the active-thread list used later by this queue.
// NOTE(review): the signature line (original line 433) is missing from this
// extract; presumably InstructionQueue::setActiveThreads(...).
432void
434{
435 activeThreads = at_ptr;
436}
437
// Stores the pointer to the issue-to-execute queue.
// NOTE(review): the signature line (original line 439) is missing from this
// extract; presumably InstructionQueue::setIssueToExecuteQueue(...).
438void
440{
441 issueToExecuteQueue = i2e_ptr;
442}
443
// Stores the pointer to the backwards time buffer.
// NOTE(review): the signature line (original line 445) and one statement
// (original line 449) are missing from this extract; squash() reads a
// member named fromCommit, which is presumably set up here -- confirm.
444void
446{
447 timeBuffer = tb_ptr;
448
450}
451
// Reports whether the queue is drained: the dependency graph and the
// to-execute list must be empty, no writebacks may be outstanding, and every
// active thread's memory dependence unit must itself report drained.
// NOTE(review): the signature line (original line 453) is missing from this
// extract; presumably InstructionQueue::isDrained() const.
452bool
454{
455 bool drained = dependGraph.empty() &&
456 instsToExecute.empty() &&
457 wbOutstanding == 0;
458 for (ThreadID tid = 0; tid < numThreads; ++tid)
459 drained = drained && memDepUnit[tid].isDrained();
460
461 return drained;
462}
463
// Asserts that the dependency graph and the to-execute list are empty.
// NOTE(review): the signature line (original line 465) and the body of the
// per-thread loop (original line 470) are missing from this extract, so the
// `for` below appears without its statement.
464void
466{
467 assert(dependGraph.empty());
468 assert(instsToExecute.empty());
469 for (ThreadID tid = 0; tid < numThreads; ++tid)
471}
472
// Simply resets the queue state via resetState().
// NOTE(review): the signature line (original line 474) is missing from this
// extract; presumably InstructionQueue::takeOverFrom().
473void
475{
476 resetState();
477}
478
// Returns the per-thread share of IQ entries (numEntries / num_threads,
// integer division) under the Partitioned policy, and 0 for any other policy.
// NOTE(review): the signature line (original line 480) is missing from this
// extract; presumably InstructionQueue::entryAmount(ThreadID num_threads).
// NOTE(review): num_threads == 0 would divide by zero under Partitioned --
// confirm callers never pass 0.
479int
481{
482 if (iqPolicy == SMTQueuePolicy::Partitioned) {
483 return numEntries / num_threads;
484 } else {
485 return 0;
486 }
487}
488
489
// Recomputes maxEntries for every currently active thread. Under the
// Partitioned policy each active thread gets numEntries / active_threads;
// under the Threshold policy a sole active thread gets all numEntries.
// Nothing is done when the policy is Dynamic and there is a single thread.
// NOTE(review): the signature line (original line 491) and the declaration
// of `end` (original line 497) are missing from this extract.
// NOTE(review): with an empty activeThreads list the loop body never runs,
// so the division by active_threads is not reached in that case.
490void
492{
493 if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
494 int active_threads = activeThreads->size();
495
496 list<ThreadID>::iterator threads = activeThreads->begin();
498
499 while (threads != end) {
500 ThreadID tid = *threads++;
501
502 if (iqPolicy == SMTQueuePolicy::Partitioned) {
503 maxEntries[tid] = numEntries / active_threads;
504 } else if (iqPolicy == SMTQueuePolicy::Threshold &&
505 active_threads == 1) {
506 maxEntries[tid] = numEntries;
507 }
508 }
509 }
510}
511
// Returns the number of free entries in the whole queue.
// NOTE(review): the signature line (original line 513) is missing from this
// extract; presumably InstructionQueue::numFreeEntries().
512unsigned
514{
515 return freeEntries;
516}
517
// Returns how many more entries the given thread may occupy: its limit
// (maxEntries[tid]) minus the entries it currently holds (count[tid]).
// NOTE(review): the signature line (original line 519) is missing from this
// extract; presumably InstructionQueue::numFreeEntries(ThreadID tid).
// NOTE(review): the result is returned as unsigned, so if count[tid] ever
// exceeded maxEntries[tid] the value would not be a small negative number --
// confirm that cannot happen.
518unsigned
520{
521 return maxEntries[tid] - count[tid];
522}
523
// Returns true when the queue as a whole has no free entries left.
// NOTE(review): the signature line (original line 527) is missing from this
// extract; presumably InstructionQueue::isFull().
524// Might want to do something more complex if it knows how many instructions
525// will be issued this cycle.
526bool
528{
529 if (freeEntries == 0) {
530 return(true);
531 } else {
532 return(false);
533 }
534}
535
// Returns true when the given thread has no free entries left, as reported
// by numFreeEntries(tid).
// NOTE(review): the signature line (original line 537) is missing from this
// extract; presumably InstructionQueue::isFull(ThreadID tid).
536bool
538{
539 if (numFreeEntries(tid) == 0) {
540 return(true);
541 } else {
542 return(false);
543 }
544}
545
// Returns true if any instruction is ready to issue: either the issue-order
// list is non-empty or at least one per-op-class ready queue is non-empty.
// NOTE(review): the signature line (original line 547) is missing from this
// extract; presumably InstructionQueue::hasReadyInsts().
546bool
548{
549 if (!listOrder.empty()) {
550 return true;
551 }
552
553 for (int i = 0; i < Num_OpClasses; ++i) {
554 if (!readyInsts[i].empty()) {
555 return true;
556 }
557 }
558
559 return false;
560}
561
// Inserts a (speculative) instruction into the IQ: appends it to its
// thread's instruction list, consumes one free entry, flags it as in the IQ,
// registers its source dependencies and destination producers, and then
// either hands it to the memory dependence unit (memory references) or adds
// it to the ready list if it is already ready.
// NOTE(review): the signature line (original line 563), the statements in
// the three branches below (original lines 566/568/570) and one statement at
// original line 600 are missing from this extract.
562void
564{
// NOTE(review): new_inst is dereferenced here, before the assert(new_inst)
// further down, so that assert cannot catch a null pointer in time.
565 if (new_inst->isFloating()) {
567 } else if (new_inst->isVector()) {
569 } else {
571 }
572 // Make sure the instruction is valid
573 assert(new_inst);
574
575 DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n",
576 new_inst->seqNum, new_inst->pcState());
577
// The caller must have checked for space before inserting.
578 assert(freeEntries != 0);
579
580 instList[new_inst->threadNumber].push_back(new_inst);
581
582 --freeEntries;
583
584 new_inst->setInIQ();
585
586 // Look through its source registers (physical regs), and mark any
587 // dependencies.
588 addToDependents(new_inst);
589
590 // Have this instruction set itself as the producer of its destination
591 // register(s).
592 addToProducers(new_inst);
593
594 if (new_inst->isMemRef()) {
595 memDepUnit[new_inst->threadNumber].insert(new_inst);
596 } else {
597 addIfReady(new_inst);
598 }
599
601
602 count[new_inst->threadNumber]++;
603
// Consistency check between the free-entry counter and countInsts().
604 assert(freeEntries == (numEntries - countInsts()));
605}
606
// Inserts a non-speculative instruction into the IQ. It is recorded in
// nonSpecInsts keyed by sequence number, appended to its thread's list and
// registered as a producer of its destination registers. Unlike insert(),
// this function does not call addToDependents() or addIfReady(). Memory
// references are passed to the memory dependence unit via insertNonSpec().
// NOTE(review): the signature line (original line 608), the statements in
// the three branches below (original lines 613/615/617) and one statement at
// original line 646 are missing from this extract.
607void
609{
610 // @todo: Clean up this code; can do it by setting inst as unable
611 // to issue, then calling normal insert on the inst.
// NOTE(review): new_inst is dereferenced here, before the assert(new_inst)
// below, so that assert cannot catch a null pointer in time.
612 if (new_inst->isFloating()) {
614 } else if (new_inst->isVector()) {
616 } else {
618 }
619
620 assert(new_inst);
621
622 nonSpecInsts[new_inst->seqNum] = new_inst;
623
624 DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s "
625 "to the IQ.\n",
626 new_inst->seqNum, new_inst->pcState());
627
628 assert(freeEntries != 0);
629
630 instList[new_inst->threadNumber].push_back(new_inst);
631
632 --freeEntries;
633
634 new_inst->setInIQ();
635
636 // Have this instruction set itself as the producer of its destination
637 // register(s).
638 addToProducers(new_inst);
639
640 // If it's a memory instruction, add it to the memory dependency
641 // unit.
642 if (new_inst->isMemRef()) {
643 memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
644 }
645
647
648 count[new_inst->threadNumber]++;
649
650 assert(freeEntries == (numEntries - countInsts()));
651}
652
// Inserts a barrier instruction: first registers it with its thread's memory
// dependence unit via insertBarrier(), then inserts it into the IQ as a
// non-speculative instruction.
// NOTE(review): the signature line (original line 654) is missing from this
// extract; presumably InstructionQueue::insertBarrier(...).
653void
655{
656 memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
657
658 insertNonSpec(barr_inst);
659}
660
// Removes and returns the instruction at the front of instsToExecute.
// The list must not be empty (asserted).
// NOTE(review): the return type and signature lines (original lines 661-662)
// and the statements in the three branches below (original lines
// 668/670/672) are missing from this extract.
663{
664 assert(!instsToExecute.empty());
// Move the pointer out of the list node before the node is popped.
665 DynInstPtr inst = std::move(instsToExecute.front());
666 instsToExecute.pop_front();
667 if (inst->isFloating()) {
669 } else if (inst->isVector()) {
671 } else {
673 }
674 return inst;
675}
676
// Inserts an entry for op_class into listOrder, keyed by the sequence number
// of the instruction at the top of that class's ready queue. The entry is
// placed before the first existing entry with a larger oldestInst, and the
// class is marked as being on the list. The ready queue for op_class must
// not be empty (asserted).
// NOTE(review): the signature line (original line 678) is missing from this
// extract; presumably InstructionQueue::addToOrderList(OpClass op_class).
677void
679{
680 assert(!readyInsts[op_class].empty());
681
682 ListOrderEntry queue_entry;
683
684 queue_entry.queueType = op_class;
685
686 queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
687
688 ListOrderIt list_it = listOrder.begin();
689 ListOrderIt list_end_it = listOrder.end();
690
// Linear scan for the insertion point.
691 while (list_it != list_end_it) {
692 if ((*list_it).oldestInst > queue_entry.oldestInst) {
693 break;
694 }
695
696 list_it++;
697 }
698
// Remember where this class sits in the list so it can be found again.
699 readyIt[op_class] = listOrder.insert(list_it, queue_entry);
700 queueOnList[op_class] = true;
701}
702
// Inserts a fresh listOrder entry for the op class referenced by
// list_order_it, keyed by the instruction now at the top of that class's
// ready queue. The search for the insertion point starts just after
// list_order_it. This function does not erase list_order_it itself; the
// calls visible in this extract erase it afterwards.
// NOTE(review): the signature line (original line 704) is missing from this
// extract; presumably
// InstructionQueue::moveToYoungerInst(ListOrderIt list_order_it).
703void
705{
706 // Get iterator of next item on the list
707 // Delete the original iterator
708 // Determine if the next item is either the end of the list or younger
709 // than the new instruction. If so, then add in a new iterator right here.
710 // If not, then move along.
711 ListOrderEntry queue_entry;
712 OpClass op_class = (*list_order_it).queueType;
713 ListOrderIt next_it = list_order_it;
714
715 ++next_it;
716
717 queue_entry.queueType = op_class;
718 queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
719
// Skip over entries that are older than the new head instruction.
720 while (next_it != listOrder.end() &&
721 (*next_it).oldestInst < queue_entry.oldestInst) {
722 ++next_it;
723 }
724
725 readyIt[op_class] = listOrder.insert(next_it, queue_entry);
726}
727
// Called when a functional unit finishes an instruction: wakes the CPU via
// the IEW stage, schedules the FU to be freed next cycle when a valid index
// (> -1) is given, bumps the size field of the issue-to-execute queue slot at
// access(-1) and appends the instruction to instsToExecute.
// The CPU must not be switched out (asserted).
// NOTE(review): the signature line (original line 729) and one line at
// original line 735 are missing from this extract.
728void
730{
731 DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
732 assert(!cpu->switchedOut());
733 // The CPU could have been sleeping until this op completed (*extremely*
734 // long latency op). Wake it if it was. This may be overkill.
736 iewStage->wakeCPU();
737
// -1 is what FUCompletion::process() passes when the FU must not be freed
// here.
738 if (fu_idx > -1)
739 fuPool->freeUnitNextCycle(fu_idx);
740
741 // @todo: Ensure that these FU Completions happen at the beginning
742 // of a cycle, otherwise they could add too many instructions to
743 // the queue.
744 issueToExecuteQueue->access(-1)->size++;
745 instsToExecute.push_back(inst);
746}
747
// Issues ready instructions for this cycle. Deferred and blocked memory
// instructions that can now execute are first moved to the ready lists.
// Then listOrder is walked (each entry names an op class and its oldest
// ready instruction) until totalWidth instructions have been issued or the
// list ends. Squashed instructions are dropped; others are issued if a
// functional unit is available (or none is needed), otherwise the FU-busy
// statistics are bumped and the entry is skipped.
// NOTE(review): the signature line (original line 752) and several
// statements (original lines 789-793, 810, 822-826, 843, 847, 860, 915) are
// missing from this extract.
748// @todo: Figure out a better way to remove the squashed items from the
749// lists. Checking the top item of each list to see if it's squashed
750// wastes time and forces jumps.
751void
753{
754 DPRINTF(IQ, "Attempting to schedule ready instructions from "
755 "the IQ.\n");
756
757 IssueStruct *i2e_info = issueToExecuteQueue->access(0);
758
759 DynInstPtr mem_inst;
760 while ((mem_inst = getDeferredMemInstToExecute())) {
761 addReadyMemInst(mem_inst);
762 }
763
764 // See if any cache blocked instructions are able to be executed
765 while ((mem_inst = getBlockedMemInstToExecute())) {
766 addReadyMemInst(mem_inst);
767 }
768
769 // Have iterator to head of the list
770 // While I haven't exceeded bandwidth or reached the end of the list,
771 // Try to get a FU that can do what this op needs.
772 // If successful, change the oldestInst to the new top of the list, put
773 // the queue in the proper place in the list.
774 // Increment the iterator.
775 // This will avoid trying to schedule a certain op class if there are no
776 // FUs that handle it.
777 int total_issued = 0;
778 ListOrderIt order_it = listOrder.begin();
779 ListOrderIt order_end_it = listOrder.end();
780
781 while (total_issued < totalWidth && order_it != order_end_it) {
782 OpClass op_class = (*order_it).queueType;
783
784 assert(!readyInsts[op_class].empty());
785
786 DynInstPtr issuing_inst = readyInsts[op_class].top();
787
788 if (issuing_inst->isFloating()) {
790 } else if (issuing_inst->isVector()) {
792 } else {
794 }
795
// The list entry must describe the instruction at the top of its queue.
796 assert(issuing_inst->seqNum == (*order_it).oldestInst);
797
// Squashed instructions are removed from the ready queue without issuing.
798 if (issuing_inst->isSquashed()) {
799 readyInsts[op_class].pop();
800
801 if (!readyInsts[op_class].empty()) {
802 moveToYoungerInst(order_it);
803 } else {
804 readyIt[op_class] = listOrder.end();
805 queueOnList[op_class] = false;
806 }
807
// Post-increment advances order_it before the old entry is erased.
808 listOrder.erase(order_it++);
809
811
812 continue;
813 }
814
815 int idx = FUPool::NoCapableFU;
816 Cycles op_latency = Cycles(1);
817 ThreadID tid = issuing_inst->threadNumber;
818
// Instructions with No_OpClass keep idx == NoCapableFU and a latency of 1.
819 if (op_class != No_OpClass) {
820 idx = fuPool->getUnit(op_class);
821 if (issuing_inst->isFloating()) {
823 } else if (issuing_inst->isVector()) {
825 } else {
827 }
828 if (idx > FUPool::NoFreeFU) {
829 op_latency = fuPool->getOpLatency(op_class);
830 }
831 }
832
833 // If we have an instruction that doesn't require a FU, or a
834 // valid FU, then schedule for execution.
835 if (idx != FUPool::NoFreeFU) {
// Single-cycle ops go straight onto the to-execute list.
836 if (op_latency == Cycles(1)) {
837 i2e_info->size++;
838 instsToExecute.push_back(issuing_inst);
839
840 // Add the FU onto the list of FU's to be freed next
841 // cycle if we used one.
// NOTE(review): the statement guarded by this `if` (original line 843) is
// missing from this extract.
842 if (idx >= 0)
844 } else {
845 bool pipelined = fuPool->isPipelined(op_class);
846 // Generate completion event for the FU
848 FUCompletion *execution = new FUCompletion(issuing_inst,
849 idx, this);
850
// The completion event fires op_latency - 1 cycles from now.
851 cpu->schedule(execution,
852 cpu->clockEdge(Cycles(op_latency - 1)));
853
854 if (!pipelined) {
855 // If FU isn't pipelined, then it must be freed
856 // upon the execution completing.
857 execution->setFreeFU();
858 } else {
859 // Add the FU onto the list of FU's to be freed next cycle.
861 }
862 }
863
864 DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
865 "[sn:%llu]\n",
866 tid, issuing_inst->pcState(),
867 issuing_inst->seqNum);
868
869 readyInsts[op_class].pop();
870
871 if (!readyInsts[op_class].empty()) {
872 moveToYoungerInst(order_it);
873 } else {
874 readyIt[op_class] = listOrder.end();
875 queueOnList[op_class] = false;
876 }
877
878 issuing_inst->setIssued();
879 ++total_issued;
880
881#if TRACING_ON
882 issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
883#endif
884
// Record only the first time this instruction is issued.
885 if (issuing_inst->firstIssue == -1)
886 issuing_inst->firstIssue = curTick();
887
888 if (!issuing_inst->isMemRef()) {
889 // Memory instructions can not be freed from the IQ until they
890 // complete.
891 ++freeEntries;
892 count[tid]--;
893 issuing_inst->clearInIQ();
894 } else {
895 memDepUnit[tid].issue(issuing_inst);
896 }
897
898 listOrder.erase(order_it++);
899 iqStats.statIssuedInstType[tid][op_class]++;
900 } else {
// No free FU: count the stall and move on to the next list entry.
901 iqStats.statFuBusy[op_class]++;
902 iqStats.fuBusy[tid]++;
903 ++order_it;
904 }
905 }
906
907 iqStats.numIssuedDist.sample(total_issued);
908 iqStats.instsIssued+= total_issued;
909
910 // If we issued any instructions, tell the CPU we had activity.
911 // @todo If the way deferred memory instructions are handled due to
912 // translation changes then the deferredMemInsts condition should be
913 // removed from the code below.
914 if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
916 } else {
917 DPRINTF(IQ, "Not able to schedule any instructions.\n");
918 }
919}
920
// Marks the non-speculative instruction with the given sequence number as
// at-commit and able to issue. Non-memory instructions are added to the
// ready list if ready; memory references are reported to the thread's memory
// dependence unit. The instruction is then removed from nonSpecInsts.
// It must be present in nonSpecInsts (asserted).
// NOTE(review): the signature line (original line 922) is missing from this
// extract; `inst` is used as the nonSpecInsts lookup key, so it is presumably
// a sequence number rather than an instruction pointer -- confirm.
921void
923{
924 DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready "
925 "to execute.\n", inst);
926
927 NonSpecMapIt inst_it = nonSpecInsts.find(inst);
928
929 assert(inst_it != nonSpecInsts.end());
930
931 ThreadID tid = (*inst_it).second->threadNumber;
932
933 (*inst_it).second->setAtCommit();
934
935 (*inst_it).second->setCanIssue();
936
937 if (!(*inst_it).second->isMemRef()) {
938 addIfReady((*inst_it).second);
939 } else {
940 memDepUnit[tid].nonSpecInstReady((*inst_it).second);
941 }
942
// Drop the map's reference before erasing the entry.
943 (*inst_it).second = NULL;
944
945 nonSpecInsts.erase(inst_it);
946}
947
// Removes from the front of the thread's instruction list every instruction
// whose sequence number is less than or equal to `inst`.
// NOTE(review): the signature line (original line 949) is missing from this
// extract; presumably
// InstructionQueue::commit(const InstSeqNum &inst, ThreadID tid).
948void
950{
951 DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n",
952 tid,inst);
953
954 ListIt iq_it = instList[tid].begin();
955
956 while (iq_it != instList[tid].end() &&
957 (*iq_it)->seqNum <= inst) {
// iq_it is advanced before pop_front() removes the element it pointed to.
958 ++iq_it;
959 instList[tid].pop_front();
960 }
961
962 assert(freeEntries == (numEntries - countInsts()));
963}
964
// Wakes every instruction waiting on the destination registers of
// completed_inst and returns how many dependents were woken. For memory
// references the memory dependence unit is notified and the IQ entry is
// released here; non-memory read/write barriers are also completed in the
// memory dependence unit. Registers with a fixed mapping, and pinned
// registers that still have pending writes, are skipped.
// completed_inst must not be squashed (asserted).
// NOTE(review): the signature line (original line 966), the statements in
// the three branches below (original lines 972/974/976) and one statement at
// original line 1020 are missing from this extract.
965int
967{
968 int dependents = 0;
969
970 // The instruction queue here takes care of both floating and int ops
971 if (completed_inst->isFloating()) {
973 } else if (completed_inst->isVector()) {
975 } else {
977 }
978
979 completed_inst->lastWakeDependents = curTick();
980
981 DPRINTF(IQ, "Waking dependents of completed instruction.\n");
982
983 assert(!completed_inst->isSquashed());
984
985 // Tell the memory dependence unit to wake any dependents on this
986 // instruction if it is a memory instruction. Also complete the memory
987 // instruction at this point since we know it executed without issues.
988 ThreadID tid = completed_inst->threadNumber;
989 if (completed_inst->isMemRef()) {
990 memDepUnit[tid].completeInst(completed_inst);
991
992 DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
993 completed_inst->pcState(), completed_inst->seqNum);
994
// Memory instructions keep their IQ entry at issue (see
// scheduleReadyInsts), so the entry is released here on completion.
995 ++freeEntries;
996 completed_inst->memOpDone(true);
997 count[tid]--;
998 } else if (completed_inst->isReadBarrier() ||
999 completed_inst->isWriteBarrier()) {
1000 // Completes a non mem ref barrier
1001 memDepUnit[tid].completeInst(completed_inst);
1002 }
1003
1004 for (int dest_reg_idx = 0;
1005 dest_reg_idx < completed_inst->numDestRegs();
1006 dest_reg_idx++)
1007 {
1008 PhysRegIdPtr dest_reg =
1009 completed_inst->renamedDestIdx(dest_reg_idx);
1010
1011 // Special case of uniq or control registers. They are not
1012 // handled by the IQ and thus have no dependency graph entry.
1013 if (dest_reg->isFixedMapping()) {
1014 DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n",
1015 dest_reg->index(), dest_reg->className());
1016 continue;
1017 }
1018
1019 // Avoid waking up dependents if the register is pinned
1021 if (dest_reg->isPinned())
1022 completed_inst->setPinnedRegsWritten();
1023
// Dependents are woken only once no pinned writes remain outstanding.
1024 if (dest_reg->getNumPinnedWritesToComplete() != 0) {
1025 DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n",
1026 dest_reg->index(), dest_reg->className());
1027 continue;
1028 }
1029
1030 DPRINTF(IQ, "Waking any dependents on register %i (%s).\n",
1031 dest_reg->index(),
1032 dest_reg->className());
1033
1034 //Go through the dependency chain, marking the registers as
1035 //ready within the waiting instructions.
1036 DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
1037
// pop() is called repeatedly until it yields a null instruction.
1038 while (dep_inst) {
1039 DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
1040 "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
1041
1042 // Might want to give more information to the instruction
1043 // so that it knows which of its source registers is
1044 // ready. However that would mean that the dependency
1045 // graph entries would need to hold the src_reg_idx.
1046 dep_inst->markSrcRegReady();
1047
1048 addIfReady(dep_inst);
1049
1050 dep_inst = dependGraph.pop(dest_reg->flatIndex());
1051
1052 ++dependents;
1053 }
1054
1055 // Reset the head node now that all of its dependents have
1056 // been woken up.
1057 assert(dependGraph.empty(dest_reg->flatIndex()));
1058 dependGraph.clearInst(dest_reg->flatIndex());
1059
1060 // Mark the scoreboard as having that register ready.
1061 regScoreboard[dest_reg->flatIndex()] = true;
1062 }
1063 return dependents;
1064}
1065
// Pushes a ready memory instruction onto the ready queue of its op class and
// updates listOrder: the class is added if it is not on the list yet, or
// re-inserted if the new top of the queue is older than the recorded
// oldestInst.
// NOTE(review): the signature line (original line 1067) is missing from this
// extract; presumably InstructionQueue::addReadyMemInst(...).
1066void
1068{
1069 OpClass op_class = ready_inst->opClass();
1070
1071 readyInsts[op_class].push(ready_inst);
1072
1073 // Will need to reorder the list if either a queue is not on the list,
1074 // or it has an older instruction than last time.
1075 if (!queueOnList[op_class]) {
1076 addToOrderList(op_class);
1077 } else if (readyInsts[op_class].top()->seqNum <
1078 (*readyIt[op_class]).oldestInst) {
// Remove the stale entry, then insert a new one at the right position.
1079 listOrder.erase(readyIt[op_class]);
1080 addToOrderList(op_class);
1081 }
1082
1083 DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1084 "the ready list, PC %s opclass:%i [sn:%llu].\n",
1085 ready_inst->pcState(), op_class, ready_inst->seqNum);
1086}
1087
// Prepares a memory instruction for re-execution: clears its translation
// started/completed flags and its can-issue flag, then asks the thread's
// memory dependence unit to reschedule it.
// NOTE(review): the signature line (original line 1089) is missing from this
// extract; presumably InstructionQueue::rescheduleMemInst(...).
1088void
1090{
1091 DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
1092
1093 // Reset DTB translation state
1094 resched_inst->translationStarted(false);
1095 resched_inst->translationCompleted(false);
1096
1097 resched_inst->clearCanIssue();
1098 memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
1099}
1100
// Calls replay() on the memory dependence unit of the instruction's thread.
// Only the thread number of replay_inst is used here.
// NOTE(review): the signature line (original line 1102) is missing from this
// extract; presumably InstructionQueue::replayMemInst(...).
1101void
1103{
1104 memDepUnit[replay_inst->threadNumber].replay();
1105}
1106
// Appends the instruction to deferredMemInsts; it is picked up again by
// getDeferredMemInstToExecute() once its translation completes or it is
// squashed.
// NOTE(review): the signature line (original line 1108) is missing from this
// extract; presumably InstructionQueue::deferMemInst(...).
1107void
1109{
1110 deferredMemInsts.push_back(deferred_inst);
1111}
1112
// Records a memory instruction as blocked: clears its issued and can-issue
// flags and appends it to blockedMemInsts.
// NOTE(review): the signature line (original line 1114) is missing from this
// extract; presumably InstructionQueue::blockMemInst(...).
1113void
1115{
1116 blocked_inst->clearIssued();
1117 blocked_inst->clearCanIssue();
1118 blockedMemInsts.push_back(blocked_inst);
1119 DPRINTF(IQ, "Memory inst [sn:%llu] PC %s is blocked, will be "
1120 "reissued later\n", blocked_inst->seqNum,
1121 blocked_inst->pcState());
1122}
1123
// Logs that the cache is unblocked and wakes the CPU.
// NOTE(review): the signature line (original line 1125) and one statement at
// original line 1129 are missing from this extract; getBlockedMemInstToExecute()
// drains retryMemInsts, so the missing line presumably moves blockedMemInsts
// there -- confirm against upstream.
1124void
1126{
1127 DPRINTF(IQ, "Cache is unblocked, rescheduling blocked memory "
1128 "instructions\n");
1130 // Get the CPU ticking again
1131 cpu->wakeCPU();
1132}
1133
// Returns the first deferred memory instruction whose translation has
// completed or which has been squashed, removing it from deferredMemInsts.
// Returns nullptr when no such instruction exists.
// NOTE(review): the return type and signature lines (original lines
// 1134-1135) are missing from this extract.
1136{
1137 for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
1138 ++it) {
1139 if ((*it)->translationCompleted() || (*it)->isSquashed()) {
// The function returns right after erase(), so `it` is not used again.
1140 DynInstPtr mem_inst = std::move(*it);
1141 deferredMemInsts.erase(it);
1142 return mem_inst;
1143 }
1144 }
1145 return nullptr;
1146}
1147
// Removes and returns the instruction at the front of retryMemInsts, or
// returns nullptr when that list is empty.
// NOTE(review): the return type and signature lines (original lines
// 1148-1149) are missing from this extract.
1150{
1151 if (retryMemInsts.empty()) {
1152 return nullptr;
1153 } else {
1154 DynInstPtr mem_inst = std::move(retryMemInsts.front());
1155 retryMemInsts.pop_front();
1156 return mem_inst;
1157 }
1158}
1159
// Reports a store / faulting-load pair to the memory dependence unit of the
// store's thread via violation().
// NOTE(review): the first signature line (original line 1161) and one
// statement at original line 1164 are missing from this extract.
1160void
1162 const DynInstPtr &faulting_load)
1163{
1165 memDepUnit[store->threadNumber].violation(store, faulting_load);
1166}
1167
// Starts a squash for the given thread: records the squash sequence number
// read from the commit information (doneSeqNum), squashes the IQ contents
// via doSquash() and then tells the thread's memory dependence unit to
// squash as well.
// NOTE(review): the signature line (original line 1169) is missing from this
// extract; presumably InstructionQueue::squash(ThreadID tid).
1168void
1170{
1171 DPRINTF(IQ, "[tid:%i] Starting to squash instructions in "
1172 "the IQ.\n", tid);
1173
1174 // Read instruction sequence number of last instruction out of the
1175 // time buffer.
1176 squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
1177
1178 doSquash(tid);
1179
1180 // Also tell the memory dependence unit to squash.
1181 memDepUnit[tid].squash(squashedSeqNum[tid], tid);
1182}
1183
1184void
1186{
1187 // Start at the tail.
1188 ListIt squash_it = instList[tid].end();
1189 --squash_it;
1190
1191 DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n",
1192 tid, squashedSeqNum[tid]);
1193
1194 // Squash any instructions younger than the squashed sequence number
1195 // given.
1196 while (squash_it != instList[tid].end() &&
1197 (*squash_it)->seqNum > squashedSeqNum[tid]) {
1198
1199 DynInstPtr squashed_inst = (*squash_it);
1200 if (squashed_inst->isFloating()) {
1202 } else if (squashed_inst->isVector()) {
1204 } else {
1206 }
1207
1208 // Only handle the instruction if it actually is in the IQ and
1209 // hasn't already been squashed in the IQ.
1210 if (squashed_inst->threadNumber != tid ||
1211 squashed_inst->isSquashedInIQ()) {
1212 --squash_it;
1213 continue;
1214 }
1215
1216 if (!squashed_inst->isIssued() ||
1217 (squashed_inst->isMemRef() &&
1218 !squashed_inst->memOpDone())) {
1219
1220 DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n",
1221 tid, squashed_inst->seqNum, squashed_inst->pcState());
1222
1223 bool is_acq_rel = squashed_inst->isFullMemBarrier() &&
1224 (squashed_inst->isLoad() ||
1225 (squashed_inst->isStore() &&
1226 !squashed_inst->isStoreConditional()));
1227
1228 // Remove the instruction from the dependency list.
1229 if (is_acq_rel ||
1230 (!squashed_inst->isNonSpeculative() &&
1231 !squashed_inst->isStoreConditional() &&
1232 !squashed_inst->isAtomic() &&
1233 !squashed_inst->isReadBarrier() &&
1234 !squashed_inst->isWriteBarrier())) {
1235
1236 for (int src_reg_idx = 0;
1237 src_reg_idx < squashed_inst->numSrcRegs();
1238 src_reg_idx++)
1239 {
1240 PhysRegIdPtr src_reg =
1241 squashed_inst->renamedSrcIdx(src_reg_idx);
1242
1243 // Only remove it from the dependency graph if it
1244 // was placed there in the first place.
1245
1246 // Instead of doing a linked list traversal, we
1247 // can just remove these squashed instructions
1248 // either at issue time, or when the register is
1249 // overwritten. The only downside to this is it
1250 // leaves more room for error.
1251
1252 if (!squashed_inst->readySrcIdx(src_reg_idx) &&
1253 !src_reg->isFixedMapping()) {
1254 dependGraph.remove(src_reg->flatIndex(),
1255 squashed_inst);
1256 }
1257
1259 }
1260
1261 } else if (!squashed_inst->isStoreConditional() ||
1262 !squashed_inst->isCompleted()) {
1263 NonSpecMapIt ns_inst_it =
1264 nonSpecInsts.find(squashed_inst->seqNum);
1265
1266 // we remove non-speculative instructions from
1267 // nonSpecInsts already when they are ready, and so we
1268 // cannot always expect to find them
1269 if (ns_inst_it == nonSpecInsts.end()) {
1270 // loads that became ready but stalled on a
1271 // blocked cache are already removed from
1272 // nonSpecInsts, and have not faulted
1273 assert(squashed_inst->getFault() != NoFault ||
1274 squashed_inst->isMemRef());
1275 } else {
1276
1277 (*ns_inst_it).second = NULL;
1278
1279 nonSpecInsts.erase(ns_inst_it);
1280
1282 }
1283 }
1284
1285 // Might want to also clear out the head of the dependency graph.
1286
1287 // Mark it as squashed within the IQ.
1288 squashed_inst->setSquashedInIQ();
1289
1290 // @todo: Remove this hack where several statuses are set so the
1291 // inst will flow through the rest of the pipeline.
1292 squashed_inst->setIssued();
1293 squashed_inst->setCanCommit();
1294 squashed_inst->clearInIQ();
1295
1296 //Update Thread IQ Count
1297 count[squashed_inst->threadNumber]--;
1298
1299 ++freeEntries;
1300 }
1301
1302 // IQ clears out the heads of the dependency graph only when
1303 // instructions reach writeback stage. If an instruction is squashed
1304 // before writeback stage, its head of dependency graph would not be
1305 // cleared out; it holds the instruction's DynInstPtr. This
1306 // prevents freeing the squashed instruction's DynInst.
1307 // Thus, we need to manually clear out the squashed instructions'
1308 // heads of dependency graph.
1309 for (int dest_reg_idx = 0;
1310 dest_reg_idx < squashed_inst->numDestRegs();
1311 dest_reg_idx++)
1312 {
1313 PhysRegIdPtr dest_reg =
1314 squashed_inst->renamedDestIdx(dest_reg_idx);
1315 if (dest_reg->isFixedMapping()){
1316 continue;
1317 }
1318 assert(dependGraph.empty(dest_reg->flatIndex()));
1319 dependGraph.clearInst(dest_reg->flatIndex());
1320 }
1321 instList[tid].erase(squash_it--);
1323 }
1324}
1325
1326bool
1328 const DynInstPtr &lhs, const DynInstPtr &rhs) const
1329{
1330 return lhs->seqNum > rhs->seqNum;
1331}
1332
1333bool
1335{
1336 // Loop through the instruction's source registers, adding
1337 // them to the dependency list if they are not ready.
1338 int8_t total_src_regs = new_inst->numSrcRegs();
1339 bool return_val = false;
1340
1341 for (int src_reg_idx = 0;
1342 src_reg_idx < total_src_regs;
1343 src_reg_idx++)
1344 {
1345 // Only add it to the dependency graph if it's not ready.
1346 if (!new_inst->readySrcIdx(src_reg_idx)) {
1347 PhysRegIdPtr src_reg = new_inst->renamedSrcIdx(src_reg_idx);
1348
1349 // Check the IQ's scoreboard to make sure the register
1350 // hasn't become ready while the instruction was in flight
1351 // between stages. Only if it really isn't ready should
1352 // it be added to the dependency graph.
1353 if (src_reg->isFixedMapping()) {
1354 continue;
1355 } else if (!regScoreboard[src_reg->flatIndex()]) {
1356 DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1357 "is being added to the dependency chain.\n",
1358 new_inst->pcState(), src_reg->index(),
1359 src_reg->className());
1360
1361 dependGraph.insert(src_reg->flatIndex(), new_inst);
1362
1363 // Change the return value to indicate that something
1364 // was added to the dependency graph.
1365 return_val = true;
1366 } else {
1367 DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1368 "became ready before it reached the IQ.\n",
1369 new_inst->pcState(), src_reg->index(),
1370 src_reg->className());
1371 // Mark a register ready within the instruction.
1372 new_inst->markSrcRegReady(src_reg_idx);
1373 }
1374 }
1375 }
1376
1377 return return_val;
1378}
1379
1380void
1382{
1383 // Nothing really needs to be marked when an instruction becomes
1384 // the producer of a register's value, but for convenience a ptr
1385 // to the producing instruction will be placed in the head node of
1386 // the dependency links.
1387 int8_t total_dest_regs = new_inst->numDestRegs();
1388
1389 for (int dest_reg_idx = 0;
1390 dest_reg_idx < total_dest_regs;
1391 dest_reg_idx++)
1392 {
1393 PhysRegIdPtr dest_reg = new_inst->renamedDestIdx(dest_reg_idx);
1394
1395 // Some registers have fixed mapping, and there is no need to track
1396 // dependencies as these instructions must be executed at commit.
1397 if (dest_reg->isFixedMapping()) {
1398 continue;
1399 }
1400
1401 if (!dependGraph.empty(dest_reg->flatIndex())) {
1402 dependGraph.dump();
1403 panic("Dependency graph %i (%s) (flat: %i) not empty!",
1404 dest_reg->index(), dest_reg->className(),
1405 dest_reg->flatIndex());
1406 }
1407
1408 dependGraph.setInst(dest_reg->flatIndex(), new_inst);
1409
1410 // Mark the scoreboard to say it's not yet ready.
1411 regScoreboard[dest_reg->flatIndex()] = false;
1412 }
1413}
1414
1415void
1417{
1418 // If the instruction now has all of its source registers
1419 // available, then add it to the list of ready instructions.
1420 if (inst->readyToIssue()) {
1421
1422 //Add the instruction to the proper ready list.
1423 if (inst->isMemRef()) {
1424
1425 DPRINTF(IQ, "Checking if memory instruction can issue.\n");
1426
1427 // Message to the mem dependence unit that this instruction has
1428 // its registers ready.
1429 memDepUnit[inst->threadNumber].regsReady(inst);
1430
1431 return;
1432 }
1433
1434 OpClass op_class = inst->opClass();
1435
1436 DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1437 "the ready list, PC %s opclass:%i [sn:%llu].\n",
1438 inst->pcState(), op_class, inst->seqNum);
1439
1440 readyInsts[op_class].push(inst);
1441
1442 // Will need to reorder the list if either a queue is not on the list,
1443 // or it has an older instruction than last time.
1444 if (!queueOnList[op_class]) {
1445 addToOrderList(op_class);
1446 } else if (readyInsts[op_class].top()->seqNum <
1447 (*readyIt[op_class]).oldestInst) {
1448 listOrder.erase(readyIt[op_class]);
1449 addToOrderList(op_class);
1450 }
1451 }
1452}
1453
1454int
1456{
1457 return numEntries - freeEntries;
1458}
1459
1460void
1462{
1463 for (int i = 0; i < Num_OpClasses; ++i) {
1464 cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
1465
1466 cprintf("\n");
1467 }
1468
1469 cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
1470
1471 NonSpecMapIt non_spec_it = nonSpecInsts.begin();
1472 NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
1473
1474 cprintf("Non speculative list: ");
1475
1476 while (non_spec_it != non_spec_end_it) {
1477 cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(),
1478 (*non_spec_it).second->seqNum);
1479 ++non_spec_it;
1480 }
1481
1482 cprintf("\n");
1483
1484 ListOrderIt list_order_it = listOrder.begin();
1485 ListOrderIt list_order_end_it = listOrder.end();
1486 int i = 1;
1487
1488 cprintf("List order: ");
1489
1490 while (list_order_it != list_order_end_it) {
1491 cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType,
1492 (*list_order_it).oldestInst);
1493
1494 ++list_order_it;
1495 ++i;
1496 }
1497
1498 cprintf("\n");
1499}
1500
1501
1502void
1504{
1505 for (ThreadID tid = 0; tid < numThreads; ++tid) {
1506 int num = 0;
1507 int valid_num = 0;
1508 ListIt inst_list_it = instList[tid].begin();
1509
1510 while (inst_list_it != instList[tid].end()) {
1511 cprintf("Instruction:%i\n", num);
1512 if (!(*inst_list_it)->isSquashed()) {
1513 if (!(*inst_list_it)->isIssued()) {
1514 ++valid_num;
1515 cprintf("Count:%i\n", valid_num);
1516 } else if ((*inst_list_it)->isMemRef() &&
1517 !(*inst_list_it)->memOpDone()) {
1518 // Loads that have not been marked as executed
1519 // still count towards the total instructions.
1520 ++valid_num;
1521 cprintf("Count:%i\n", valid_num);
1522 }
1523 }
1524
1525 cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1526 "Issued:%i\nSquashed:%i\n",
1527 (*inst_list_it)->pcState(),
1528 (*inst_list_it)->seqNum,
1529 (*inst_list_it)->threadNumber,
1530 (*inst_list_it)->isIssued(),
1531 (*inst_list_it)->isSquashed());
1532
1533 if ((*inst_list_it)->isMemRef()) {
1534 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1535 }
1536
1537 cprintf("\n");
1538
1539 inst_list_it++;
1540 ++num;
1541 }
1542 }
1543
1544 cprintf("Insts to Execute list:\n");
1545
1546 int num = 0;
1547 int valid_num = 0;
1548 ListIt inst_list_it = instsToExecute.begin();
1549
1550 while (inst_list_it != instsToExecute.end())
1551 {
1552 cprintf("Instruction:%i\n",
1553 num);
1554 if (!(*inst_list_it)->isSquashed()) {
1555 if (!(*inst_list_it)->isIssued()) {
1556 ++valid_num;
1557 cprintf("Count:%i\n", valid_num);
1558 } else if ((*inst_list_it)->isMemRef() &&
1559 !(*inst_list_it)->memOpDone()) {
1560 // Loads that have not been marked as executed
1561 // still count towards the total instructions.
1562 ++valid_num;
1563 cprintf("Count:%i\n", valid_num);
1564 }
1565 }
1566
1567 cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1568 "Issued:%i\nSquashed:%i\n",
1569 (*inst_list_it)->pcState(),
1570 (*inst_list_it)->seqNum,
1571 (*inst_list_it)->threadNumber,
1572 (*inst_list_it)->isIssued(),
1573 (*inst_list_it)->isSquashed());
1574
1575 if ((*inst_list_it)->isMemRef()) {
1576 cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1577 }
1578
1579 cprintf("\n");
1580
1581 inst_list_it++;
1582 ++num;
1583 }
1584}
1585
1586} // namespace o3
1587} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
ThreadID numThreads
Number of threads we're actually simulating (<= SMT_MAX_THREADS).
Definition base.hh:390
bool switchedOut() const
Determine if the CPU is switched out.
Definition base.hh:373
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
virtual std::string name() const
Definition named.hh:47
Physical register ID.
Definition reg_class.hh:394
const RegIndex & flatIndex() const
Flat index accessor.
Definition reg_class.hh:451
constexpr RegIndex index() const
Visible RegId methods.
Definition reg_class.hh:150
int getNumPinnedWritesToComplete() const
Definition reg_class.hh:474
void decrNumPinnedWritesToComplete()
Definition reg_class.hh:485
bool isPinned() const
Definition reg_class.hh:471
constexpr const char * className() const
Return a const char* with the register class name.
Definition reg_class.hh:275
bool isFixedMapping() const
Returns true if this register is always associated to the same architectural register.
Definition reg_class.hh:448
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition cpu.hh:485
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition cpu.cc:1298
void freeUnitNextCycle(int fu_idx)
Frees a FU at the end of this cycle.
Definition fu_pool.cc:190
Cycles getOpLatency(OpClass capability)
Returns the operation execution latency of the given capability.
Definition fu_pool.hh:169
bool isPipelined(OpClass capability)
Returns the issue latency of the given capability.
Definition fu_pool.hh:174
static constexpr auto NoCapableFU
Definition fu_pool.hh:142
static constexpr auto NoFreeFU
Definition fu_pool.hh:143
int getUnit(OpClass capability)
Gets a FU providing the requested capability.
Definition fu_pool.cc:162
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition iew.hh:88
void wakeCPU()
Tells the CPU to wakeup if it has descheduled itself due to no activity.
Definition iew.cc:765
FUCompletion(const DynInstPtr &_inst, int fu_idx, InstructionQueue *iq_ptr)
Construct a FU completion event.
Definition inst_queue.cc:66
virtual const char * description() const
Return a C string describing the event.
Definition inst_queue.cc:82
A standard instruction queue class.
Definition inst_queue.hh:99
std::string name() const
Returns the name of the IQ.
void commit(const InstSeqNum &inst, ThreadID tid=0)
Commits all instructions up to and including the given sequence number, for a specific thread.
gem5::o3::InstructionQueue::IQStats iqStats
void processFUCompletion(const DynInstPtr &inst, int fu_idx)
Process FU completion event.
DynInstPtr getBlockedMemInstToExecute()
Gets a memory instruction that was blocked on the cache.
std::list< DynInstPtr > instList[MaxThreads]
List of all the instructions in the IQ (some of which may be issued).
std::list< DynInstPtr > retryMemInsts
List of instructions that were cache blocked, but a retry has been seen since, so they can now be ret...
void deferMemInst(const DynInstPtr &deferred_inst)
Defers a memory instruction when its DTB translation incurs a hw page table walk.
ReadyInstQueue readyInsts[Num_OpClasses]
List of ready instructions, per op class.
unsigned totalWidth
The total number of instructions that can be issued in one cycle.
void addIfReady(const DynInstPtr &inst)
Moves an instruction to the ready queue if it is ready.
unsigned numEntries
The number of entries in the instruction queue.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a memory or write barrier into the IQ to make sure loads and stores are ordered properly.
bool queueOnList[Num_OpClasses]
Tracks if each ready queue is on the age order list.
FUPool * fuPool
Function unit pool.
int wakeDependents(const DynInstPtr &completed_inst)
Wakes all dependents of a completed instruction.
std::list< DynInstPtr > deferredMemInsts
List of instructions waiting for their DTB translation to complete (hw page table walk in progress).
TimeBuffer< IssueStruct > * issueToExecuteQueue
The queue to the execute stage.
std::list< DynInstPtr > instsToExecute
List of instructions that are ready to be executed.
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the global time buffer.
unsigned numFreeEntries()
Returns total number of free entries.
std::list< DynInstPtr > blockedMemInsts
List of instructions that have been cache blocked.
void rescheduleMemInst(const DynInstPtr &resched_inst)
Reschedules a memory instruction.
TimeBuffer< TimeStruct >::wire fromCommit
Wire to read information from timebuffer.
void insertNonSpec(const DynInstPtr &new_inst)
Inserts a new, non-speculative instruction into the IQ.
void addReadyMemInst(const DynInstPtr &ready_inst)
Adds a ready memory instruction to the ready list.
void replayMemInst(const DynInstPtr &replay_inst)
Replays a memory instruction.
void resetState()
Resets all instruction queue state.
bool isDrained() const
Determine if we are drained.
unsigned count[MaxThreads]
Per Thread IQ count.
void cacheUnblocked()
Notify instruction queue that a previous blockage has resolved.
std::map< InstSeqNum, DynInstPtr > nonSpecInsts
List of non-speculative instructions that will be scheduled once the IQ gets a signal from commit.
unsigned freeEntries
Number of free IQ entries left.
MemDepUnit memDepUnit[MaxThreads]
The memory dependence unit, which tracks/predicts memory dependences between instructions.
void dumpLists()
Debugging function to dump all the list sizes, as well as print out the list of nonspeculative instru...
void blockMemInst(const DynInstPtr &blocked_inst)
Defers a memory instruction when it is cache blocked.
void drainSanityCheck() const
Perform sanity checks after a drain.
unsigned numPhysRegs
The number of physical registers in the CPU.
DynInstPtr getDeferredMemInstToExecute()
Gets a memory instruction that was deferred due to a delayed DTB translation if it is now ready to ex...
void dumpInsts()
Debugging function to dump out all instructions that are in the IQ.
void takeOverFrom()
Takes over execution from another CPU's thread.
SMTQueuePolicy iqPolicy
IQ sharing policy for SMT.
gem5::o3::InstructionQueue::IQIOStats iqIOStats
void moveToYoungerInst(ListOrderIt age_order_it)
Called when the oldest instruction has been removed from a ready queue; this places that ready queue ...
InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
Constructs an IQ.
Definition inst_queue.cc:87
InstSeqNum squashedSeqNum[MaxThreads]
The sequence number of the squashed instruction.
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load)
Indicates an ordering violation between a store and a load.
std::list< DynInstPtr >::iterator ListIt
bool hasReadyInsts()
Returns if there are any ready instructions in the IQ.
Cycles commitToIEWDelay
Delay between commit stage and the IQ.
void resetEntries()
Resets max entries for all threads.
int countInsts()
Debugging function to count how many entries are in the IQ.
std::list< ThreadID > * activeThreads
Pointer to list of active threads.
std::list< ListOrderEntry >::iterator ListOrderIt
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets active threads list.
void addToOrderList(OpClass op_class)
Add an op class to the age order list.
ThreadID numThreads
Number of Total Threads.
TimeBuffer< TimeStruct > * timeBuffer
The backwards time buffer.
void scheduleNonSpec(const InstSeqNum &inst)
Schedules a single specific non-speculative instruction.
std::vector< bool > regScoreboard
A cache of the recently woken registers.
void scheduleReadyInsts()
Schedules ready instructions, adding the ready ones (oldest first) to the queue to execute.
bool isFull()
Returns whether or not the IQ is full.
void squash(ThreadID tid)
Squashes instructions for a thread.
IEW * iewStage
Pointer to IEW stage.
std::list< ListOrderEntry > listOrder
List that contains the age order of the oldest instruction of each ready queue.
~InstructionQueue()
Destructs the IQ.
void doSquash(ThreadID tid)
Does the actual squashing.
void setIssueToExecuteQueue(TimeBuffer< IssueStruct > *i2eQueue)
Sets the timer buffer between issue and execute.
int wbOutstanding
Number of instructions currently in flight to FUs.
void insert(const DynInstPtr &new_inst)
Inserts a new instruction into the IQ.
unsigned maxEntries[MaxThreads]
Max IQ Entries Per Thread.
CPU * cpu
Pointer to the CPU.
bool addToDependents(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a consumer.
int entryAmount(ThreadID num_threads)
Number of entries needed for given amount of threads.
DynInstPtr getInstToExecute()
Returns the oldest scheduled instruction, and removes it from the list of instructions waiting to exe...
DependencyGraph< DynInstPtr > dependGraph
ListOrderIt readyIt[Num_OpClasses]
Iterators of each ready queue.
void addToProducers(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a producer.
std::map< InstSeqNum, DynInstPtr >::iterator NonSpecMapIt
void completeInst(const DynInstPtr &inst)
Notifies completion of an instruction.
void nonSpecInstReady(const DynInstPtr &inst)
Indicate that a non-speculative instruction is ready.
void issue(const DynInstPtr &inst)
Issues the given instruction.
void insert(const DynInstPtr &inst)
Inserts a memory instruction.
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squashes all instructions up until a given sequence number for a specific thread.
void violation(const DynInstPtr &store_inst, const DynInstPtr &violating_load)
Indicates an ordering violation between a store and a younger load.
void replay()
Replays all instructions that have been rescheduled by moving them to the ready list.
void init(const BaseO3CPUParams &params, ThreadID tid, CPU *cpu)
Initializes the unit with parameters and a thread id.
void regsReady(const DynInstPtr &inst)
Indicate that an instruction has its registers ready.
void insertNonSpec(const DynInstPtr &inst)
Inserts a non-speculative memory instruction.
void reschedule(const DynInstPtr &inst)
Reschedules an instruction to be re-executed.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a barrier instruction.
void setIQ(InstructionQueue *iq_ptr)
Sets the pointer to the IQ.
Derived & ysubnames(const char **names)
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Derived & prereq(const Stat &prereq)
Set the prerequisite stat and marks this stat to print at the end of simulation.
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Statistics container.
Definition group.hh:93
Derived & init(size_type _x, size_type _y)
Derived & init(size_type size)
Set this vector to have the given size.
STL list class.
Definition stl.hh:51
Definition test.h:63
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
Bitfield< 7 > i
Definition misc_types.hh:67
static constexpr int MaxThreads
Definition limits.hh:38
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition info.hh:61
const FlagsType total
Print the total.
Definition info.hh:59
const FlagsType dist
Print the distribution.
Definition info.hh:65
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
int16_t ThreadID
Thread index/ID type.
Definition types.hh:235
static const OpClass Num_OpClasses
Definition op_class.hh:137
void cprintf(const char *format, const Args &...args)
Definition cprintf.hh:155
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
@ VecRegClass
Vector Register.
Definition reg_class.hh:63
@ VecElemClass
Vector Register Native Elem lane.
Definition reg_class.hh:65
statistics::Scalar vecInstQueueWakeupAccesses
statistics::Scalar intInstQueueWakeupAccesses
IQIOStats(statistics::Group *parent)
statistics::Scalar fpInstQueueWakeupAccesses
statistics::Vector2d statIssuedInstType
Stat for total number issued for each instruction type.
statistics::Scalar floatInstsIssued
Stat for number of floating point instructions issued.
statistics::Vector fuBusy
Number of times the FU was busy.
statistics::Scalar instsAdded
Stat for number of instructions added.
statistics::Distribution numIssuedDist
Distribution of number of instructions in the queue.
statistics::Scalar nonSpecInstsAdded
Stat for number of non-speculative instructions added.
statistics::Scalar squashedInstsExamined
Stat for number of squashed instructions examined when squashing.
statistics::Scalar miscInstsIssued
Stat for number of miscellaneous instructions issued.
statistics::Scalar branchInstsIssued
Stat for number of branch instructions issued.
statistics::Formula fuBusyRate
Number of times the FU was busy per instruction issued.
statistics::Scalar memInstsIssued
Stat for number of memory instructions issued.
statistics::Scalar intInstsIssued
Stat for number of integer instructions issued.
statistics::Formula issueRate
Number of instructions issued per cycle.
IQStats(CPU *cpu, const unsigned &total_width)
statistics::Scalar squashedOperandsExamined
Stat for number of squashed instruction operands examined when squashing.
statistics::Scalar squashedInstsIssued
Stat for number of squashed instructions that were ready to issue.
statistics::Vector statFuBusy
Distribution of the cycles it takes to issue an instruction.
statistics::Scalar squashedNonSpecRemoved
Stat for number of non-speculative instructions removed due to a squash.
Entry for the list age ordering by op class.
bool operator()(const DynInstPtr &lhs, const DynInstPtr &rhs) const

Generated on Mon Jul 10 2023 15:32:01 for gem5 by doxygen 1.9.7