gem5  v22.1.0.0
inst_queue.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2014, 2017-2020 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved.
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  */
41 
42 #include "cpu/o3/inst_queue.hh"
43 
44 #include <limits>
45 #include <vector>
46 
47 #include "base/logging.hh"
48 #include "cpu/o3/dyn_inst.hh"
49 #include "cpu/o3/fu_pool.hh"
50 #include "cpu/o3/limits.hh"
51 #include "debug/IQ.hh"
52 #include "enums/OpClass.hh"
53 #include "params/BaseO3CPU.hh"
54 #include "sim/core.hh"
55 
56 // clang complains about std::set being overloaded with Packet::set if
57 // we open up the entire namespace std
58 using std::list;
59 
60 namespace gem5
61 {
62 
63 namespace o3
64 {
65 
// Constructor of the functional-unit completion event.
// NOTE(review): the first line of the signature is not present in this
// listing. The event is created with Stat_Event_Pri priority and the
// AutoDelete flag; it records the instruction, the FU index and the owning
// instruction queue, and starts with freeFU cleared.
67  int fu_idx, InstructionQueue *iq_ptr)
68  : Event(Stat_Event_Pri, AutoDelete),
69  inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
70 {
71 }
72 
// Event callback. NOTE(review): the line naming this function is not present
// in this listing. Hands the instruction back to the IQ through
// processFUCompletion(), passing the FU index only when freeFU is set (and -1
// otherwise), then clears the instruction pointer held by the event.
73 void
75 {
76  iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
77  inst = NULL;
78 }
79 
80 
// Returns a fixed human-readable label for this event type.
// NOTE(review): the line naming this function is not present in this listing.
81 const char *
83 {
84  return "Functional unit completion";
85 }
86 
// Constructor of the instruction queue.
// NOTE(review): the start of the signature and two member initializers are
// not present in this listing.
// Caches configuration from params, sizes the dependency graph and register
// scoreboard to the total number of physical registers, initialises one
// memory dependence unit per possible thread, resets the queue state, and
// finally sets the per-thread entry limits (maxEntries) according to the SMT
// sharing policy.
88  const BaseO3CPUParams &params)
89  : cpu(cpu_ptr),
90  iewStage(iew_ptr),
91  fuPool(params.fuPool),
92  iqPolicy(params.smtIQPolicy),
93  numThreads(params.numThreads),
94  numEntries(params.numIQEntries),
95  totalWidth(params.issueWidth),
98  iqIOStats(cpu)
99 {
100  assert(fuPool);
101 
102  const auto &reg_classes = params.isa[0]->regClasses();
103  // Set the number of total physical registers
104  // As the vector registers have two addressing modes, they are added twice
105  numPhysRegs = params.numPhysIntRegs + params.numPhysFloatRegs +
106  params.numPhysVecRegs +
107  params.numPhysVecRegs * (
108  reg_classes.at(VecElemClass)->numRegs() /
109  reg_classes.at(VecRegClass)->numRegs()) +
110  params.numPhysVecPredRegs +
111  params.numPhysCCRegs;
112 
113  //Create an entry for each physical register within the
114  //dependency graph.
115  dependGraph.resize(numPhysRegs);
116 
117  // Resize the register scoreboard.
118  regScoreboard.resize(numPhysRegs);
119 
120  //Initialize Mem Dependence Units
121  for (ThreadID tid = 0; tid < MaxThreads; tid++) {
122  memDepUnit[tid].init(params, tid, cpu_ptr);
123  memDepUnit[tid].setIQ(this);
124  }
125 
126  resetState();
127 
128  //Figure out resource sharing policy
129  if (iqPolicy == SMTQueuePolicy::Dynamic) {
130  //Set max entries to the total IQ capacity (numEntries)
131  for (ThreadID tid = 0; tid < numThreads; tid++) {
132  maxEntries[tid] = numEntries;
133  }
134 
135  } else if (iqPolicy == SMTQueuePolicy::Partitioned) {
136  //@todo:make work if part_amt doesn't divide evenly.
137  int part_amt = numEntries / numThreads;
138 
139  //Divide the IQ entries up evenly
140  for (ThreadID tid = 0; tid < numThreads; tid++) {
141  maxEntries[tid] = part_amt;
142  }
143 
144  DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
145  "%i entries per thread.\n",part_amt);
146  } else if (iqPolicy == SMTQueuePolicy::Threshold) {
147  // smtIQThreshold is interpreted as a percentage of numEntries.
148  double threshold = (double)params.smtIQThreshold / 100;
149 
150  int thresholdIQ = (int)((double)threshold * numEntries);
151 
152  //Divide up by threshold amount
153  for (ThreadID tid = 0; tid < numThreads; tid++) {
154  maxEntries[tid] = thresholdIQ;
155  }
156 
157  DPRINTF(IQ, "IQ sharing policy set to Threshold:"
158  "%i entries per thread.\n",thresholdIQ);
159  }
160  // Thread slots beyond the configured thread count get no entries.
161  for (ThreadID tid = numThreads; tid < MaxThreads; tid++) {
162  maxEntries[tid] = 0;
163  }
164 }
163 
// NOTE(review): the signature line is not present in this listing; this is
// presumably the destructor body. Resets the dependency graph and, in DEBUG
// builds only, prints the graph's traversal/removal counters.
165 {
166  dependGraph.reset();
167 #ifdef DEBUG
168  cprintf("Nodes traversed: %i, removed: %i\n",
169  dependGraph.nodesTraversed, dependGraph.nodesRemoved);
170 #endif
171 }
172 
// Returns the owning CPU's name with ".iq" appended.
// NOTE(review): the line naming this function is not present in this listing.
173 std::string
175 {
176  return cpu->name() + ".iq";
177 }
178 
// Constructs the IQ statistics group as a child of the CPU's stats group.
// Every statistic is declared through ADD_STAT with a unit and a description;
// issueRate is additionally given the formula
// instsIssued / cpu->baseStats.numCycles.
// total_width is used as the upper bound when initialising one of the
// distributions in the body.
// NOTE(review): many statement lines of the body are not present in this
// listing (the embedded line numbers skip), so several of the chained
// .init()/';' fragments below appear without the stat they apply to.
179 InstructionQueue::IQStats::IQStats(CPU *cpu, const unsigned &total_width)
180  : statistics::Group(cpu),
181  ADD_STAT(instsAdded, statistics::units::Count::get(),
182  "Number of instructions added to the IQ (excludes non-spec)"),
183  ADD_STAT(nonSpecInstsAdded, statistics::units::Count::get(),
184  "Number of non-speculative instructions added to the IQ"),
185  ADD_STAT(instsIssued, statistics::units::Count::get(),
186  "Number of instructions issued"),
187  ADD_STAT(intInstsIssued, statistics::units::Count::get(),
188  "Number of integer instructions issued"),
189  ADD_STAT(floatInstsIssued, statistics::units::Count::get(),
190  "Number of float instructions issued"),
191  ADD_STAT(branchInstsIssued, statistics::units::Count::get(),
192  "Number of branch instructions issued"),
193  ADD_STAT(memInstsIssued, statistics::units::Count::get(),
194  "Number of memory instructions issued"),
195  ADD_STAT(miscInstsIssued, statistics::units::Count::get(),
196  "Number of miscellaneous instructions issued"),
197  ADD_STAT(squashedInstsIssued, statistics::units::Count::get(),
198  "Number of squashed instructions issued"),
199  ADD_STAT(squashedInstsExamined, statistics::units::Count::get(),
200  "Number of squashed instructions iterated over during squash; "
201  "mainly for profiling"),
202  ADD_STAT(squashedOperandsExamined, statistics::units::Count::get(),
203  "Number of squashed operands that are examined and possibly "
204  "removed from graph"),
205  ADD_STAT(squashedNonSpecRemoved, statistics::units::Count::get(),
206  "Number of squashed non-spec instructions that were removed"),
207  ADD_STAT(numIssuedDist, statistics::units::Count::get(),
208  "Number of insts issued each cycle"),
209  ADD_STAT(statFuBusy, statistics::units::Count::get(),
210  "attempts to use FU when none available"),
211  ADD_STAT(statIssuedInstType, statistics::units::Count::get(),
212  "Number of instructions issued per FU type, per thread"),
213  ADD_STAT(issueRate, statistics::units::Rate<
214  statistics::units::Count, statistics::units::Cycle>::get(),
215  "Inst issue rate", instsIssued / cpu->baseStats.numCycles),
216  ADD_STAT(fuBusy, statistics::units::Count::get(), "FU busy when requested"),
217  ADD_STAT(fuBusyRate, statistics::units::Rate<
218  statistics::units::Count, statistics::units::Count>::get(),
219  "FU busy rate (busy events/executed inst)")
220 {
221  instsAdded
222  .prereq(instsAdded);
223 
226 
229 
232 
235 
238 
241 
244 
247 
250 
253 
256 /*
257  queueResDist
258  .init(Num_OpClasses, 0, 99, 2)
259  .name(name() + ".IQ:residence:")
260  .desc("cycles from dispatch to issue")
261  .flags(total | pdf | cdf )
262  ;
263  for (int i = 0; i < Num_OpClasses; ++i) {
264  queueResDist.subname(i, opClassStrings[i]);
265  }
266 */
268  .init(0,total_width,1)
270  ;
271 /*
272  dist_unissued
273  .init(Num_OpClasses+2)
274  .name(name() + ".unissued_cause")
275  .desc("Reason ready instruction not issued")
276  .flags(pdf | dist)
277  ;
278  for (int i=0; i < (Num_OpClasses + 2); ++i) {
279  dist_unissued.subname(i, unissued_names[i]);
280  }
281 */
283  .init(cpu->numThreads,enums::Num_OpClass)
285  ;
286  statIssuedInstType.ysubnames(enums::OpClassStrings);
287 
288  //
289  // How long did instructions for a particular FU type wait prior to issue
290  //
291 /*
292  issueDelayDist
293  .init(Num_OpClasses,0,99,2)
294  .name(name() + ".")
295  .desc("cycles from operands ready to issue")
296  .flags(pdf | cdf)
297  ;
298  for (int i=0; i<Num_OpClasses; ++i) {
299  std::stringstream subname;
300  subname << opClassStrings[i] << "_delay";
301  issueDelayDist.subname(i, subname.str());
302  }
303 */
304  issueRate
306  ;
307 
308  statFuBusy
311  ;
// Label each statFuBusy entry with the name of its operation class.
312  for (int i=0; i < Num_OpClasses; ++i) {
313  statFuBusy.subname(i, enums::OpClassStrings[i]);
314  }
315 
316  fuBusy
317  .init(cpu->numThreads)
319  ;
320 
321  fuBusyRate
323  ;
325 }
326 
// Constructs the IQ read/write/wakeup and ALU access counters as a child of
// the given parent stats group.
// NOTE(review): the signature line and the first line of every statement in
// the body (the stat being configured) are not present in this listing; the
// visible part of each statement sets the `total` flag.
328  : statistics::Group(parent),
329  ADD_STAT(intInstQueueReads, statistics::units::Count::get(),
330  "Number of integer instruction queue reads"),
331  ADD_STAT(intInstQueueWrites, statistics::units::Count::get(),
332  "Number of integer instruction queue writes"),
333  ADD_STAT(intInstQueueWakeupAccesses, statistics::units::Count::get(),
334  "Number of integer instruction queue wakeup accesses"),
335  ADD_STAT(fpInstQueueReads, statistics::units::Count::get(),
336  "Number of floating instruction queue reads"),
337  ADD_STAT(fpInstQueueWrites, statistics::units::Count::get(),
338  "Number of floating instruction queue writes"),
339  ADD_STAT(fpInstQueueWakeupAccesses, statistics::units::Count::get(),
340  "Number of floating instruction queue wakeup accesses"),
341  ADD_STAT(vecInstQueueReads, statistics::units::Count::get(),
342  "Number of vector instruction queue reads"),
343  ADD_STAT(vecInstQueueWrites, statistics::units::Count::get(),
344  "Number of vector instruction queue writes"),
345  ADD_STAT(vecInstQueueWakeupAccesses, statistics::units::Count::get(),
346  "Number of vector instruction queue wakeup accesses"),
347  ADD_STAT(intAluAccesses, statistics::units::Count::get(),
348  "Number of integer alu accesses"),
349  ADD_STAT(fpAluAccesses, statistics::units::Count::get(),
350  "Number of floating point alu accesses"),
351  ADD_STAT(vecAluAccesses, statistics::units::Count::get(),
352  "Number of vector alu accesses")
353 {
354  using namespace statistics;
356  .flags(total);
357 
359  .flags(total);
360 
362  .flags(total);
363 
365  .flags(total);
366 
368  .flags(total);
369 
371  .flags(total);
372 
374  .flags(total);
375 
377  .flags(total);
378 
380  .flags(total);
381 
383  .flags(total);
384 
386  .flags(total);
387 
389  .flags(total);
390 }
391 
// Returns the queue to its empty initial state: per-thread counts and
// instruction lists, the register scoreboard, squash sequence numbers, the
// per-op-class ready queues and their ordering list, the non-speculative map,
// the deferred/blocked/retry memory instruction lists and the outstanding
// writeback counter.
// NOTE(review): the line naming this function and the statement that sets the
// number of free entries are not present in this listing.
392 void
394 {
395  //Initialize thread IQ counts
396  for (ThreadID tid = 0; tid < MaxThreads; tid++) {
397  count[tid] = 0;
398  instList[tid].clear();
399  }
400 
401  // Initialize the number of free IQ entries.
403 
404  // Note that in actuality, the registers corresponding to the logical
405  // registers start off as ready. However this doesn't matter for the
406  // IQ as the instruction should have been correctly told if those
407  // registers are ready in rename. Thus it can all be initialized as
408  // unready.
409  for (int i = 0; i < numPhysRegs; ++i) {
410  regScoreboard[i] = false;
411  }
412 
413  for (ThreadID tid = 0; tid < MaxThreads; ++tid) {
414  squashedSeqNum[tid] = 0;
415  }
416 
// Drain every per-op-class ready queue and mark it as not on the order list.
417  for (int i = 0; i < Num_OpClasses; ++i) {
418  while (!readyInsts[i].empty())
419  readyInsts[i].pop();
420  queueOnList[i] = false;
421  readyIt[i] = listOrder.end();
422  }
423  nonSpecInsts.clear();
424  listOrder.clear();
425  deferredMemInsts.clear();
426  blockedMemInsts.clear();
427  retryMemInsts.clear();
428  wbOutstanding = 0;
429 }
430 
// Stores the supplied pointer (at_ptr) as the active-thread list used by the
// queue. NOTE(review): the signature line is not present in this listing.
431 void
433 {
434  activeThreads = at_ptr;
435 }
436 
// Stores the supplied pointer (i2e_ptr) as the issue-to-execute queue.
// NOTE(review): the signature line is not present in this listing.
437 void
439 {
440  issueToExecuteQueue = i2e_ptr;
441 }
442 
// Stores the supplied pointer (tb_ptr) as the time buffer.
// NOTE(review): the signature line and one further statement of the body are
// not present in this listing.
443 void
445 {
446  timeBuffer = tb_ptr;
447 
449 }
450 
// Reports whether the queue is drained: the dependency graph and the
// to-execute list must be empty, no writebacks may be outstanding, and the
// memory dependence unit of every configured thread must report drained.
// NOTE(review): the signature line is not present in this listing.
451 bool
453 {
454  bool drained = dependGraph.empty() &&
455  instsToExecute.empty() &&
456  wbOutstanding == 0;
457  for (ThreadID tid = 0; tid < numThreads; ++tid)
458  drained = drained && memDepUnit[tid].isDrained();
459 
460  return drained;
461 }
462 
// Asserts that the dependency graph and the to-execute list are empty, then
// iterates over the configured threads.
// NOTE(review): the signature line and the body of the per-thread loop are
// not present in this listing.
463 void
465 {
466  assert(dependGraph.empty());
467  assert(instsToExecute.empty());
468  for (ThreadID tid = 0; tid < numThreads; ++tid)
470 }
471 
// Simply resets the queue state via resetState().
// NOTE(review): the signature line is not present in this listing.
472 void
474 {
475  resetState();
476 }
477 
// Returns the per-thread share of the IQ (numEntries / num_threads) under the
// Partitioned policy, and 0 for every other policy.
// NOTE(review): the signature line is not present in this listing; no guard
// against num_threads == 0 is visible here.
478 int
480 {
481  if (iqPolicy == SMTQueuePolicy::Partitioned) {
482  return numEntries / num_threads;
483  } else {
484  return 0;
485  }
486 }
487 
488 
// Recomputes maxEntries for every currently active thread.
// Under Partitioned, each active thread gets numEntries / active_threads;
// under Threshold, a single remaining active thread gets the whole queue.
// Nothing is changed when the policy is Dynamic and only one thread is
// configured.
// NOTE(review): the signature line and the declaration of `end` are not
// present in this listing.
489 void
491 {
492  if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
493  int active_threads = activeThreads->size();
494 
495  list<ThreadID>::iterator threads = activeThreads->begin();
497 
498  while (threads != end) {
499  ThreadID tid = *threads++;
500 
501  if (iqPolicy == SMTQueuePolicy::Partitioned) {
502  maxEntries[tid] = numEntries / active_threads;
503  } else if (iqPolicy == SMTQueuePolicy::Threshold &&
504  active_threads == 1) {
505  maxEntries[tid] = numEntries;
506  }
507  }
508  }
509 }
510 
// Returns the queue-wide number of free entries (freeEntries).
// NOTE(review): the signature line is not present in this listing.
511 unsigned
513 {
514  return freeEntries;
515 }
516 
// Returns the free entries of one thread: its limit (maxEntries[tid]) minus
// the entries it currently occupies (count[tid]).
// NOTE(review): the signature line is not present in this listing.
517 unsigned
519 {
520  return maxEntries[tid] - count[tid];
521 }
522 
523 // Might want to do something more complex if it knows how many instructions
524 // will be issued this cycle.
// Returns true exactly when no free entry is left in the whole queue.
// NOTE(review): the signature line is not present in this listing.
525 bool
527 {
528  if (freeEntries == 0) {
529  return(true);
530  } else {
531  return(false);
532  }
533 }
534 
// Returns true exactly when the given thread has no free entries left
// according to numFreeEntries(tid).
// NOTE(review): the signature line is not present in this listing.
535 bool
537 {
538  if (numFreeEntries(tid) == 0) {
539  return(true);
540  } else {
541  return(false);
542  }
543 }
544 
// Returns true if the ordering list is non-empty or any per-op-class ready
// queue holds an instruction; false otherwise.
// NOTE(review): the signature line is not present in this listing.
545 bool
547 {
548  if (!listOrder.empty()) {
549  return true;
550  }
551 
552  for (int i = 0; i < Num_OpClasses; ++i) {
553  if (!readyInsts[i].empty()) {
554  return true;
555  }
556  }
557 
558  return false;
559 }
560 
// Inserts a speculative instruction into the IQ: appends it to its thread's
// instruction list, consumes one free entry, registers its source
// dependencies and destination producers, and then either hands it to the
// thread's memory dependence unit (memory references) or puts it on the
// ready list if it is already ready.
// NOTE(review): the signature line and the statements inside the
// isFloating/isVector/else branches are not present in this listing.
// NOTE(review): new_inst is dereferenced by the type checks below before
// assert(new_inst) runs, so the assert cannot catch a null pointer here.
561 void
563 {
564  if (new_inst->isFloating()) {
566  } else if (new_inst->isVector()) {
568  } else {
570  }
571  // Make sure the instruction is valid
572  assert(new_inst);
573 
574  DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n",
575  new_inst->seqNum, new_inst->pcState());
576 
577  assert(freeEntries != 0);
578 
579  instList[new_inst->threadNumber].push_back(new_inst);
580 
581  --freeEntries;
582 
583  new_inst->setInIQ();
584 
585  // Look through its source registers (physical regs), and mark any
586  // dependencies.
587  addToDependents(new_inst);
588 
589  // Have this instruction set itself as the producer of its destination
590  // register(s).
591  addToProducers(new_inst);
592 
593  if (new_inst->isMemRef()) {
594  memDepUnit[new_inst->threadNumber].insert(new_inst);
595  } else {
596  addIfReady(new_inst);
597  }
598 
600 
601  count[new_inst->threadNumber]++;
602 
// Consistency check: free entries plus occupied entries equals capacity.
603  assert(freeEntries == (numEntries - countInsts()));
604 }
605 
// Inserts a non-speculative instruction into the IQ. It is recorded in
// nonSpecInsts keyed by sequence number, appended to its thread's instruction
// list and registered as producer of its destinations; memory references are
// also passed to the memory dependence unit via insertNonSpec(). Unlike the
// speculative insert path visible in this file, no source dependencies are
// registered and the instruction is not put on the ready list here.
// NOTE(review): the signature line and the statements inside the
// isFloating/isVector/else branches are not present in this listing.
// NOTE(review): new_inst is dereferenced before assert(new_inst) runs.
606 void
608 {
609  // @todo: Clean up this code; can do it by setting inst as unable
610  // to issue, then calling normal insert on the inst.
611  if (new_inst->isFloating()) {
613  } else if (new_inst->isVector()) {
615  } else {
617  }
618 
619  assert(new_inst);
620 
621  nonSpecInsts[new_inst->seqNum] = new_inst;
622 
623  DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s "
624  "to the IQ.\n",
625  new_inst->seqNum, new_inst->pcState());
626 
627  assert(freeEntries != 0);
628 
629  instList[new_inst->threadNumber].push_back(new_inst);
630 
631  --freeEntries;
632 
633  new_inst->setInIQ();
634 
635  // Have this instruction set itself as the producer of its destination
636  // register(s).
637  addToProducers(new_inst);
638 
639  // If it's a memory instruction, add it to the memory dependency
640  // unit.
641  if (new_inst->isMemRef()) {
642  memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
643  }
644 
646 
647  count[new_inst->threadNumber]++;
648 
649  assert(freeEntries == (numEntries - countInsts()));
650 }
651 
// Registers the barrier with its thread's memory dependence unit and then
// inserts it into the IQ through the non-speculative path.
// NOTE(review): the signature line is not present in this listing.
652 void
654 {
655  memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
656 
657  insertNonSpec(barr_inst);
658 }
659 
// Removes and returns the instruction at the front of instsToExecute; the
// list must not be empty (asserted).
// NOTE(review): the return type/signature lines and the statements inside the
// isFloating/isVector/else branches are not present in this listing.
662 {
663  assert(!instsToExecute.empty());
664  DynInstPtr inst = std::move(instsToExecute.front());
665  instsToExecute.pop_front();
666  if (inst->isFloating()) {
668  } else if (inst->isVector()) {
670  } else {
672  }
673  return inst;
674 }
675 
// Puts the ready queue of op_class onto listOrder, which is kept sorted by
// the sequence number of each queue's top instruction (ascending). The new
// entry is inserted before the first entry whose oldestInst is greater, and
// the resulting iterator is remembered in readyIt[op_class].
// Requires readyInsts[op_class] to be non-empty (asserted).
// NOTE(review): the signature line is not present in this listing.
676 void
678 {
679  assert(!readyInsts[op_class].empty());
680 
681  ListOrderEntry queue_entry;
682 
683  queue_entry.queueType = op_class;
684 
685  queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
686 
687  ListOrderIt list_it = listOrder.begin();
688  ListOrderIt list_end_it = listOrder.end();
689 
690  while (list_it != list_end_it) {
691  if ((*list_it).oldestInst > queue_entry.oldestInst) {
692  break;
693  }
694 
695  list_it++;
696  }
697 
698  readyIt[op_class] = listOrder.insert(list_it, queue_entry);
699  queueOnList[op_class] = true;
700 }
701 
// Inserts a fresh listOrder entry for the op class referenced by
// list_order_it, using the sequence number of that class's current top ready
// instruction, at the correct sorted position after list_order_it.
// The entry at list_order_it itself is NOT erased here; the callers visible
// in this file erase it afterwards.
// NOTE(review): the signature line is not present in this listing.
702 void
704 {
705  // Get iterator of next item on the list
706  // Delete the original iterator
707  // Determine if the next item is either the end of the list or younger
708  // than the new instruction. If so, then add in a new iterator right here.
709  // If not, then move along.
710  ListOrderEntry queue_entry;
711  OpClass op_class = (*list_order_it).queueType;
712  ListOrderIt next_it = list_order_it;
713 
714  ++next_it;
715 
716  queue_entry.queueType = op_class;
717  queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
718 
719  while (next_it != listOrder.end() &&
720  (*next_it).oldestInst < queue_entry.oldestInst) {
721  ++next_it;
722  }
723 
724  readyIt[op_class] = listOrder.insert(next_it, queue_entry);
725 }
726 
// Handles completion of a multi-cycle functional-unit operation: decrements
// the outstanding-writeback counter, wakes the CPU through the IEW stage,
// releases the FU for the next cycle when a valid index (> -1) is given, and
// queues the instruction for execution.
// NOTE(review): the signature line is not present in this listing.
727 void
729 {
730  DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
731  assert(!cpu->switchedOut());
732  // The CPU could have been sleeping until this op completed (*extremely*
733  // long latency op). Wake it if it was. This may be overkill.
734  --wbOutstanding;
735  iewStage->wakeCPU();
736 
737  if (fu_idx > -1)
738  fuPool->freeUnitNextCycle(fu_idx);
739 
740  // @todo: Ensure that these FU Completions happen at the beginning
741  // of a cycle, otherwise they could add too many instructions to
742  // the queue.
743  issueToExecuteQueue->access(-1)->size++;
744  instsToExecute.push_back(inst);
745 }
746 
747 // @todo: Figure out a better way to remove the squashed items from the
748 // lists. Checking the top item of each list to see if it's squashed
749 // wastes time and forces jumps.
// Issues ready instructions for this cycle. First moves deferred and blocked
// memory instructions that can now execute onto the ready lists, then walks
// listOrder (oldest ready instruction first) until totalWidth instructions
// have been issued or the list is exhausted. Squashed instructions are
// dropped; an instruction is issued when it needs no FU or a free FU was
// obtained, otherwise FU-busy statistics are updated and the walk moves on.
// NOTE(review): the signature line and several statements (marked by gaps in
// the embedded line numbers) are not present in this listing.
750 void
752 {
753  DPRINTF(IQ, "Attempting to schedule ready instructions from "
754  "the IQ.\n");
755 
756  IssueStruct *i2e_info = issueToExecuteQueue->access(0);
757 
758  DynInstPtr mem_inst;
759  while ((mem_inst = getDeferredMemInstToExecute())) {
760  addReadyMemInst(mem_inst);
761  }
762 
763  // See if any cache blocked instructions are able to be executed
764  while ((mem_inst = getBlockedMemInstToExecute())) {
765  addReadyMemInst(mem_inst);
766  }
767 
768  // Have iterator to head of the list
769  // While I haven't exceeded bandwidth or reached the end of the list,
770  // Try to get a FU that can do what this op needs.
771  // If successful, change the oldestInst to the new top of the list, put
772  // the queue in the proper place in the list.
773  // Increment the iterator.
774  // This will avoid trying to schedule a certain op class if there are no
775  // FUs that handle it.
776  int total_issued = 0;
777  ListOrderIt order_it = listOrder.begin();
778  ListOrderIt order_end_it = listOrder.end();
779 
780  while (total_issued < totalWidth && order_it != order_end_it) {
781  OpClass op_class = (*order_it).queueType;
782 
783  assert(!readyInsts[op_class].empty());
784 
785  DynInstPtr issuing_inst = readyInsts[op_class].top();
786 
787  if (issuing_inst->isFloating()) {
789  } else if (issuing_inst->isVector()) {
791  } else {
793  }
794 
795  assert(issuing_inst->seqNum == (*order_it).oldestInst);
796 
// Squashed instruction: drop it from its ready queue, re-register the queue
// on the order list if it still has entries, and move to the next entry.
797  if (issuing_inst->isSquashed()) {
798  readyInsts[op_class].pop();
799 
800  if (!readyInsts[op_class].empty()) {
801  moveToYoungerInst(order_it);
802  } else {
803  readyIt[op_class] = listOrder.end();
804  queueOnList[op_class] = false;
805  }
806 
807  listOrder.erase(order_it++);
808 
810 
811  continue;
812  }
813 
814  int idx = FUPool::NoCapableFU;
815  Cycles op_latency = Cycles(1);
816  ThreadID tid = issuing_inst->threadNumber;
817 
// Only instructions with a real op class request a functional unit; for
// No_OpClass, idx keeps its NoCapableFU default and latency stays 1 cycle.
818  if (op_class != No_OpClass) {
819  idx = fuPool->getUnit(op_class);
820  if (issuing_inst->isFloating()) {
822  } else if (issuing_inst->isVector()) {
824  } else {
826  }
827  if (idx > FUPool::NoFreeFU) {
828  op_latency = fuPool->getOpLatency(op_class);
829  }
830  }
831 
832  // If we have an instruction that doesn't require a FU, or a
833  // valid FU, then schedule for execution.
834  if (idx != FUPool::NoFreeFU) {
835  if (op_latency == Cycles(1)) {
836  i2e_info->size++;
837  instsToExecute.push_back(issuing_inst);
838 
839  // Add the FU onto the list of FU's to be freed next
840  // cycle if we used one.
841  if (idx >= 0)
843  } else {
844  bool pipelined = fuPool->isPipelined(op_class);
845  // Generate completion event for the FU
846  ++wbOutstanding;
847  FUCompletion *execution = new FUCompletion(issuing_inst,
848  idx, this);
849 
// The completion event fires op_latency - 1 cycles from now.
850  cpu->schedule(execution,
851  cpu->clockEdge(Cycles(op_latency - 1)));
852 
853  if (!pipelined) {
854  // If FU isn't pipelined, then it must be freed
855  // upon the execution completing.
856  execution->setFreeFU();
857  } else {
858  // Add the FU onto the list of FU's to be freed next cycle.
860  }
861  }
862 
863  DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
864  "[sn:%llu]\n",
865  tid, issuing_inst->pcState(),
866  issuing_inst->seqNum);
867 
868  readyInsts[op_class].pop();
869 
870  if (!readyInsts[op_class].empty()) {
871  moveToYoungerInst(order_it);
872  } else {
873  readyIt[op_class] = listOrder.end();
874  queueOnList[op_class] = false;
875  }
876 
877  issuing_inst->setIssued();
878  ++total_issued;
879 
880 #if TRACING_ON
881  issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
882 #endif
883 
// Record the tick of the first issue only (firstIssue == -1 means unset).
884  if (issuing_inst->firstIssue == -1)
885  issuing_inst->firstIssue = curTick();
886 
887  if (!issuing_inst->isMemRef()) {
888  // Memory instructions can not be freed from the IQ until they
889  // complete.
890  ++freeEntries;
891  count[tid]--;
892  issuing_inst->clearInIQ();
893  } else {
894  memDepUnit[tid].issue(issuing_inst);
895  }
896 
897  listOrder.erase(order_it++);
898  iqStats.statIssuedInstType[tid][op_class]++;
899  } else {
// No free FU for this op class: count the stall and try the next entry.
900  iqStats.statFuBusy[op_class]++;
901  iqStats.fuBusy[tid]++;
902  ++order_it;
903  }
904  }
905 
906  iqStats.numIssuedDist.sample(total_issued);
907  iqStats.instsIssued+= total_issued;
908 
909  // If we issued any instructions, tell the CPU we had activity.
910  // @todo If the way deferred memory instructions are handled due to
911  // translation changes then the deferredMemInsts condition should be
912  // removed from the code below.
913  if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
915  } else {
916  DPRINTF(IQ, "Not able to schedule any instructions.\n");
917  }
918 }
919 
// Marks a previously inserted non-speculative instruction as ready to issue.
// `inst` is the key used to look the instruction up in nonSpecInsts (entries
// are stored there keyed by sequence number) and must be present (asserted).
// The instruction is flagged as at-commit and issuable, then either added to
// the ready list (non-memory) or reported to the thread's memory dependence
// unit, and finally removed from nonSpecInsts.
// NOTE(review): the signature line is not present in this listing.
920 void
922 {
923  DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready "
924  "to execute.\n", inst);
925 
926  NonSpecMapIt inst_it = nonSpecInsts.find(inst);
927 
928  assert(inst_it != nonSpecInsts.end());
929 
930  ThreadID tid = (*inst_it).second->threadNumber;
931 
932  (*inst_it).second->setAtCommit();
933 
934  (*inst_it).second->setCanIssue();
935 
936  if (!(*inst_it).second->isMemRef()) {
937  addIfReady((*inst_it).second);
938  } else {
939  memDepUnit[tid].nonSpecInstReady((*inst_it).second);
940  }
941 
942  (*inst_it).second = NULL;
943 
944  nonSpecInsts.erase(inst_it);
945 }
946 
// Drops from the front of the thread's instruction list every instruction
// whose sequence number is less than or equal to `inst`.
// NOTE(review): the signature line is not present in this listing.
947 void
949 {
950  DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n",
951  tid,inst);
952 
953  ListIt iq_it = instList[tid].begin();
954 
955  while (iq_it != instList[tid].end() &&
956  (*iq_it)->seqNum <= inst) {
// Advance the iterator first so it does not refer to the popped element.
957  ++iq_it;
958  instList[tid].pop_front();
959  }
960 
961  assert(freeEntries == (numEntries - countInsts()));
962 }
963 
// Wakes every instruction waiting on the destination registers of
// completed_inst and returns how many dependents were woken.
// Memory references are also completed in the memory dependence unit and
// release their IQ entry here; non-memory read/write barriers are completed
// in the memory dependence unit only. Destination registers with a fixed
// mapping, or with pinned writes still outstanding, are skipped. For the
// rest, each dependent gets one source marked ready and is added to the ready
// list if it became ready, and the register is marked ready in the scoreboard.
// The completed instruction must not be squashed (asserted).
// NOTE(review): the signature line and the statements inside the
// isFloating/isVector/else branches are not present in this listing.
964 int
966 {
967  int dependents = 0;
968 
969  // The instruction queue here takes care of both floating and int ops
970  if (completed_inst->isFloating()) {
972  } else if (completed_inst->isVector()) {
974  } else {
976  }
977 
978  completed_inst->lastWakeDependents = curTick();
979 
980  DPRINTF(IQ, "Waking dependents of completed instruction.\n");
981 
982  assert(!completed_inst->isSquashed());
983 
984  // Tell the memory dependence unit to wake any dependents on this
985  // instruction if it is a memory instruction. Also complete the memory
986  // instruction at this point since we know it executed without issues.
987  ThreadID tid = completed_inst->threadNumber;
988  if (completed_inst->isMemRef()) {
989  memDepUnit[tid].completeInst(completed_inst);
990 
991  DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
992  completed_inst->pcState(), completed_inst->seqNum);
993 
994  ++freeEntries;
995  completed_inst->memOpDone(true);
996  count[tid]--;
997  } else if (completed_inst->isReadBarrier() ||
998  completed_inst->isWriteBarrier()) {
999  // Completes a non mem ref barrier
1000  memDepUnit[tid].completeInst(completed_inst);
1001  }
1002 
1003  for (int dest_reg_idx = 0;
1004  dest_reg_idx < completed_inst->numDestRegs();
1005  dest_reg_idx++)
1006  {
1007  PhysRegIdPtr dest_reg =
1008  completed_inst->renamedDestIdx(dest_reg_idx);
1009 
1010  // Special case of uniq or control registers. They are not
1011  // handled by the IQ and thus have no dependency graph entry.
1012  if (dest_reg->isFixedMapping()) {
1013  DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n",
1014  dest_reg->index(), dest_reg->className());
1015  continue;
1016  }
1017 
1018  // Avoid waking up dependents if the register is pinned
1019  dest_reg->decrNumPinnedWritesToComplete();
1020  if (dest_reg->isPinned())
1021  completed_inst->setPinnedRegsWritten();
1022 
1023  if (dest_reg->getNumPinnedWritesToComplete() != 0) {
1024  DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n",
1025  dest_reg->index(), dest_reg->className());
1026  continue;
1027  }
1028 
1029  DPRINTF(IQ, "Waking any dependents on register %i (%s).\n",
1030  dest_reg->index(),
1031  dest_reg->className());
1032 
1033  //Go through the dependency chain, marking the registers as
1034  //ready within the waiting instructions.
1035  DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
1036 
1037  while (dep_inst) {
1038  DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
1039  "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
1040 
1041  // Might want to give more information to the instruction
1042  // so that it knows which of its source registers is
1043  // ready. However that would mean that the dependency
1044  // graph entries would need to hold the src_reg_idx.
1045  dep_inst->markSrcRegReady();
1046 
1047  addIfReady(dep_inst);
1048 
1049  dep_inst = dependGraph.pop(dest_reg->flatIndex());
1050 
1051  ++dependents;
1052  }
1053 
1054  // Reset the head node now that all of its dependents have
1055  // been woken up.
1056  assert(dependGraph.empty(dest_reg->flatIndex()));
1057  dependGraph.clearInst(dest_reg->flatIndex());
1058 
1059  // Mark the scoreboard as having that register ready.
1060  regScoreboard[dest_reg->flatIndex()] = true;
1061  }
1062  return dependents;
1063 }
1064 
// Pushes ready_inst onto the ready queue of its op class and keeps listOrder
// consistent: the queue is added to the order list if it was not on it, or
// re-inserted if the new instruction became the queue's top with a smaller
// sequence number than the one recorded on the list.
// NOTE(review): the signature line is not present in this listing.
1065 void
1067 {
1068  OpClass op_class = ready_inst->opClass();
1069 
1070  readyInsts[op_class].push(ready_inst);
1071 
1072  // Will need to reorder the list if either a queue is not on the list,
1073  // or it has an older instruction than last time.
1074  if (!queueOnList[op_class]) {
1075  addToOrderList(op_class);
1076  } else if (readyInsts[op_class].top()->seqNum <
1077  (*readyIt[op_class]).oldestInst) {
1078  listOrder.erase(readyIt[op_class]);
1079  addToOrderList(op_class);
1080  }
1081 
1082  DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1083  "the ready list, PC %s opclass:%i [sn:%llu].\n",
1084  ready_inst->pcState(), op_class, ready_inst->seqNum);
1085 }
1086 
// Prepares a memory instruction for re-issue: clears its translation
// started/completed flags and its can-issue flag, then asks the thread's
// memory dependence unit to reschedule it.
// NOTE(review): the signature line is not present in this listing.
1087 void
1089 {
1090  DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
1091 
1092  // Reset DTB translation state
1093  resched_inst->translationStarted(false);
1094  resched_inst->translationCompleted(false);
1095 
1096  resched_inst->clearCanIssue();
1097  memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
1098 }
1099 
// Triggers replay() on the memory dependence unit of the instruction's
// thread; replay_inst is used only to select the thread.
// NOTE(review): the signature line is not present in this listing.
1100 void
1102 {
1103  memDepUnit[replay_inst->threadNumber].replay();
1104 }
1105 
// Appends the instruction to the list of deferred memory instructions.
// NOTE(review): the signature line is not present in this listing.
1106 void
1108 {
1109  deferredMemInsts.push_back(deferred_inst);
1110 }
1111 
// Parks a memory instruction that could not proceed: clears its issued and
// can-issue flags and appends it to blockedMemInsts for later reissue.
// NOTE(review): the signature line is not present in this listing.
1112 void
1114 {
1115  blocked_inst->clearIssued();
1116  blocked_inst->clearCanIssue();
1117  blockedMemInsts.push_back(blocked_inst);
1118  DPRINTF(IQ, "Memory inst [sn:%llu] PC %s is blocked, will be "
1119  "reissued later\n", blocked_inst->seqNum,
1120  blocked_inst->pcState());
1121 }
1122 
// Called when the cache becomes unblocked; the visible part logs the event
// and wakes the CPU.
// NOTE(review): the signature line and one statement between the log message
// and the wake-up are not present in this listing.
1123 void
1125 {
1126  DPRINTF(IQ, "Cache is unblocked, rescheduling blocked memory "
1127  "instructions\n");
1129  // Get the CPU ticking again
1130  cpu->wakeCPU();
1131 }
1132 
// Removes and returns the first deferred memory instruction whose translation
// has completed or which has been squashed; returns nullptr if none
// qualifies.
// NOTE(review): the signature line is not present in this listing.
1133 DynInstPtr
1135 {
1136  for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
1137  ++it) {
1138  if ((*it)->translationCompleted() || (*it)->isSquashed()) {
// Returning right after erase() means the erased iterator is never reused.
1139  DynInstPtr mem_inst = std::move(*it);
1140  deferredMemInsts.erase(it);
1141  return mem_inst;
1142  }
1143  }
1144  return nullptr;
1145 }
1146 
// Removes and returns the instruction at the front of retryMemInsts, or
// nullptr when that list is empty.
// NOTE(review): the signature line is not present in this listing.
1147 DynInstPtr
1149 {
1150  if (retryMemInsts.empty()) {
1151  return nullptr;
1152  } else {
1153  DynInstPtr mem_inst = std::move(retryMemInsts.front());
1154  retryMemInsts.pop_front();
1155  return mem_inst;
1156  }
1157 }
1158 
// Forwards a store/load ordering violation to the memory dependence unit of
// the store's thread.
// NOTE(review): the first line of the signature and one statement of the body
// are not present in this listing.
1159 void
1161  const DynInstPtr &faulting_load)
1162 {
1164  memDepUnit[store->threadNumber].violation(store, faulting_load);
1165 }
1166 
// Starts a squash for one thread: records the sequence number reported by
// commit (doneSeqNum) as the squash point, squashes the IQ contents via
// doSquash(), and then tells the thread's memory dependence unit to squash
// from the same sequence number.
// NOTE(review): the signature line is not present in this listing.
1167 void
1169 {
1170  DPRINTF(IQ, "[tid:%i] Starting to squash instructions in "
1171  "the IQ.\n", tid);
1172 
1173  // Read instruction sequence number of last instruction out of the
1174  // time buffer.
1175  squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
1176 
1177  doSquash(tid);
1178 
1179  // Also tell the memory dependence unit to squash.
1180  memDepUnit[tid].squash(squashedSeqNum[tid], tid);
1181 }
1182 
// doSquash(tid)
//
// NOTE(review): the line carrying the qualified function name (listing
// line 1184) and interior listing lines 1200, 1202, 1204, 1257, 1280 and
// 1321 are absent from this rendered listing. As a result the
// isFloating()/isVector()/else branches below appear with empty bodies.
// Consult the real source file before changing this function.
//
// Walks instList[tid] from the tail toward the head for as long as the
// current entry's seqNum is greater than squashedSeqNum[tid]. For each
// such entry that belongs to `tid` and is not already squashed in the IQ:
//   - if it has not issued, or is a memory reference whose memory op is
//     not done, it is unlinked from the dependency graph (as a consumer)
//     or from nonSpecInsts, flagged as squashed/issued/can-commit, taken
//     out of the IQ accounting (count[], freeEntries);
//   - the dependency-graph head for each of its non-fixed-mapping
//     destination registers is cleared;
//   - the entry is erased from instList[tid].
// Entries of another thread, or already squashed in the IQ, are stepped
// over without being erased.
1183 void
1185 {
1186  // Start at the tail.
 // NOTE(review): when instList[tid] is empty this decrements end(), and
 // the erase(squash_it--) at the bottom of the loop can decrement
 // begin(). The loop guard compares against end(), so the code appears
 // to rely on the iterator wrapping around to end() -- confirm this is
 // the intended contract for ListIt.
1187  ListIt squash_it = instList[tid].end();
1188  --squash_it;
1189 
1190  DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n",
1191  tid, squashedSeqNum[tid]);
1192 
1193  // Squash any instructions younger than the squashed sequence number
1194  // given.
1195  while (squash_it != instList[tid].end() &&
1196  (*squash_it)->seqNum > squashedSeqNum[tid]) {
1197 
1198  DynInstPtr squashed_inst = (*squash_it);
 // The branch bodies (listing lines 1200/1202/1204) are elided in this
 // listing; only the classification by instruction kind is visible.
1199  if (squashed_inst->isFloating()) {
1201  } else if (squashed_inst->isVector()) {
1203  } else {
1205  }
1206 
1207  // Only handle the instruction if it actually is in the IQ and
1208  // hasn't already been squashed in the IQ.
1209  if (squashed_inst->threadNumber != tid ||
1210  squashed_inst->isSquashedInIQ()) {
1211  --squash_it;
1212  continue;
1213  }
1214 
 // Instructions that already issued (and, for memory references, whose
 // memory op is done) skip the bookkeeping below and only have their
 // destination heads cleared and their list entry erased.
1215  if (!squashed_inst->isIssued() ||
1216  (squashed_inst->isMemRef() &&
1217  !squashed_inst->memOpDone())) {
1218 
1219  DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n",
1220  tid, squashed_inst->seqNum, squashed_inst->pcState());
1221 
 // A full memory barrier that is also a load, or a store that is not
 // a store-conditional, is handled through the dependency-graph path
 // below rather than through nonSpecInsts.
1222  bool is_acq_rel = squashed_inst->isFullMemBarrier() &&
1223  (squashed_inst->isLoad() ||
1224  (squashed_inst->isStore() &&
1225  !squashed_inst->isStoreConditional()));
1226 
1227  // Remove the instruction from the dependency list.
1228  if (is_acq_rel ||
1229  (!squashed_inst->isNonSpeculative() &&
1230  !squashed_inst->isStoreConditional() &&
1231  !squashed_inst->isAtomic() &&
1232  !squashed_inst->isReadBarrier() &&
1233  !squashed_inst->isWriteBarrier())) {
1234 
1235  for (int src_reg_idx = 0;
1236  src_reg_idx < squashed_inst->numSrcRegs();
1237  src_reg_idx++)
1238  {
1239  PhysRegIdPtr src_reg =
1240  squashed_inst->renamedSrcIdx(src_reg_idx);
1241 
1242  // Only remove it from the dependency graph if it
1243  // was placed there in the first place.
1244 
1245  // Instead of doing a linked list traversal, we
1246  // can just remove these squashed instructions
1247  // either at issue time, or when the register is
1248  // overwritten. The only downside to this is it
1249  // leaves more room for error.
1250 
 // Mirrors addToDependents(): only sources that were not ready and
 // are not fixed-mapping registers were inserted into the graph.
1251  if (!squashed_inst->readySrcIdx(src_reg_idx) &&
1252  !src_reg->isFixedMapping()) {
1253  dependGraph.remove(src_reg->flatIndex(),
1254  squashed_inst);
1255  }
1256 
1258  }
1259 
1260  } else if (!squashed_inst->isStoreConditional() ||
1261  !squashed_inst->isCompleted()) {
1262  NonSpecMapIt ns_inst_it =
1263  nonSpecInsts.find(squashed_inst->seqNum);
1264 
1265  // we remove non-speculative instructions from
1266  // nonSpecInsts already when they are ready, and so we
1267  // cannot always expect to find them
1268  if (ns_inst_it == nonSpecInsts.end()) {
1269  // loads that became ready but stalled on a
1270  // blocked cache are already removed from
1271  // nonSpecInsts, and have not faulted
1272  assert(squashed_inst->getFault() != NoFault ||
1273  squashed_inst->isMemRef());
1274  } else {
1275 
 // Drop the map's reference to the instruction, then remove the
 // map entry itself.
1276  (*ns_inst_it).second = NULL;
1277 
1278  nonSpecInsts.erase(ns_inst_it);
1279 
1281  }
1282  }
1283 
1284  // Might want to also clear out the head of the dependency graph.
1285 
1286  // Mark it as squashed within the IQ.
1287  squashed_inst->setSquashedInIQ();
1288 
1289  // @todo: Remove this hack where several statuses are set so the
1290  // inst will flow through the rest of the pipeline.
1291  squashed_inst->setIssued();
1292  squashed_inst->setCanCommit();
1293  squashed_inst->clearInIQ();
1294 
1295  //Update Thread IQ Count
1296  count[squashed_inst->threadNumber]--;
1297 
1298  ++freeEntries;
1299  }
1300 
1301  // IQ clears out the heads of the dependency graph only when
1302  // instructions reach writeback stage. If an instruction is squashed
1303  // before writeback stage, its head of dependency graph would not be
1304  // cleared out; it holds the instruction's DynInstPtr. This
1305  // prevents freeing the squashed instruction's DynInst.
1306  // Thus, we need to manually clear out the squashed instructions'
1307  // heads of dependency graph.
1308  for (int dest_reg_idx = 0;
1309  dest_reg_idx < squashed_inst->numDestRegs();
1310  dest_reg_idx++)
1311  {
1312  PhysRegIdPtr dest_reg =
1313  squashed_inst->renamedDestIdx(dest_reg_idx);
1314  if (dest_reg->isFixedMapping()){
1315  continue;
1316  }
 // The consumer chain of this register is asserted to be empty before
 // the producer pointer in its head is cleared.
1317  assert(dependGraph.empty(dest_reg->flatIndex()));
1318  dependGraph.clearInst(dest_reg->flatIndex());
1319  }
 // Post-decrement: erase the current node while stepping the iterator
 // to the next-older entry.
1320  instList[tid].erase(squash_it--);
1322  }
1323 }
1324 
// operator()(lhs, rhs)
//
// NOTE(review): the line carrying the qualified name of this call operator
// (listing line 1326) is absent from this rendered listing, so the
// enclosing type is not visible here.
//
// Comparison predicate on instruction sequence numbers: returns true when
// lhs has a larger seqNum than rhs.
// Presumably this is the comparator of the readyInsts priority queues, so
// that top() yields the smallest seqNum -- confirm against the header.
1325 bool
1327  const DynInstPtr &lhs, const DynInstPtr &rhs) const
1328 {
1329  return lhs->seqNum > rhs->seqNum;
1330 }
1331 
// addToDependents(new_inst)
//
// NOTE(review): the line carrying the qualified function name (listing
// line 1333) is absent from this rendered listing; the member index that
// follows the listing maps inst_queue.cc:1333 to
// addToDependents(const DynInstPtr &new_inst).
//
// Registers new_inst as a consumer in dependGraph for every source
// register that is not ready, is not a fixed-mapping register, and whose
// regScoreboard entry is false. Sources whose scoreboard entry is true are
// instead marked ready on the instruction.
//
// Returns true if at least one source was inserted into the dependency
// graph, false otherwise.
1332 bool
1334 {
1335  // Loop through the instruction's source registers, adding
1336  // them to the dependency list if they are not ready.
 // NOTE(review): the count is stored in an int8_t; this assumes
 // numSrcRegs() never exceeds 127 -- confirm.
1337  int8_t total_src_regs = new_inst->numSrcRegs();
1338  bool return_val = false;
1339 
1340  for (int src_reg_idx = 0;
1341  src_reg_idx < total_src_regs;
1342  src_reg_idx++)
1343  {
1344  // Only add it to the dependency graph if it's not ready.
1345  if (!new_inst->readySrcIdx(src_reg_idx)) {
1346  PhysRegIdPtr src_reg = new_inst->renamedSrcIdx(src_reg_idx);
1347 
1348  // Check the IQ's scoreboard to make sure the register
1349  // hasn't become ready while the instruction was in flight
1350  // between stages. Only if it really isn't ready should
1351  // it be added to the dependency graph.
1352  if (src_reg->isFixedMapping()) {
 // Fixed-mapping registers are not tracked in the graph at all.
1353  continue;
1354  } else if (!regScoreboard[src_reg->flatIndex()]) {
1355  DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1356  "is being added to the dependency chain.\n",
1357  new_inst->pcState(), src_reg->index(),
1358  src_reg->className());
1359 
1360  dependGraph.insert(src_reg->flatIndex(), new_inst);
1361 
1362  // Change the return value to indicate that something
1363  // was added to the dependency graph.
1364  return_val = true;
1365  } else {
1366  DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1367  "became ready before it reached the IQ.\n",
1368  new_inst->pcState(), src_reg->index(),
1369  src_reg->className());
1370  // Mark a register ready within the instruction.
1371  new_inst->markSrcRegReady(src_reg_idx);
1372  }
1373  }
1374  }
1375 
1376  return return_val;
1377 }
1378 
// addToProducers(new_inst)
//
// NOTE(review): the line carrying the qualified function name (listing
// line 1380) is absent from this rendered listing; the member index that
// follows the listing maps inst_queue.cc:1380 to
// addToProducers(const DynInstPtr &new_inst).
//
// For every destination register of new_inst that is not a fixed-mapping
// register: stores new_inst in the head of that register's dependGraph
// entry and sets the register's regScoreboard entry to false. Panics
// (after dumping the graph) if the register's dependency chain is not
// empty at that point.
1379 void
1381 {
1382  // Nothing really needs to be marked when an instruction becomes
1383  // the producer of a register's value, but for convenience a ptr
1384  // to the producing instruction will be placed in the head node of
1385  // the dependency links.
 // NOTE(review): the count is stored in an int8_t; this assumes
 // numDestRegs() never exceeds 127 -- confirm.
1386  int8_t total_dest_regs = new_inst->numDestRegs();
1387 
1388  for (int dest_reg_idx = 0;
1389  dest_reg_idx < total_dest_regs;
1390  dest_reg_idx++)
1391  {
1392  PhysRegIdPtr dest_reg = new_inst->renamedDestIdx(dest_reg_idx);
1393 
1394  // Some registers have fixed mapping, and there is no need to track
1395  // dependencies as these instructions must be executed at commit.
1396  if (dest_reg->isFixedMapping()) {
1397  continue;
1398  }
1399 
 // A non-empty chain here means consumers are still linked to this
 // physical register; treated as a fatal inconsistency.
1400  if (!dependGraph.empty(dest_reg->flatIndex())) {
1401  dependGraph.dump();
1402  panic("Dependency graph %i (%s) (flat: %i) not empty!",
1403  dest_reg->index(), dest_reg->className(),
1404  dest_reg->flatIndex());
1405  }
1406 
1407  dependGraph.setInst(dest_reg->flatIndex(), new_inst);
1408 
1409  // Mark the scoreboard to say it's not yet ready.
1410  regScoreboard[dest_reg->flatIndex()] = false;
1411  }
1412 }
1413 
// addIfReady(inst)
//
// NOTE(review): the line carrying the qualified function name (listing
// line 1415) is absent from this rendered listing; the member index that
// follows the listing maps inst_queue.cc:1415 to
// addIfReady(const DynInstPtr &inst).
//
// Does nothing unless inst->readyToIssue() is true. A ready memory
// reference is only reported to its thread's memory dependence unit via
// regsReady(); it is not pushed onto a ready queue here. Any other ready
// instruction is pushed onto readyInsts[opClass], and listOrder is updated
// so that the op class's entry reflects the queue's current top().
1414 void
1416 {
1417  // If the instruction now has all of its source registers
1418  // available, then add it to the list of ready instructions.
1419  if (inst->readyToIssue()) {
1420 
1421  //Add the instruction to the proper ready list.
1422  if (inst->isMemRef()) {
1423 
1424  DPRINTF(IQ, "Checking if memory instruction can issue.\n");
1425 
1426  // Message to the mem dependence unit that this instruction has
1427  // its registers ready.
1428  memDepUnit[inst->threadNumber].regsReady(inst);
1429 
1430  return;
1431  }
1432 
1433  OpClass op_class = inst->opClass();
1434 
1435  DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1436  "the ready list, PC %s opclass:%i [sn:%llu].\n",
1437  inst->pcState(), op_class, inst->seqNum);
1438 
1439  readyInsts[op_class].push(inst);
1440 
1441  // Will need to reorder the list if either a queue is not on the list,
1442  // or it has an older instruction than last time.
1443  if (!queueOnList[op_class]) {
1444  addToOrderList(op_class);
1445  } else if (readyInsts[op_class].top()->seqNum <
1446  (*readyIt[op_class]).oldestInst) {
 // The existing listOrder entry is stale: remove it and re-add the
 // op class so the entry is rebuilt from the new top().
1447  listOrder.erase(readyIt[op_class]);
1448  addToOrderList(op_class);
1449  }
1450  }
1451 }
1452 
// countInsts()
//
// NOTE(review): the line carrying the qualified function name (listing
// line 1454) is absent from this rendered listing; the member index that
// follows the listing maps inst_queue.cc:1454 to countInsts().
//
// Returns the number of occupied IQ entries, computed as the total entry
// count minus the free entry count.
1453 int
1455 {
1456  return numEntries - freeEntries;
1457 }
1458 
// dumpLists()
//
// NOTE(review): the line carrying the qualified function name (listing
// line 1460) is absent from this rendered listing; the member index that
// follows the listing maps inst_queue.cc:1460 to dumpLists().
//
// Debug dump via cprintf: the size of each per-op-class ready queue, the
// size and contents (PC and sequence number) of nonSpecInsts, and the
// entries of listOrder (position, op class, oldest sequence number).
1459 void
1461 {
1462  for (int i = 0; i < Num_OpClasses; ++i) {
1463  cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
1464 
1465  cprintf("\n");
1466  }
1467 
1468  cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
1469 
1470  NonSpecMapIt non_spec_it = nonSpecInsts.begin();
1471  NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
1472 
1473  cprintf("Non speculative list: ");
1474 
 // Each map entry's mapped value is dereferenced directly; no null check
 // is performed here.
1475  while (non_spec_it != non_spec_end_it) {
1476  cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(),
1477  (*non_spec_it).second->seqNum);
1478  ++non_spec_it;
1479  }
1480 
1481  cprintf("\n");
1482 
1483  ListOrderIt list_order_it = listOrder.begin();
1484  ListOrderIt list_order_end_it = listOrder.end();
 // 1-based position of the entry within listOrder, for display only.
1485  int i = 1;
1486 
1487  cprintf("List order: ");
1488 
1489  while (list_order_it != list_order_end_it) {
1490  cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType,
1491  (*list_order_it).oldestInst);
1492 
1493  ++list_order_it;
1494  ++i;
1495  }
1496 
1497  cprintf("\n");
1498 }
1499 
1500 
// dumpInsts()
//
// NOTE(review): the line carrying the qualified function name (listing
// line 1502) is absent from this rendered listing; the member index that
// follows the listing maps inst_queue.cc:1502 to dumpInsts().
//
// Debug dump via cprintf. First prints every entry of instList[tid] for
// each thread, then every entry of instsToExecute. For each entry it
// prints the running index, PC, sequence number, thread, issued/squashed
// flags and, for memory references, the memOpDone flag. A running "Count"
// is printed for entries that are not squashed and are either unissued or
// are memory references whose memory op is not done.
1501 void
1503 {
1504  for (ThreadID tid = 0; tid < numThreads; ++tid) {
 // Both counters restart at zero for every thread.
1505  int num = 0;
1506  int valid_num = 0;
1507  ListIt inst_list_it = instList[tid].begin();
1508 
1509  while (inst_list_it != instList[tid].end()) {
1510  cprintf("Instruction:%i\n", num);
1511  if (!(*inst_list_it)->isSquashed()) {
1512  if (!(*inst_list_it)->isIssued()) {
1513  ++valid_num;
1514  cprintf("Count:%i\n", valid_num);
1515  } else if ((*inst_list_it)->isMemRef() &&
1516  !(*inst_list_it)->memOpDone()) {
1517  // Loads that have not been marked as executed
1518  // still count towards the total instructions.
1519  ++valid_num;
1520  cprintf("Count:%i\n", valid_num);
1521  }
1522  }
1523 
1524  cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1525  "Issued:%i\nSquashed:%i\n",
1526  (*inst_list_it)->pcState(),
1527  (*inst_list_it)->seqNum,
1528  (*inst_list_it)->threadNumber,
1529  (*inst_list_it)->isIssued(),
1530  (*inst_list_it)->isSquashed());
1531 
1532  if ((*inst_list_it)->isMemRef()) {
1533  cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1534  }
1535 
1536  cprintf("\n");
1537 
1538  inst_list_it++;
1539  ++num;
1540  }
1541  }
1542 
1543  cprintf("Insts to Execute list:\n");
1544 
 // Second pass: same per-entry report, this time over instsToExecute.
 // These locals are distinct from the per-thread ones declared inside
 // the loop above.
1545  int num = 0;
1546  int valid_num = 0;
1547  ListIt inst_list_it = instsToExecute.begin();
1548 
1549  while (inst_list_it != instsToExecute.end())
1550  {
1551  cprintf("Instruction:%i\n",
1552  num);
1553  if (!(*inst_list_it)->isSquashed()) {
1554  if (!(*inst_list_it)->isIssued()) {
1555  ++valid_num;
1556  cprintf("Count:%i\n", valid_num);
1557  } else if ((*inst_list_it)->isMemRef() &&
1558  !(*inst_list_it)->memOpDone()) {
1559  // Loads that have not been marked as executed
1560  // still count towards the total instructions.
1561  ++valid_num;
1562  cprintf("Count:%i\n", valid_num);
1563  }
1564  }
1565 
1566  cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1567  "Issued:%i\nSquashed:%i\n",
1568  (*inst_list_it)->pcState(),
1569  (*inst_list_it)->seqNum,
1570  (*inst_list_it)->threadNumber,
1571  (*inst_list_it)->isIssued(),
1572  (*inst_list_it)->isSquashed());
1573 
1574  if ((*inst_list_it)->isMemRef()) {
1575  cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1576  }
1577 
1578  cprintf("\n");
1579 
1580  inst_list_it++;
1581  ++num;
1582  }
1583 }
1584 
1585 } // namespace o3
1586 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
ThreadID numThreads
Number of threads we're actually simulating (<= SMT_MAX_THREADS).
Definition: base.hh:367
bool switchedOut() const
Determine if the CPU is switched out.
Definition: base.hh:356
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:79
virtual std::string name() const
Definition: named.hh:47
Physical register ID.
Definition: reg_class.hh:392
constexpr RegIndex index() const
Visible RegId methods.
Definition: reg_class.hh:148
int getNumPinnedWritesToComplete() const
Definition: reg_class.hh:472
void decrNumPinnedWritesToComplete()
Definition: reg_class.hh:483
bool isPinned() const
Definition: reg_class.hh:469
constexpr const char * className() const
Return a const char* with the register class name.
Definition: reg_class.hh:273
const RegIndex & flatIndex() const
Flat index accessor.
Definition: reg_class.hh:449
bool isFixedMapping() const
Returns true if this register is always associated to the same architectural register.
Definition: reg_class.hh:446
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition: cpu.hh:94
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition: cpu.hh:485
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition: cpu.cc:1399
void freeUnitNextCycle(int fu_idx)
Frees a FU at the end of this cycle.
Definition: fu_pool.cc:190
Cycles getOpLatency(OpClass capability)
Returns the operation execution latency of the given capability.
Definition: fu_pool.hh:169
bool isPipelined(OpClass capability)
Returns whether the given capability is pipelined.
Definition: fu_pool.hh:174
static constexpr auto NoCapableFU
Definition: fu_pool.hh:142
static constexpr auto NoFreeFU
Definition: fu_pool.hh:143
int getUnit(OpClass capability)
Gets a FU providing the requested capability.
Definition: fu_pool.cc:162
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition: iew.hh:88
void wakeCPU()
Tells the CPU to wakeup if it has descheduled itself due to no activity.
Definition: iew.cc:803
FU completion event class.
Definition: inst_queue.hh:106
FUCompletion(const DynInstPtr &_inst, int fu_idx, InstructionQueue *iq_ptr)
Construct a FU completion event.
Definition: inst_queue.cc:66
virtual const char * description() const
Return a C string describing the event.
Definition: inst_queue.cc:82
A standard instruction queue class.
Definition: inst_queue.hh:99
std::string name() const
Returns the name of the IQ.
Definition: inst_queue.cc:174
void commit(const InstSeqNum &inst, ThreadID tid=0)
Commits all instructions up to and including the given sequence number, for a specific thread.
Definition: inst_queue.cc:948
gem5::o3::InstructionQueue::IQStats iqStats
void processFUCompletion(const DynInstPtr &inst, int fu_idx)
Process FU completion event.
Definition: inst_queue.cc:728
DynInstPtr getBlockedMemInstToExecute()
Gets a memory instruction that was blocked on the cache.
Definition: inst_queue.cc:1148
std::list< DynInstPtr > instList[MaxThreads]
List of all the instructions in the IQ (some of which may be issued).
Definition: inst_queue.hh:321
std::list< DynInstPtr > retryMemInsts
List of instructions that were cache blocked, but a retry has been seen since, so they can now be ret...
Definition: inst_queue.hh:337
void deferMemInst(const DynInstPtr &deferred_inst)
Defers a memory instruction when its DTB translation incurs a hw page table walk.
Definition: inst_queue.cc:1107
ReadyInstQueue readyInsts[Num_OpClasses]
List of ready instructions, per op class.
Definition: inst_queue.hh:357
unsigned totalWidth
The total number of instructions that can be issued in one cycle.
Definition: inst_queue.hh:433
void addIfReady(const DynInstPtr &inst)
Moves an instruction to the ready queue if it is ready.
Definition: inst_queue.cc:1415
std::map< InstSeqNum, DynInstPtr >::iterator NonSpecMapIt
Definition: inst_queue.hh:368
unsigned numEntries
The number of entries in the instruction queue.
Definition: inst_queue.hh:430
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a memory or write barrier into the IQ to make sure loads and stores are ordered properly.
Definition: inst_queue.cc:653
bool queueOnList[Num_OpClasses]
Tracks if each ready queue is on the age order list.
Definition: inst_queue.hh:389
FUPool * fuPool
Function unit pool.
Definition: inst_queue.hh:314
int wakeDependents(const DynInstPtr &completed_inst)
Wakes all dependents of a completed instruction.
Definition: inst_queue.cc:965
std::list< DynInstPtr > deferredMemInsts
List of instructions waiting for their DTB translation to complete (hw page table walk in progress).
Definition: inst_queue.hh:329
TimeBuffer< IssueStruct > * issueToExecuteQueue
The queue to the execute stage.
Definition: inst_queue.hh:305
std::list< DynInstPtr > instsToExecute
List of instructions that are ready to be executed.
Definition: inst_queue.hh:324
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the global time buffer.
Definition: inst_queue.cc:444
unsigned numFreeEntries()
Returns total number of free entries.
Definition: inst_queue.cc:512
std::list< DynInstPtr > blockedMemInsts
List of instructions that have been cache blocked.
Definition: inst_queue.hh:332
void rescheduleMemInst(const DynInstPtr &resched_inst)
Reschedules a memory instruction.
Definition: inst_queue.cc:1088
TimeBuffer< TimeStruct >::wire fromCommit
Wire to read information from timebuffer.
Definition: inst_queue.hh:311
void insertNonSpec(const DynInstPtr &new_inst)
Inserts a new, non-speculative instruction into the IQ.
Definition: inst_queue.cc:607
void addReadyMemInst(const DynInstPtr &ready_inst)
Adds a ready memory instruction to the ready list.
Definition: inst_queue.cc:1066
void replayMemInst(const DynInstPtr &replay_inst)
Replays a memory instruction.
Definition: inst_queue.cc:1101
void resetState()
Resets all instruction queue state.
Definition: inst_queue.cc:393
bool isDrained() const
Determine if we are drained.
Definition: inst_queue.cc:452
unsigned count[MaxThreads]
Per Thread IQ count.
Definition: inst_queue.hh:421
void cacheUnblocked()
Notify instruction queue that a previous blockage has resolved.
Definition: inst_queue.cc:1124
std::map< InstSeqNum, DynInstPtr > nonSpecInsts
List of non-speculative instructions that will be scheduled once the IQ gets a signal from commit.
Definition: inst_queue.hh:366
unsigned freeEntries
Number of free IQ entries left.
Definition: inst_queue.hh:427
MemDepUnit memDepUnit[MaxThreads]
The memory dependence unit, which tracks/predicts memory dependences between instructions.
Definition: inst_queue.hh:300
void dumpLists()
Debugging function to dump all the list sizes, as well as print out the list of nonspeculative instru...
Definition: inst_queue.cc:1460
void blockMemInst(const DynInstPtr &blocked_inst)
Defers a memory instruction when it is cache blocked.
Definition: inst_queue.cc:1113
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition: inst_queue.cc:464
unsigned numPhysRegs
The number of physical registers in the CPU.
Definition: inst_queue.hh:436
DynInstPtr getDeferredMemInstToExecute()
Gets a memory instruction that was deferred due to a delayed DTB translation if it is now ready to ex...
Definition: inst_queue.cc:1134
void dumpInsts()
Debugging function to dump out all instructions that are in the IQ.
Definition: inst_queue.cc:1502
void takeOverFrom()
Takes over execution from another CPU's thread.
Definition: inst_queue.cc:473
SMTQueuePolicy iqPolicy
IQ sharing policy for SMT.
Definition: inst_queue.hh:412
gem5::o3::InstructionQueue::IQIOStats iqIOStats
void moveToYoungerInst(ListOrderIt age_order_it)
Called when the oldest instruction has been removed from a ready queue; this places that ready queue ...
Definition: inst_queue.cc:703
InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
Constructs an IQ.
Definition: inst_queue.cc:87
InstSeqNum squashedSeqNum[MaxThreads]
The sequence number of the squashed instruction.
Definition: inst_queue.hh:447
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load)
Indicates an ordering violation between a store and a load.
Definition: inst_queue.cc:1160
std::list< DynInstPtr >::iterator ListIt
Definition: inst_queue.hh:102
bool hasReadyInsts()
Returns if there are any ready instructions in the IQ.
Definition: inst_queue.cc:546
Cycles commitToIEWDelay
Delay between commit stage and the IQ.
Definition: inst_queue.hh:444
void resetEntries()
Resets max entries for all threads.
Definition: inst_queue.cc:490
int countInsts()
Debugging function to count how many entries are in the IQ.
Definition: inst_queue.cc:1454
std::list< ThreadID > * activeThreads
Pointer to list of active threads.
Definition: inst_queue.hh:418
std::list< ListOrderEntry >::iterator ListOrderIt
Definition: inst_queue.hh:386
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets active threads list.
Definition: inst_queue.cc:432
void addToOrderList(OpClass op_class)
Add an op class to the age order list.
Definition: inst_queue.cc:677
ThreadID numThreads
Number of Total Threads.
Definition: inst_queue.hh:415
TimeBuffer< TimeStruct > * timeBuffer
The backwards time buffer.
Definition: inst_queue.hh:308
void scheduleNonSpec(const InstSeqNum &inst)
Schedules a single specific non-speculative instruction.
Definition: inst_queue.cc:921
std::vector< bool > regScoreboard
A cache of the recently woken registers.
Definition: inst_queue.hh:455
void scheduleReadyInsts()
Schedules ready instructions, adding the ready ones (oldest first) to the queue to execute.
Definition: inst_queue.cc:751
bool isFull()
Returns whether or not the IQ is full.
Definition: inst_queue.cc:526
void squash(ThreadID tid)
Squashes instructions for a thread.
Definition: inst_queue.cc:1168
IEW * iewStage
Pointer to IEW stage.
Definition: inst_queue.hh:295
std::list< ListOrderEntry > listOrder
List that contains the age order of the oldest instruction of each ready queue.
Definition: inst_queue.hh:384
~InstructionQueue()
Destructs the IQ.
Definition: inst_queue.cc:164
void doSquash(ThreadID tid)
Does the actual squashing.
Definition: inst_queue.cc:1184
void setIssueToExecuteQueue(TimeBuffer< IssueStruct > *i2eQueue)
Sets the time buffer between issue and execute.
Definition: inst_queue.cc:438
int wbOutstanding
Number of instructions currently in flight to FUs.
Definition: inst_queue.hh:439
void insert(const DynInstPtr &new_inst)
Inserts a new instruction into the IQ.
Definition: inst_queue.cc:562
unsigned maxEntries[MaxThreads]
Max IQ Entries Per Thread.
Definition: inst_queue.hh:424
CPU * cpu
Pointer to the CPU.
Definition: inst_queue.hh:289
bool addToDependents(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a consumer.
Definition: inst_queue.cc:1333
int entryAmount(ThreadID num_threads)
Number of entries needed for given amount of threads.
Definition: inst_queue.cc:479
DynInstPtr getInstToExecute()
Returns the oldest scheduled instruction, and removes it from the list of instructions waiting to exe...
Definition: inst_queue.cc:661
DependencyGraph< DynInstPtr > dependGraph
Definition: inst_queue.hh:405
ListOrderIt readyIt[Num_OpClasses]
Iterators of each ready queue.
Definition: inst_queue.hh:394
void addToProducers(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a producer.
Definition: inst_queue.cc:1380
void completeInst(const DynInstPtr &inst)
Notifies completion of an instruction.
void nonSpecInstReady(const DynInstPtr &inst)
Indicate that a non-speculative instruction is ready.
void issue(const DynInstPtr &inst)
Issues the given instruction.
void insert(const DynInstPtr &inst)
Inserts a memory instruction.
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squashes all instructions up until a given sequence number for a specific thread.
void violation(const DynInstPtr &store_inst, const DynInstPtr &violating_load)
Indicates an ordering violation between a store and a younger load.
void replay()
Replays all instructions that have been rescheduled by moving them to the ready list.
void init(const BaseO3CPUParams &params, ThreadID tid, CPU *cpu)
Initializes the unit with parameters and a thread id.
Definition: mem_dep_unit.cc:92
void regsReady(const DynInstPtr &inst)
Indicate that an instruction has its registers ready.
void insertNonSpec(const DynInstPtr &inst)
Inserts a non-speculative memory instruction.
void reschedule(const DynInstPtr &inst)
Reschedules an instruction to be re-executed.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a barrier instruction.
void setIQ(InstructionQueue *iq_ptr)
Sets the pointer to the IQ.
Derived & ysubnames(const char **names)
Definition: statistics.hh:478
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
Definition: statistics.hh:402
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Definition: statistics.hh:358
Derived & prereq(const Stat &prereq)
Set the prerequisite stat and marks this stat to print at the end of simulation.
Definition: statistics.hh:372
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1328
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2113
Statistics container.
Definition: group.hh:94
Derived & init(size_type _x, size_type _y)
Definition: statistics.hh:1174
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1040
STL list class.
Definition: stl.hh:51
Definition: test.h:63
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
Bitfield< 7 > i
Definition: misc_types.hh:67
static constexpr int MaxThreads
Definition: limits.hh:38
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition: info.hh:62
const FlagsType total
Print the total.
Definition: info.hh:60
const FlagsType dist
Print the distribution.
Definition: info.hh:66
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:235
static const OpClass Num_OpClasses
Definition: op_class.hh:108
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:155
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
constexpr decltype(nullptr) NoFault
Definition: types.hh:253
uint64_t InstSeqNum
Definition: inst_seq.hh:40
@ VecRegClass
Vector Register.
Definition: reg_class.hh:63
@ VecElemClass
Vector Register Native Elem lane.
Definition: reg_class.hh:65
static scfx_rep_node * list
Definition: scfx_rep.cc:336
statistics::Scalar vecInstQueueWakeupAccesses
Definition: inst_queue.hh:558
statistics::Scalar intInstQueueWakeupAccesses
Definition: inst_queue.hh:552
IQIOStats(statistics::Group *parent)
Definition: inst_queue.cc:327
statistics::Scalar fpInstQueueWakeupAccesses
Definition: inst_queue.hh:555
statistics::Vector2d statIssuedInstType
Stat for total number issued for each instruction type.
Definition: inst_queue.hh:535
statistics::Scalar floatInstsIssued
Stat for number of floating point instructions issued.
Definition: inst_queue.hh:495
statistics::Vector fuBusy
Number of times the FU was busy.
Definition: inst_queue.hh:541
statistics::Scalar instsAdded
Stat for number of instructions added.
Definition: inst_queue.hh:487
statistics::Distribution numIssuedDist
Distribution of number of instructions in the queue.
Definition: inst_queue.hh:523
statistics::Scalar nonSpecInstsAdded
Stat for number of non-speculative instructions added.
Definition: inst_queue.hh:489
statistics::Scalar squashedInstsExamined
Stat for number of squashed instructions examined when squashing.
Definition: inst_queue.hh:507
statistics::Scalar miscInstsIssued
Stat for number of miscellaneous instructions issued.
Definition: inst_queue.hh:501
statistics::Scalar branchInstsIssued
Stat for number of branch instructions issued.
Definition: inst_queue.hh:497
statistics::Formula fuBusyRate
Number of times the FU was busy per instruction issued.
Definition: inst_queue.hh:543
statistics::Scalar memInstsIssued
Stat for number of memory instructions issued.
Definition: inst_queue.hh:499
statistics::Scalar intInstsIssued
Stat for number of integer instructions issued.
Definition: inst_queue.hh:493
statistics::Formula issueRate
Number of instructions issued per cycle.
Definition: inst_queue.hh:538
IQStats(CPU *cpu, const unsigned &total_width)
Definition: inst_queue.cc:179
statistics::Scalar squashedOperandsExamined
Stat for number of squashed instruction operands examined when squashing.
Definition: inst_queue.hh:511
statistics::Scalar squashedInstsIssued
Stat for number of squashed instructions that were ready to issue.
Definition: inst_queue.hh:504
statistics::Vector statFuBusy
Distribution of the cycles it takes to issue an instruction.
Definition: inst_queue.hh:532
statistics::Scalar squashedNonSpecRemoved
Stat for number of non-speculative instructions removed due to a squash.
Definition: inst_queue.hh:515
Entry for the list age ordering by op class.
Definition: inst_queue.hh:372
bool operator()(const DynInstPtr &lhs, const DynInstPtr &rhs) const
Definition: inst_queue.cc:1326

Generated on Wed Dec 21 2022 10:22:30 for gem5 by doxygen 1.9.1