gem5  v20.0.0.3
inst_queue_impl.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2014, 2017-2019 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved.
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  */
41 
42 #ifndef __CPU_O3_INST_QUEUE_IMPL_HH__
43 #define __CPU_O3_INST_QUEUE_IMPL_HH__
44 
45 #include <limits>
46 #include <vector>
47 
48 #include "base/logging.hh"
49 #include "cpu/o3/fu_pool.hh"
50 #include "cpu/o3/inst_queue.hh"
51 #include "debug/IQ.hh"
52 #include "enums/OpClass.hh"
53 #include "params/DerivO3CPU.hh"
54 #include "sim/core.hh"
55 
56 // clang complains about std::set being overloaded with Packet::set if
57 // we open up the entire namespace std
58 using std::list;
59 
60 template <class Impl>
62  int fu_idx, InstructionQueue<Impl> *iq_ptr)
63  : Event(Stat_Event_Pri, AutoDelete),
64  inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
65 {
66 }
67 
68 template <class Impl>
69 void
71 {
72  iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
73  inst = NULL;
74 }
75 
76 
77 template <class Impl>
78 const char *
80 {
81  return "Functional unit completion";
82 }
83 
84 template <class Impl>
86  DerivO3CPUParams *params)
87  : cpu(cpu_ptr),
88  iewStage(iew_ptr),
89  fuPool(params->fuPool),
90  iqPolicy(params->smtIQPolicy),
91  numEntries(params->numIQEntries),
92  totalWidth(params->issueWidth),
94 {
95  assert(fuPool);
96 
97  numThreads = params->numThreads;
98 
99  // Set the number of total physical registers
100  // As the vector registers have two addressing modes, they are added twice
101  numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
102  params->numPhysVecRegs +
103  params->numPhysVecRegs * TheISA::NumVecElemPerVecReg +
104  params->numPhysVecPredRegs +
105  params->numPhysCCRegs;
106 
107  //Create an entry for each physical register within the
108  //dependency graph.
109  dependGraph.resize(numPhysRegs);
110 
111  // Resize the register scoreboard.
112  regScoreboard.resize(numPhysRegs);
113 
114  //Initialize Mem Dependence Units
115  for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
116  memDepUnit[tid].init(params, tid);
117  memDepUnit[tid].setIQ(this);
118  }
119 
120  resetState();
121 
122  //Figure out resource sharing policy
123  if (iqPolicy == SMTQueuePolicy::Dynamic) {
124  //Set Max Entries to Total ROB Capacity
125  for (ThreadID tid = 0; tid < numThreads; tid++) {
126  maxEntries[tid] = numEntries;
127  }
128 
129  } else if (iqPolicy == SMTQueuePolicy::Partitioned) {
130  //@todo:make work if part_amt doesnt divide evenly.
131  int part_amt = numEntries / numThreads;
132 
133  //Divide ROB up evenly
134  for (ThreadID tid = 0; tid < numThreads; tid++) {
135  maxEntries[tid] = part_amt;
136  }
137 
138  DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
139  "%i entries per thread.\n",part_amt);
140  } else if (iqPolicy == SMTQueuePolicy::Threshold) {
141  double threshold = (double)params->smtIQThreshold / 100;
142 
143  int thresholdIQ = (int)((double)threshold * numEntries);
144 
145  //Divide up by threshold amount
146  for (ThreadID tid = 0; tid < numThreads; tid++) {
147  maxEntries[tid] = thresholdIQ;
148  }
149 
150  DPRINTF(IQ, "IQ sharing policy set to Threshold:"
151  "%i entries per thread.\n",thresholdIQ);
152  }
153  for (ThreadID tid = numThreads; tid < Impl::MaxThreads; tid++) {
154  maxEntries[tid] = 0;
155  }
156 }
157 
158 template <class Impl>
160 {
161  dependGraph.reset();
162 #ifdef DEBUG
163  cprintf("Nodes traversed: %i, removed: %i\n",
164  dependGraph.nodesTraversed, dependGraph.nodesRemoved);
165 #endif
166 }
167 
168 template <class Impl>
169 std::string
171 {
172  return cpu->name() + ".iq";
173 }
174 
175 template <class Impl>
176 void
178 {
179  using namespace Stats;
181  .name(name() + ".iqInstsAdded")
182  .desc("Number of instructions added to the IQ (excludes non-spec)")
184 
186  .name(name() + ".iqNonSpecInstsAdded")
187  .desc("Number of non-speculative instructions added to the IQ")
189 
191  .name(name() + ".iqInstsIssued")
192  .desc("Number of instructions issued")
194 
196  .name(name() + ".iqIntInstsIssued")
197  .desc("Number of integer instructions issued")
199 
201  .name(name() + ".iqFloatInstsIssued")
202  .desc("Number of float instructions issued")
204 
206  .name(name() + ".iqBranchInstsIssued")
207  .desc("Number of branch instructions issued")
209 
211  .name(name() + ".iqMemInstsIssued")
212  .desc("Number of memory instructions issued")
214 
216  .name(name() + ".iqMiscInstsIssued")
217  .desc("Number of miscellaneous instructions issued")
219 
221  .name(name() + ".iqSquashedInstsIssued")
222  .desc("Number of squashed instructions issued")
224 
226  .name(name() + ".iqSquashedInstsExamined")
227  .desc("Number of squashed instructions iterated over during squash;"
228  " mainly for profiling")
230 
232  .name(name() + ".iqSquashedOperandsExamined")
233  .desc("Number of squashed operands that are examined and possibly "
234  "removed from graph")
236 
238  .name(name() + ".iqSquashedNonSpecRemoved")
239  .desc("Number of squashed non-spec instructions that were removed")
241 /*
242  queueResDist
243  .init(Num_OpClasses, 0, 99, 2)
244  .name(name() + ".IQ:residence:")
245  .desc("cycles from dispatch to issue")
246  .flags(total | pdf | cdf )
247  ;
248  for (int i = 0; i < Num_OpClasses; ++i) {
249  queueResDist.subname(i, opClassStrings[i]);
250  }
251 */
253  .init(0,totalWidth,1)
254  .name(name() + ".issued_per_cycle")
255  .desc("Number of insts issued each cycle")
256  .flags(pdf)
257  ;
258 /*
259  dist_unissued
260  .init(Num_OpClasses+2)
261  .name(name() + ".unissued_cause")
262  .desc("Reason ready instruction not issued")
263  .flags(pdf | dist)
264  ;
265  for (int i=0; i < (Num_OpClasses + 2); ++i) {
266  dist_unissued.subname(i, unissued_names[i]);
267  }
268 */
270  .init(numThreads,Enums::Num_OpClass)
271  .name(name() + ".FU_type")
272  .desc("Type of FU issued")
273  .flags(total | pdf | dist)
274  ;
275  statIssuedInstType.ysubnames(Enums::OpClassStrings);
276 
277  //
278  // How long did instructions for a particular FU type wait prior to issue
279  //
280 /*
281  issueDelayDist
282  .init(Num_OpClasses,0,99,2)
283  .name(name() + ".")
284  .desc("cycles from operands ready to issue")
285  .flags(pdf | cdf)
286  ;
287 
288  for (int i=0; i<Num_OpClasses; ++i) {
289  std::stringstream subname;
290  subname << opClassStrings[i] << "_delay";
291  issueDelayDist.subname(i, subname.str());
292  }
293 */
294  issueRate
295  .name(name() + ".rate")
296  .desc("Inst issue rate")
297  .flags(total)
298  ;
299  issueRate = iqInstsIssued / cpu->numCycles;
300 
301  statFuBusy
303  .name(name() + ".fu_full")
304  .desc("attempts to use FU when none available")
305  .flags(pdf | dist)
306  ;
307  for (int i=0; i < Num_OpClasses; ++i) {
308  statFuBusy.subname(i, Enums::OpClassStrings[i]);
309  }
310 
311  fuBusy
312  .init(numThreads)
313  .name(name() + ".fu_busy_cnt")
314  .desc("FU busy when requested")
315  .flags(total)
316  ;
317 
318  fuBusyRate
319  .name(name() + ".fu_busy_rate")
320  .desc("FU busy rate (busy events/executed inst)")
321  .flags(total)
322  ;
324 
325  for (ThreadID tid = 0; tid < numThreads; tid++) {
326  // Tell mem dependence unit to reg stats as well.
327  memDepUnit[tid].regStats();
328  }
329 
331  .name(name() + ".int_inst_queue_reads")
332  .desc("Number of integer instruction queue reads")
333  .flags(total);
334 
336  .name(name() + ".int_inst_queue_writes")
337  .desc("Number of integer instruction queue writes")
338  .flags(total);
339 
341  .name(name() + ".int_inst_queue_wakeup_accesses")
342  .desc("Number of integer instruction queue wakeup accesses")
343  .flags(total);
344 
346  .name(name() + ".fp_inst_queue_reads")
347  .desc("Number of floating instruction queue reads")
348  .flags(total);
349 
351  .name(name() + ".fp_inst_queue_writes")
352  .desc("Number of floating instruction queue writes")
353  .flags(total);
354 
356  .name(name() + ".fp_inst_queue_wakeup_accesses")
357  .desc("Number of floating instruction queue wakeup accesses")
358  .flags(total);
359 
361  .name(name() + ".vec_inst_queue_reads")
362  .desc("Number of vector instruction queue reads")
363  .flags(total);
364 
366  .name(name() + ".vec_inst_queue_writes")
367  .desc("Number of vector instruction queue writes")
368  .flags(total);
369 
371  .name(name() + ".vec_inst_queue_wakeup_accesses")
372  .desc("Number of vector instruction queue wakeup accesses")
373  .flags(total);
374 
376  .name(name() + ".int_alu_accesses")
377  .desc("Number of integer alu accesses")
378  .flags(total);
379 
381  .name(name() + ".fp_alu_accesses")
382  .desc("Number of floating point alu accesses")
383  .flags(total);
384 
386  .name(name() + ".vec_alu_accesses")
387  .desc("Number of vector alu accesses")
388  .flags(total);
389 
390 }
391 
392 template <class Impl>
393 void
395 {
396  //Initialize thread IQ counts
397  for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
398  count[tid] = 0;
399  instList[tid].clear();
400  }
401 
402  // Initialize the number of free IQ entries.
404 
405  // Note that in actuality, the registers corresponding to the logical
406  // registers start off as ready. However this doesn't matter for the
407  // IQ as the instruction should have been correctly told if those
408  // registers are ready in rename. Thus it can all be initialized as
409  // unready.
410  for (int i = 0; i < numPhysRegs; ++i) {
411  regScoreboard[i] = false;
412  }
413 
414  for (ThreadID tid = 0; tid < Impl::MaxThreads; ++tid) {
415  squashedSeqNum[tid] = 0;
416  }
417 
418  for (int i = 0; i < Num_OpClasses; ++i) {
419  while (!readyInsts[i].empty())
420  readyInsts[i].pop();
421  queueOnList[i] = false;
422  readyIt[i] = listOrder.end();
423  }
424  nonSpecInsts.clear();
425  listOrder.clear();
426  deferredMemInsts.clear();
427  blockedMemInsts.clear();
428  retryMemInsts.clear();
429  wbOutstanding = 0;
430 }
431 
432 template <class Impl>
433 void
435 {
436  activeThreads = at_ptr;
437 }
438 
439 template <class Impl>
440 void
442 {
443  issueToExecuteQueue = i2e_ptr;
444 }
445 
446 template <class Impl>
447 void
449 {
450  timeBuffer = tb_ptr;
451 
453 }
454 
455 template <class Impl>
456 bool
458 {
459  bool drained = dependGraph.empty() &&
460  instsToExecute.empty() &&
461  wbOutstanding == 0;
462  for (ThreadID tid = 0; tid < numThreads; ++tid)
463  drained = drained && memDepUnit[tid].isDrained();
464 
465  return drained;
466 }
467 
468 template <class Impl>
469 void
471 {
472  assert(dependGraph.empty());
473  assert(instsToExecute.empty());
474  for (ThreadID tid = 0; tid < numThreads; ++tid)
476 }
477 
478 template <class Impl>
479 void
481 {
482  resetState();
483 }
484 
485 template <class Impl>
486 int
488 {
489  if (iqPolicy == SMTQueuePolicy::Partitioned) {
490  return numEntries / num_threads;
491  } else {
492  return 0;
493  }
494 }
495 
496 
497 template <class Impl>
498 void
500 {
501  if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
502  int active_threads = activeThreads->size();
503 
504  list<ThreadID>::iterator threads = activeThreads->begin();
506 
507  while (threads != end) {
508  ThreadID tid = *threads++;
509 
510  if (iqPolicy == SMTQueuePolicy::Partitioned) {
511  maxEntries[tid] = numEntries / active_threads;
512  } else if (iqPolicy == SMTQueuePolicy::Threshold &&
513  active_threads == 1) {
514  maxEntries[tid] = numEntries;
515  }
516  }
517  }
518 }
519 
520 template <class Impl>
521 unsigned
523 {
524  return freeEntries;
525 }
526 
527 template <class Impl>
528 unsigned
530 {
531  return maxEntries[tid] - count[tid];
532 }
533 
534 // Might want to do something more complex if it knows how many instructions
535 // will be issued this cycle.
536 template <class Impl>
537 bool
539 {
540  if (freeEntries == 0) {
541  return(true);
542  } else {
543  return(false);
544  }
545 }
546 
547 template <class Impl>
548 bool
550 {
551  if (numFreeEntries(tid) == 0) {
552  return(true);
553  } else {
554  return(false);
555  }
556 }
557 
558 template <class Impl>
559 bool
561 {
562  if (!listOrder.empty()) {
563  return true;
564  }
565 
566  for (int i = 0; i < Num_OpClasses; ++i) {
567  if (!readyInsts[i].empty()) {
568  return true;
569  }
570  }
571 
572  return false;
573 }
574 
575 template <class Impl>
576 void
578 {
579  if (new_inst->isFloating()) {
581  } else if (new_inst->isVector()) {
583  } else {
585  }
586  // Make sure the instruction is valid
587  assert(new_inst);
588 
589  DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n",
590  new_inst->seqNum, new_inst->pcState());
591 
592  assert(freeEntries != 0);
593 
594  instList[new_inst->threadNumber].push_back(new_inst);
595 
596  --freeEntries;
597 
598  new_inst->setInIQ();
599 
600  // Look through its source registers (physical regs), and mark any
601  // dependencies.
602  addToDependents(new_inst);
603 
604  // Have this instruction set itself as the producer of its destination
605  // register(s).
606  addToProducers(new_inst);
607 
608  if (new_inst->isMemRef()) {
609  memDepUnit[new_inst->threadNumber].insert(new_inst);
610  } else {
611  addIfReady(new_inst);
612  }
613 
614  ++iqInstsAdded;
615 
616  count[new_inst->threadNumber]++;
617 
618  assert(freeEntries == (numEntries - countInsts()));
619 }
620 
621 template <class Impl>
622 void
624 {
625  // @todo: Clean up this code; can do it by setting inst as unable
626  // to issue, then calling normal insert on the inst.
627  if (new_inst->isFloating()) {
629  } else if (new_inst->isVector()) {
631  } else {
633  }
634 
635  assert(new_inst);
636 
637  nonSpecInsts[new_inst->seqNum] = new_inst;
638 
639  DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s "
640  "to the IQ.\n",
641  new_inst->seqNum, new_inst->pcState());
642 
643  assert(freeEntries != 0);
644 
645  instList[new_inst->threadNumber].push_back(new_inst);
646 
647  --freeEntries;
648 
649  new_inst->setInIQ();
650 
651  // Have this instruction set itself as the producer of its destination
652  // register(s).
653  addToProducers(new_inst);
654 
655  // If it's a memory instruction, add it to the memory dependency
656  // unit.
657  if (new_inst->isMemRef()) {
658  memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
659  }
660 
662 
663  count[new_inst->threadNumber]++;
664 
665  assert(freeEntries == (numEntries - countInsts()));
666 }
667 
668 template <class Impl>
669 void
671 {
672  memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
673 
674  insertNonSpec(barr_inst);
675 }
676 
677 template <class Impl>
678 typename Impl::DynInstPtr
680 {
681  assert(!instsToExecute.empty());
682  DynInstPtr inst = std::move(instsToExecute.front());
683  instsToExecute.pop_front();
684  if (inst->isFloating()) {
686  } else if (inst->isVector()) {
688  } else {
690  }
691  return inst;
692 }
693 
694 template <class Impl>
695 void
697 {
698  assert(!readyInsts[op_class].empty());
699 
700  ListOrderEntry queue_entry;
701 
702  queue_entry.queueType = op_class;
703 
704  queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
705 
706  ListOrderIt list_it = listOrder.begin();
707  ListOrderIt list_end_it = listOrder.end();
708 
709  while (list_it != list_end_it) {
710  if ((*list_it).oldestInst > queue_entry.oldestInst) {
711  break;
712  }
713 
714  list_it++;
715  }
716 
717  readyIt[op_class] = listOrder.insert(list_it, queue_entry);
718  queueOnList[op_class] = true;
719 }
720 
721 template <class Impl>
722 void
724 {
725  // Get iterator of next item on the list
726  // Delete the original iterator
727  // Determine if the next item is either the end of the list or younger
728  // than the new instruction. If so, then add in a new iterator right here.
729  // If not, then move along.
730  ListOrderEntry queue_entry;
731  OpClass op_class = (*list_order_it).queueType;
732  ListOrderIt next_it = list_order_it;
733 
734  ++next_it;
735 
736  queue_entry.queueType = op_class;
737  queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
738 
739  while (next_it != listOrder.end() &&
740  (*next_it).oldestInst < queue_entry.oldestInst) {
741  ++next_it;
742  }
743 
744  readyIt[op_class] = listOrder.insert(next_it, queue_entry);
745 }
746 
747 template <class Impl>
748 void
750 {
751  DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
752  assert(!cpu->switchedOut());
753  // The CPU could have been sleeping until this op completed (*extremely*
754  // long latency op). Wake it if it was. This may be overkill.
755  --wbOutstanding;
756  iewStage->wakeCPU();
757 
758  if (fu_idx > -1)
759  fuPool->freeUnitNextCycle(fu_idx);
760 
761  // @todo: Ensure that these FU Completions happen at the beginning
762  // of a cycle, otherwise they could add too many instructions to
763  // the queue.
765  instsToExecute.push_back(inst);
766 }
767 
768 // @todo: Figure out a better way to remove the squashed items from the
769 // lists. Checking the top item of each list to see if it's squashed
770 // wastes time and forces jumps.
771 template <class Impl>
772 void
774 {
775  DPRINTF(IQ, "Attempting to schedule ready instructions from "
776  "the IQ.\n");
777 
778  IssueStruct *i2e_info = issueToExecuteQueue->access(0);
779 
780  DynInstPtr mem_inst;
781  while (mem_inst = std::move(getDeferredMemInstToExecute())) {
782  addReadyMemInst(mem_inst);
783  }
784 
785  // See if any cache blocked instructions are able to be executed
786  while (mem_inst = std::move(getBlockedMemInstToExecute())) {
787  addReadyMemInst(mem_inst);
788  }
789 
790  // Have iterator to head of the list
791  // While I haven't exceeded bandwidth or reached the end of the list,
792  // Try to get a FU that can do what this op needs.
793  // If successful, change the oldestInst to the new top of the list, put
794  // the queue in the proper place in the list.
795  // Increment the iterator.
796  // This will avoid trying to schedule a certain op class if there are no
797  // FUs that handle it.
798  int total_issued = 0;
799  ListOrderIt order_it = listOrder.begin();
800  ListOrderIt order_end_it = listOrder.end();
801 
802  while (total_issued < totalWidth && order_it != order_end_it) {
803  OpClass op_class = (*order_it).queueType;
804 
805  assert(!readyInsts[op_class].empty());
806 
807  DynInstPtr issuing_inst = readyInsts[op_class].top();
808 
809  if (issuing_inst->isFloating()) {
811  } else if (issuing_inst->isVector()) {
813  } else {
815  }
816 
817  assert(issuing_inst->seqNum == (*order_it).oldestInst);
818 
819  if (issuing_inst->isSquashed()) {
820  readyInsts[op_class].pop();
821 
822  if (!readyInsts[op_class].empty()) {
823  moveToYoungerInst(order_it);
824  } else {
825  readyIt[op_class] = listOrder.end();
826  queueOnList[op_class] = false;
827  }
828 
829  listOrder.erase(order_it++);
830 
832 
833  continue;
834  }
835 
836  int idx = FUPool::NoCapableFU;
837  Cycles op_latency = Cycles(1);
838  ThreadID tid = issuing_inst->threadNumber;
839 
840  if (op_class != No_OpClass) {
841  idx = fuPool->getUnit(op_class);
842  if (issuing_inst->isFloating()) {
843  fpAluAccesses++;
844  } else if (issuing_inst->isVector()) {
845  vecAluAccesses++;
846  } else {
847  intAluAccesses++;
848  }
849  if (idx > FUPool::NoFreeFU) {
850  op_latency = fuPool->getOpLatency(op_class);
851  }
852  }
853 
854  // If we have an instruction that doesn't require a FU, or a
855  // valid FU, then schedule for execution.
856  if (idx != FUPool::NoFreeFU) {
857  if (op_latency == Cycles(1)) {
858  i2e_info->size++;
859  instsToExecute.push_back(issuing_inst);
860 
861  // Add the FU onto the list of FU's to be freed next
862  // cycle if we used one.
863  if (idx >= 0)
865  } else {
866  bool pipelined = fuPool->isPipelined(op_class);
867  // Generate completion event for the FU
868  ++wbOutstanding;
869  FUCompletion *execution = new FUCompletion(issuing_inst,
870  idx, this);
871 
872  cpu->schedule(execution,
873  cpu->clockEdge(Cycles(op_latency - 1)));
874 
875  if (!pipelined) {
876  // If FU isn't pipelined, then it must be freed
877  // upon the execution completing.
878  execution->setFreeFU();
879  } else {
880  // Add the FU onto the list of FU's to be freed next cycle.
882  }
883  }
884 
885  DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
886  "[sn:%llu]\n",
887  tid, issuing_inst->pcState(),
888  issuing_inst->seqNum);
889 
890  readyInsts[op_class].pop();
891 
892  if (!readyInsts[op_class].empty()) {
893  moveToYoungerInst(order_it);
894  } else {
895  readyIt[op_class] = listOrder.end();
896  queueOnList[op_class] = false;
897  }
898 
899  issuing_inst->setIssued();
900  ++total_issued;
901 
902 #if TRACING_ON
903  issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
904 #endif
905 
906  if (!issuing_inst->isMemRef()) {
907  // Memory instructions can not be freed from the IQ until they
908  // complete.
909  ++freeEntries;
910  count[tid]--;
911  issuing_inst->clearInIQ();
912  } else {
913  memDepUnit[tid].issue(issuing_inst);
914  }
915 
916  listOrder.erase(order_it++);
917  statIssuedInstType[tid][op_class]++;
918  } else {
919  statFuBusy[op_class]++;
920  fuBusy[tid]++;
921  ++order_it;
922  }
923  }
924 
925  numIssuedDist.sample(total_issued);
926  iqInstsIssued+= total_issued;
927 
928  // If we issued any instructions, tell the CPU we had activity.
929  // @todo If the way deferred memory instructions are handeled due to
930  // translation changes then the deferredMemInsts condition should be removed
931  // from the code below.
932  if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
933  cpu->activityThisCycle();
934  } else {
935  DPRINTF(IQ, "Not able to schedule any instructions.\n");
936  }
937 }
938 
939 template <class Impl>
940 void
942 {
943  DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready "
944  "to execute.\n", inst);
945 
946  NonSpecMapIt inst_it = nonSpecInsts.find(inst);
947 
948  assert(inst_it != nonSpecInsts.end());
949 
950  ThreadID tid = (*inst_it).second->threadNumber;
951 
952  (*inst_it).second->setAtCommit();
953 
954  (*inst_it).second->setCanIssue();
955 
956  if (!(*inst_it).second->isMemRef()) {
957  addIfReady((*inst_it).second);
958  } else {
959  memDepUnit[tid].nonSpecInstReady((*inst_it).second);
960  }
961 
962  (*inst_it).second = NULL;
963 
964  nonSpecInsts.erase(inst_it);
965 }
966 
967 template <class Impl>
968 void
970 {
971  DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n",
972  tid,inst);
973 
974  ListIt iq_it = instList[tid].begin();
975 
976  while (iq_it != instList[tid].end() &&
977  (*iq_it)->seqNum <= inst) {
978  ++iq_it;
979  instList[tid].pop_front();
980  }
981 
982  assert(freeEntries == (numEntries - countInsts()));
983 }
984 
985 template <class Impl>
986 int
988 {
989  int dependents = 0;
990 
991  // The instruction queue here takes care of both floating and int ops
992  if (completed_inst->isFloating()) {
994  } else if (completed_inst->isVector()) {
996  } else {
998  }
999 
1000  DPRINTF(IQ, "Waking dependents of completed instruction.\n");
1001 
1002  assert(!completed_inst->isSquashed());
1003 
1004  // Tell the memory dependence unit to wake any dependents on this
1005  // instruction if it is a memory instruction. Also complete the memory
1006  // instruction at this point since we know it executed without issues.
1007  // @todo: Might want to rename "completeMemInst" to something that
1008  // indicates that it won't need to be replayed, and call this
1009  // earlier. Might not be a big deal.
1010  if (completed_inst->isMemRef()) {
1011  memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
1012  completeMemInst(completed_inst);
1013  } else if (completed_inst->isMemBarrier() ||
1014  completed_inst->isWriteBarrier()) {
1015  memDepUnit[completed_inst->threadNumber].completeBarrier(completed_inst);
1016  }
1017 
1018  for (int dest_reg_idx = 0;
1019  dest_reg_idx < completed_inst->numDestRegs();
1020  dest_reg_idx++)
1021  {
1022  PhysRegIdPtr dest_reg =
1023  completed_inst->renamedDestRegIdx(dest_reg_idx);
1024 
1025  // Special case of uniq or control registers. They are not
1026  // handled by the IQ and thus have no dependency graph entry.
1027  if (dest_reg->isFixedMapping()) {
1028  DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n",
1029  dest_reg->index(), dest_reg->className());
1030  continue;
1031  }
1032 
1033  // Avoid waking up dependents if the register is pinned
1034  dest_reg->decrNumPinnedWritesToComplete();
1035  if (dest_reg->isPinned())
1036  completed_inst->setPinnedRegsWritten();
1037 
1038  if (dest_reg->getNumPinnedWritesToComplete() != 0) {
1039  DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n",
1040  dest_reg->index(), dest_reg->className());
1041  continue;
1042  }
1043 
1044  DPRINTF(IQ, "Waking any dependents on register %i (%s).\n",
1045  dest_reg->index(),
1046  dest_reg->className());
1047 
1048  //Go through the dependency chain, marking the registers as
1049  //ready within the waiting instructions.
1050  DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
1051 
1052  while (dep_inst) {
1053  DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
1054  "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
1055 
1056  // Might want to give more information to the instruction
1057  // so that it knows which of its source registers is
1058  // ready. However that would mean that the dependency
1059  // graph entries would need to hold the src_reg_idx.
1060  dep_inst->markSrcRegReady();
1061 
1062  addIfReady(dep_inst);
1063 
1064  dep_inst = dependGraph.pop(dest_reg->flatIndex());
1065 
1066  ++dependents;
1067  }
1068 
1069  // Reset the head node now that all of its dependents have
1070  // been woken up.
1071  assert(dependGraph.empty(dest_reg->flatIndex()));
1072  dependGraph.clearInst(dest_reg->flatIndex());
1073 
1074  // Mark the scoreboard as having that register ready.
1075  regScoreboard[dest_reg->flatIndex()] = true;
1076  }
1077  return dependents;
1078 }
1079 
1080 template <class Impl>
1081 void
1083 {
1084  OpClass op_class = ready_inst->opClass();
1085 
1086  readyInsts[op_class].push(ready_inst);
1087 
1088  // Will need to reorder the list if either a queue is not on the list,
1089  // or it has an older instruction than last time.
1090  if (!queueOnList[op_class]) {
1091  addToOrderList(op_class);
1092  } else if (readyInsts[op_class].top()->seqNum <
1093  (*readyIt[op_class]).oldestInst) {
1094  listOrder.erase(readyIt[op_class]);
1095  addToOrderList(op_class);
1096  }
1097 
1098  DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1099  "the ready list, PC %s opclass:%i [sn:%llu].\n",
1100  ready_inst->pcState(), op_class, ready_inst->seqNum);
1101 }
1102 
1103 template <class Impl>
1104 void
1106 {
1107  DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
1108 
1109  // Reset DTB translation state
1110  resched_inst->translationStarted(false);
1111  resched_inst->translationCompleted(false);
1112 
1113  resched_inst->clearCanIssue();
1114  memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
1115 }
1116 
1117 template <class Impl>
1118 void
1120 {
1121  memDepUnit[replay_inst->threadNumber].replay();
1122 }
1123 
1124 template <class Impl>
1125 void
1127 {
1128  ThreadID tid = completed_inst->threadNumber;
1129 
1130  DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
1131  completed_inst->pcState(), completed_inst->seqNum);
1132 
1133  ++freeEntries;
1134 
1135  completed_inst->memOpDone(true);
1136 
1137  memDepUnit[tid].completed(completed_inst);
1138  count[tid]--;
1139 }
1140 
1141 template <class Impl>
1142 void
1144 {
1145  deferredMemInsts.push_back(deferred_inst);
1146 }
1147 
1148 template <class Impl>
1149 void
1151 {
1152  blocked_inst->clearIssued();
1153  blocked_inst->clearCanIssue();
1154  blockedMemInsts.push_back(blocked_inst);
1155 }
1156 
1157 template <class Impl>
1158 void
1160 {
1162  // Get the CPU ticking again
1163  cpu->wakeCPU();
1164 }
1165 
1166 template <class Impl>
1167 typename Impl::DynInstPtr
1169 {
1170  for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
1171  ++it) {
1172  if ((*it)->translationCompleted() || (*it)->isSquashed()) {
1173  DynInstPtr mem_inst = std::move(*it);
1174  deferredMemInsts.erase(it);
1175  return mem_inst;
1176  }
1177  }
1178  return nullptr;
1179 }
1180 
1181 template <class Impl>
1182 typename Impl::DynInstPtr
1184 {
1185  if (retryMemInsts.empty()) {
1186  return nullptr;
1187  } else {
1188  DynInstPtr mem_inst = std::move(retryMemInsts.front());
1189  retryMemInsts.pop_front();
1190  return mem_inst;
1191  }
1192 }
1193 
1194 template <class Impl>
1195 void
1197  const DynInstPtr &faulting_load)
1198 {
1200  memDepUnit[store->threadNumber].violation(store, faulting_load);
1201 }
1202 
1203 template <class Impl>
1204 void
1206 {
1207  DPRINTF(IQ, "[tid:%i] Starting to squash instructions in "
1208  "the IQ.\n", tid);
1209 
1210  // Read instruction sequence number of last instruction out of the
1211  // time buffer.
1212  squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
1213 
1214  doSquash(tid);
1215 
1216  // Also tell the memory dependence unit to squash.
1217  memDepUnit[tid].squash(squashedSeqNum[tid], tid);
1218 }
1219 
// Walks instList[tid] from its tail towards its head and removes every entry
// whose seqNum is greater than squashedSeqNum[tid]. For each such entry that
// belongs to `tid`, is not already marked squashed-in-IQ, and is either not
// yet issued or is a memory reference whose memOpDone() is false, the code
// also detaches it from dependGraph / nonSpecInsts, sets the squashed and
// pass-through status flags, decrements count[] for its thread and
// increments freeEntries.
// NOTE(review): several listing lines are absent from this extract: 1222
// (the declarator; presumably InstructionQueue<Impl>::doSquash(ThreadID tid)
// per the member index at the end of the page), 1238/1240/1242 (the bodies of
// the isFloating/isVector/else branches), 1295, 1318 and 1359. What is shown
// here is therefore not the complete function body.
1220 template <class Impl>
1221 void
1223 {
1224  // Start at the tail.
     // NOTE(review): if instList[tid] can be empty here, decrementing end()
     // (== begin()) is undefined behaviour per the C++ standard -- confirm
     // whether callers guarantee a non-empty list.
1225  ListIt squash_it = instList[tid].end();
1226  --squash_it;
1227 
1228  DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n",
1229  tid, squashedSeqNum[tid]);
1230 
1231  // Squash any instructions younger than the squashed sequence number
1232  // given.
1233  while (squash_it != instList[tid].end() &&
1234  (*squash_it)->seqNum > squashedSeqNum[tid]) {
1235 
1236  DynInstPtr squashed_inst = (*squash_it);
     // The three branch bodies below are empty only because listing lines
     // 1238, 1240 and 1242 are missing from this extract.
1237  if (squashed_inst->isFloating()) {
1239  } else if (squashed_inst->isVector()) {
1241  } else {
1243  }
1244 
1245  // Only handle the instruction if it actually is in the IQ and
1246  // hasn't already been squashed in the IQ.
     // Note: this early `continue` steps past the entry without erasing it
     // from instList[tid].
1247  if (squashed_inst->threadNumber != tid ||
1248  squashed_inst->isSquashedInIQ()) {
1249  --squash_it;
1250  continue;
1251  }
1252 
1253  if (!squashed_inst->isIssued() ||
1254  (squashed_inst->isMemRef() &&
1255  !squashed_inst->memOpDone())) {
1256 
1257  DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n",
1258  tid, squashed_inst->seqNum, squashed_inst->pcState());
1259 
     // True for a memory barrier that is also a load, or a store that is
     // not a store-conditional.
1260  bool is_acq_rel = squashed_inst->isMemBarrier() &&
1261  (squashed_inst->isLoad() ||
1262  (squashed_inst->isStore() &&
1263  !squashed_inst->isStoreConditional()));
1264 
1265  // Remove the instruction from the dependency list.
1266  if (is_acq_rel ||
1267  (!squashed_inst->isNonSpeculative() &&
1268  !squashed_inst->isStoreConditional() &&
1269  !squashed_inst->isAtomic() &&
1270  !squashed_inst->isMemBarrier() &&
1271  !squashed_inst->isWriteBarrier())) {
1272 
1273  for (int src_reg_idx = 0;
1274  src_reg_idx < squashed_inst->numSrcRegs();
1275  src_reg_idx++)
1276  {
1277  PhysRegIdPtr src_reg =
1278  squashed_inst->renamedSrcRegIdx(src_reg_idx);
1279 
1280  // Only remove it from the dependency graph if it
1281  // was placed there in the first place.
1282 
1283  // Instead of doing a linked list traversal, we
1284  // can just remove these squashed instructions
1285  // either at issue time, or when the register is
1286  // overwritten. The only downside to this is it
1287  // leaves more room for error.
1288 
1289  if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
1290  !src_reg->isFixedMapping()) {
1291  dependGraph.remove(src_reg->flatIndex(),
1292  squashed_inst);
1293  }
1294 
1296  }
1297 
1298  } else if (!squashed_inst->isStoreConditional() ||
1299  !squashed_inst->isCompleted()) {
1300  NonSpecMapIt ns_inst_it =
1301  nonSpecInsts.find(squashed_inst->seqNum);
1302 
1303  // we remove non-speculative instructions from
1304  // nonSpecInsts already when they are ready, and so we
1305  // cannot always expect to find them
1306  if (ns_inst_it == nonSpecInsts.end()) {
1307  // loads that became ready but stalled on a
1308  // blocked cache are already removed from
1309  // nonSpecInsts, and have not faulted
1310  assert(squashed_inst->getFault() != NoFault ||
1311  squashed_inst->isMemRef());
1312  } else {
1313 
1314  (*ns_inst_it).second = NULL;
1315 
1316  nonSpecInsts.erase(ns_inst_it);
1317 
1319  }
1320  }
1321 
1322  // Might want to also clear out the head of the dependency graph.
1323 
1324  // Mark it as squashed within the IQ.
1325  squashed_inst->setSquashedInIQ();
1326 
1327  // @todo: Remove this hack where several statuses are set so the
1328  // inst will flow through the rest of the pipeline.
1329  squashed_inst->setIssued();
1330  squashed_inst->setCanCommit();
1331  squashed_inst->clearInIQ();
1332 
1333  //Update Thread IQ Count
1334  count[squashed_inst->threadNumber]--;
1335 
1336  ++freeEntries;
1337  }
1338 
1339  // IQ clears out the heads of the dependency graph only when
1340  // instructions reach writeback stage. If an instruction is squashed
1341  // before writeback stage, its head of dependency graph would not be
1342  // cleared out; it holds the instruction's DynInstPtr. This prevents
1343  // freeing the squashed instruction's DynInst.
1344  // Thus, we need to manually clear out the squashed instructions' heads
1345  // of dependency graph.
1346  for (int dest_reg_idx = 0;
1347  dest_reg_idx < squashed_inst->numDestRegs();
1348  dest_reg_idx++)
1349  {
1350  PhysRegIdPtr dest_reg =
1351  squashed_inst->renamedDestRegIdx(dest_reg_idx);
1352  if (dest_reg->isFixedMapping()){
1353  continue;
1354  }
1355  assert(dependGraph.empty(dest_reg->flatIndex()));
1356  dependGraph.clearInst(dest_reg->flatIndex());
1357  }
     // Post-decrement: erase() receives the current position while squash_it
     // has already stepped to the previous (older) entry, so it is not
     // invalidated by the erase.
     // NOTE(review): when the erased entry is the list head this decrements
     // begin(); the loop test above compares against end(), so this appears
     // to rely on the list implementation wrapping around -- confirm.
1358  instList[tid].erase(squash_it--);
1360  }
1361 }
1362 
// Registers new_inst as a consumer in dependGraph for each of its source
// registers that is not ready. Fixed-mapping registers are skipped. A source
// whose regScoreboard entry is already true is instead marked ready on the
// instruction via markSrcRegReady().
// Returns true if at least one source register was inserted into
// dependGraph, false otherwise.
// NOTE(review): the declarator line (listing line 1365) is absent from this
// extract; per the member index at the end of the page this is presumably
// InstructionQueue<Impl>::addToDependents(const DynInstPtr &new_inst).
1363 template <class Impl>
1364 bool
1366 {
1367  // Loop through the instruction's source registers, adding
1368  // them to the dependency list if they are not ready.
     // NOTE(review): the count is narrowed to int8_t; the return type of
     // numSrcRegs() is not visible here -- confirm it cannot exceed 127.
1369  int8_t total_src_regs = new_inst->numSrcRegs();
1370  bool return_val = false;
1371 
1372  for (int src_reg_idx = 0;
1373  src_reg_idx < total_src_regs;
1374  src_reg_idx++)
1375  {
1376  // Only add it to the dependency graph if it's not ready.
1377  if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
1378  PhysRegIdPtr src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
1379 
1380  // Check the IQ's scoreboard to make sure the register
1381  // hasn't become ready while the instruction was in flight
1382  // between stages. Only if it really isn't ready should
1383  // it be added to the dependency graph.
1384  if (src_reg->isFixedMapping()) {
1385  continue;
1386  } else if (!regScoreboard[src_reg->flatIndex()]) {
1387  DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1388  "is being added to the dependency chain.\n",
1389  new_inst->pcState(), src_reg->index(),
1390  src_reg->className());
1391 
1392  dependGraph.insert(src_reg->flatIndex(), new_inst);
1393 
1394  // Change the return value to indicate that something
1395  // was added to the dependency graph.
1396  return_val = true;
1397  } else {
1398  DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1399  "became ready before it reached the IQ.\n",
1400  new_inst->pcState(), src_reg->index(),
1401  src_reg->className());
1402  // Mark a register ready within the instruction.
1403  new_inst->markSrcRegReady(src_reg_idx);
1404  }
1405  }
1406  }
1407 
1408  return return_val;
1409 }
1410 
// Registers new_inst as the producer of each of its destination registers:
// for every destination that is not fixed-mapping, stores new_inst in the
// register's dependGraph head via setInst() and sets the register's
// regScoreboard entry to false. Panics (after dumping dependGraph) if the
// register's dependency list is not empty at that point.
// NOTE(review): the declarator line (listing line 1413) is absent from this
// extract; per the member index at the end of the page this is presumably
// InstructionQueue<Impl>::addToProducers(const DynInstPtr &new_inst).
1411 template <class Impl>
1412 void
1414 {
1415  // Nothing really needs to be marked when an instruction becomes
1416  // the producer of a register's value, but for convenience a ptr
1417  // to the producing instruction will be placed in the head node of
1418  // the dependency links.
     // NOTE(review): the count is narrowed to int8_t; the return type of
     // numDestRegs() is not visible here -- confirm it cannot exceed 127.
1419  int8_t total_dest_regs = new_inst->numDestRegs();
1420 
1421  for (int dest_reg_idx = 0;
1422  dest_reg_idx < total_dest_regs;
1423  dest_reg_idx++)
1424  {
1425  PhysRegIdPtr dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
1426 
1427  // Some registers have fixed mapping, and there is no need to track
1428  // dependencies as these instructions must be executed at commit.
1429  if (dest_reg->isFixedMapping()) {
1430  continue;
1431  }
1432 
1433  if (!dependGraph.empty(dest_reg->flatIndex())) {
1434  dependGraph.dump();
1435  panic("Dependency graph %i (%s) (flat: %i) not empty!",
1436  dest_reg->index(), dest_reg->className(),
1437  dest_reg->flatIndex());
1438  }
1439 
1440  dependGraph.setInst(dest_reg->flatIndex(), new_inst);
1441 
1442  // Mark the scoreboard to say it's not yet ready.
1443  regScoreboard[dest_reg->flatIndex()] = false;
1444  }
1445 }
1446 
// If inst->readyToIssue() is true, hands the instruction on for issue:
//   - memory references are reported to the thread's memory dependence unit
//     via regsReady() and nothing else is done here;
//   - all other instructions are pushed onto readyInsts[op_class], and the
//     op class's listOrder entry is added or re-inserted as needed.
// Does nothing when the instruction is not ready to issue.
// NOTE(review): the declarator line (listing line 1449) is absent from this
// extract; per the member index at the end of the page this is presumably
// InstructionQueue<Impl>::addIfReady(const DynInstPtr &inst).
1447 template <class Impl>
1448 void
1450 {
1451  // If the instruction now has all of its source registers
1452  // available, then add it to the list of ready instructions.
1453  if (inst->readyToIssue()) {
1454 
1455  //Add the instruction to the proper ready list.
1456  if (inst->isMemRef()) {
1457 
1458  DPRINTF(IQ, "Checking if memory instruction can issue.\n");
1459 
1460  // Message to the mem dependence unit that this instruction has
1461  // its registers ready.
1462  memDepUnit[inst->threadNumber].regsReady(inst);
1463 
1464  return;
1465  }
1466 
1467  OpClass op_class = inst->opClass();
1468 
1469  DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1470  "the ready list, PC %s opclass:%i [sn:%llu].\n",
1471  inst->pcState(), op_class, inst->seqNum);
1472 
1473  readyInsts[op_class].push(inst);
1474 
1475  // Will need to reorder the list if either a queue is not on the list,
1476  // or it has an older instruction than last time.
1477  if (!queueOnList[op_class]) {
1478  addToOrderList(op_class);
1479  } else if (readyInsts[op_class].top()->seqNum <
1480  (*readyIt[op_class]).oldestInst) {
     // The new top is older than the recorded oldest entry: drop the stale
     // listOrder entry and insert a fresh one.
1481  listOrder.erase(readyIt[op_class]);
1482  addToOrderList(op_class);
1483  }
1484  }
1485 }
1486 
// Returns the number of occupied entries, computed as
// numEntries - freeEntries.
// NOTE(review): the declarator line (listing line 1489) is absent from this
// extract; per the member index at the end of the page this is presumably
// InstructionQueue<Impl>::countInsts(). Both operands are declared
// `unsigned` in that index, so the difference is converted to the int
// return type.
1487 template <class Impl>
1488 int
1490 {
1491  return numEntries - freeEntries;
1492 }
1493 
// Debug dump, written with cprintf():
//   - the size of each readyInsts[] queue, one per op class,
//   - the size of nonSpecInsts followed by the PC and sequence number of
//     each entry,
//   - every listOrder entry (1-based position, queueType, oldestInst).
// NOTE(review): the declarator line (listing line 1496) is absent from this
// extract; per the member index at the end of the page this is presumably
// InstructionQueue<Impl>::dumpLists().
1494 template <class Impl>
1495 void
1497 {
1498  for (int i = 0; i < Num_OpClasses; ++i) {
1499  cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
1500 
1501  cprintf("\n");
1502  }
1503 
1504  cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
1505 
1506  NonSpecMapIt non_spec_it = nonSpecInsts.begin();
1507  NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
1508 
1509  cprintf("Non speculative list: ");
1510 
1511  while (non_spec_it != non_spec_end_it) {
1512  cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(),
1513  (*non_spec_it).second->seqNum);
1514  ++non_spec_it;
1515  }
1516 
1517  cprintf("\n");
1518 
1519  ListOrderIt list_order_it = listOrder.begin();
1520  ListOrderIt list_order_end_it = listOrder.end();
     // 1-based position counter used only for the printout.
1521  int i = 1;
1522 
1523  cprintf("List order: ");
1524 
1525  while (list_order_it != list_order_end_it) {
1526  cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType,
1527  (*list_order_it).oldestInst);
1528 
1529  ++list_order_it;
1530  ++i;
1531  }
1532 
1533  cprintf("\n");
1534 }
1535 
1536 
// Debug dump, written with cprintf(): prints every instruction in
// instList[tid] for each thread, then every instruction in instsToExecute.
// For each entry it prints a running index, PC, sequence number, thread id,
// issued/squashed flags and, for memory references, memOpDone(). A running
// "Count" is printed for non-squashed entries that are either not issued or
// are memory references whose memOpDone() is false.
// NOTE(review): the declarator line (listing line 1539) is absent from this
// extract; per the member index at the end of the page this is presumably
// InstructionQueue<Impl>::dumpInsts().
1537 template <class Impl>
1538 void
1540 {
1541  for (ThreadID tid = 0; tid < numThreads; ++tid) {
     // Both counters restart at zero for each thread.
1542  int num = 0;
1543  int valid_num = 0;
1544  ListIt inst_list_it = instList[tid].begin();
1545 
1546  while (inst_list_it != instList[tid].end()) {
1547  cprintf("Instruction:%i\n", num);
1548  if (!(*inst_list_it)->isSquashed()) {
1549  if (!(*inst_list_it)->isIssued()) {
1550  ++valid_num;
1551  cprintf("Count:%i\n", valid_num);
1552  } else if ((*inst_list_it)->isMemRef() &&
1553  !(*inst_list_it)->memOpDone()) {
1554  // Loads that have not been marked as executed
1555  // still count towards the total instructions.
1556  ++valid_num;
1557  cprintf("Count:%i\n", valid_num);
1558  }
1559  }
1560 
1561  cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1562  "Issued:%i\nSquashed:%i\n",
1563  (*inst_list_it)->pcState(),
1564  (*inst_list_it)->seqNum,
1565  (*inst_list_it)->threadNumber,
1566  (*inst_list_it)->isIssued(),
1567  (*inst_list_it)->isSquashed());
1568 
1569  if ((*inst_list_it)->isMemRef()) {
1570  cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1571  }
1572 
1573  cprintf("\n");
1574 
1575  inst_list_it++;
1576  ++num;
1577  }
1578  }
1579 
1580  cprintf("Insts to Execute list:\n");
1581 
     // Same per-entry report as above, this time over instsToExecute.
1582  int num = 0;
1583  int valid_num = 0;
1584  ListIt inst_list_it = instsToExecute.begin();
1585 
1586  while (inst_list_it != instsToExecute.end())
1587  {
1588  cprintf("Instruction:%i\n",
1589  num);
1590  if (!(*inst_list_it)->isSquashed()) {
1591  if (!(*inst_list_it)->isIssued()) {
1592  ++valid_num;
1593  cprintf("Count:%i\n", valid_num);
1594  } else if ((*inst_list_it)->isMemRef() &&
1595  !(*inst_list_it)->memOpDone()) {
1596  // Loads that have not been marked as executed
1597  // still count towards the total instructions.
1598  ++valid_num;
1599  cprintf("Count:%i\n", valid_num);
1600  }
1601  }
1602 
1603  cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1604  "Issued:%i\nSquashed:%i\n",
1605  (*inst_list_it)->pcState(),
1606  (*inst_list_it)->seqNum,
1607  (*inst_list_it)->threadNumber,
1608  (*inst_list_it)->isIssued(),
1609  (*inst_list_it)->isSquashed());
1610 
1611  if ((*inst_list_it)->isMemRef()) {
1612  cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1613  }
1614 
1615  cprintf("\n");
1616 
1617  inst_list_it++;
1618  ++num;
1619  }
1620 }
1621 
1622 #endif//__CPU_O3_INST_QUEUE_IMPL_HH__
Stats::Scalar iqMiscInstsIssued
Stat for number of miscellaneous instructions issued.
Definition: inst_queue.hh:492
int wbOutstanding
Number of instructions currently in flight to FUs.
Definition: inst_queue.hh:433
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
#define DPRINTF(x,...)
Definition: trace.hh:225
void regStats()
Registers statistics.
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition: info.hh:51
std::list< ThreadID > * activeThreads
Pointer to list of active threads.
Definition: inst_queue.hh:412
Stats::Scalar iqFloatInstsIssued
Stat for number of floating point instructions issued.
Definition: inst_queue.hh:486
void completed(const DynInstPtr &inst)
Completes a memory instruction.
void replayMemInst(const DynInstPtr &replay_inst)
Replays a memory instruction.
Stats::Scalar vecInstQueueReads
Definition: inst_queue.hh:539
Derived & init(size_type _x, size_type _y)
Definition: statistics.hh:1285
decltype(nullptr) constexpr NoFault
Definition: types.hh:243
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation...
Definition: statistics.hh:376
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:81
void insertNonSpec(const DynInstPtr &new_inst)
Inserts a new, non-speculative instruction into the IQ.
Stats::Scalar fpAluAccesses
Definition: inst_queue.hh:544
void doSquash(ThreadID tid)
Does the actual squashing.
void scheduleReadyInsts()
Schedules ready instructions, adding the ready ones (oldest first) to the queue to execute...
int getNumPinnedWritesToComplete() const
Definition: reg_class.hh:332
Definition: test.h:61
Stats::Scalar intAluAccesses
Definition: inst_queue.hh:543
Bitfield< 7 > i
void violation(const DynInstPtr &store_inst, const DynInstPtr &violating_load)
Indicates an ordering violation between a store and a younger load.
Impl::CPUPol::IEW IEW
Definition: inst_queue.hh:88
Stats::Scalar iqInstsAdded
Stat for number of instructions added.
Definition: inst_queue.hh:478
InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
Constructs an IQ.
bool queueOnList[Num_OpClasses]
Tracks if each ready queue is on the age order list.
Definition: inst_queue.hh:383
Stats::Scalar fpInstQueueWakeupAccesses
Definition: inst_queue.hh:538
std::list< DynInstPtr > instsToExecute
List of instructions that are ready to be executed.
Definition: inst_queue.hh:317
void regStats()
Registers statistics.
void dumpInsts()
Debugging function to dump out all instructions that are in the IQ.
void replay()
Replays all instructions that have been rescheduled by moving them to the ready list.
InstSeqNum squashedSeqNum[Impl::MaxThreads]
The sequence number of the squashed instruction.
Definition: inst_queue.hh:441
Impl::DynInstPtr DynInstPtr
Definition: inst_queue.hh:86
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load)
Indicates an ordering violation between a store and a load.
void scheduleNonSpec(const InstSeqNum &inst)
Schedules a single specific non-speculative instruction.
DynInstPtr getInstToExecute()
Returns the oldest scheduled instruction, and removes it from the list of instructions waiting to exe...
Cycles getOpLatency(OpClass capability)
Returns the operation execution latency of the given capability.
Definition: fu_pool.hh:162
Stats::Scalar iqInstsIssued
Definition: inst_queue.hh:482
FU completion event class.
Definition: inst_queue.hh:97
Stats::Scalar iqMemInstsIssued
Stat for number of memory instructions issued.
Definition: inst_queue.hh:490
void freeUnitNextCycle(int fu_idx)
Frees a FU at the end of this cycle.
Definition: fu_pool.cc:186
constexpr unsigned NumVecElemPerVecReg
Definition: registers.hh:66
void addToOrderList(OpClass op_class)
Add an op class to the age order list.
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets active threads list.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a memory or write barrier into the IQ to make sure loads and stores are ordered properly...
void setIQ(InstructionQueue< Impl > *iq_ptr)
Sets the pointer to the IQ.
unsigned numEntries
The number of entries in the instruction queue.
Definition: inst_queue.hh:424
static constexpr auto NoCapableFU
Definition: fu_pool.hh:135
std::list< DynInstPtr > instList[Impl::MaxThreads]
List of all the instructions in the IQ (some of which may be issued).
Definition: inst_queue.hh:314
bool addToDependents(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a consumer.
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Definition: statistics.hh:333
void decrNumPinnedWritesToComplete()
Definition: reg_class.hh:342
DynInstPtr inst
Executing instruction.
Definition: inst_queue.hh:100
void wakeDependents(const DynInstPtr &inst)
Wakes any dependents of a memory instruction.
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1149
void rescheduleMemInst(const DynInstPtr &resched_inst)
Reschedules a memory instruction.
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the global time buffer.
std::vector< bool > regScoreboard
A cache of the recently woken registers.
Definition: inst_queue.hh:449
bool isPinned() const
Definition: reg_class.hh:330
Stats::Scalar fpInstQueueWrites
Definition: inst_queue.hh:537
void nonSpecInstReady(const DynInstPtr &inst)
Indicate that a non-speculative instruction is ready.
Impl::O3CPU O3CPU
Definition: inst_queue.hh:85
std::list< DynInstPtr > blockedMemInsts
List of instructions that have been cache blocked.
Definition: inst_queue.hh:325
bool isFull()
Returns whether or not the IQ is full.
unsigned numFreeEntries()
Returns total number of free entries.
Tick curTick()
The current simulated tick.
Definition: core.hh:44
int fuIdx
Index of the FU used for executing.
Definition: inst_queue.hh:103
bool hasReadyInsts()
Returns if there are any ready instructions in the IQ.
Stats::Scalar intInstQueueWakeupAccesses
Definition: inst_queue.hh:535
Stats::Scalar iqSquashedNonSpecRemoved
Stat for number of non-speculative instructions removed due to a squash.
Definition: inst_queue.hh:503
void insertNonSpec(const DynInstPtr &inst)
Inserts a non-speculative memory instruction.
void issue(const DynInstPtr &inst)
Issues the given instruction.
Derived & prereq(const Stat &prereq)
Set the prerequisite stat and marks this stat to print at the end of simulation.
Definition: statistics.hh:347
TimeBuffer< IssueStruct > * issueToExecuteQueue
The queue to the execute stage.
Definition: inst_queue.hh:298
int wakeDependents(const DynInstPtr &completed_inst)
Wakes all dependents of a completed instruction.
IEW * iewStage
Pointer to IEW stage.
Definition: inst_queue.hh:288
Stats::Scalar intInstQueueWrites
Definition: inst_queue.hh:534
void addReadyMemInst(const DynInstPtr &ready_inst)
Adds a ready memory instruction to the ready list.
ReadyInstQueue readyInsts[Num_OpClasses]
List of ready instructions, per op class.
Definition: inst_queue.hh:352
void regsReady(const DynInstPtr &inst)
Indicate that an instruction has its registers ready.
void commit(const InstSeqNum &inst, ThreadID tid=0)
Commits all instructions up to and including the given sequence number, for a specific thread...
uint64_t InstSeqNum
Definition: inst_seq.hh:37
std::map< InstSeqNum, DynInstPtr > nonSpecInsts
List of non-speculative instructions that will be scheduled once the IQ gets a signal from commit...
Definition: inst_queue.hh:361
std::list< ListOrderEntry > listOrder
List that contains the age order of the oldest instruction of each ready queue.
Definition: inst_queue.hh:378
void addIfReady(const DynInstPtr &inst)
Moves an instruction to the ready queue if it is ready.
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2606
Stats::Scalar iqBranchInstsIssued
Stat for number of branch instructions issued.
Definition: inst_queue.hh:488
STL list class.
Definition: stl.hh:51
static scfx_rep_node * list
Definition: scfx_rep.cc:336
virtual const char * description() const
Return a C string describing the event.
unsigned count[Impl::MaxThreads]
Per Thread IQ count.
Definition: inst_queue.hh:415
void takeOverFrom()
Takes over execution from another CPU's thread.
TimeBuffer< TimeStruct > * timeBuffer
The backwards time buffer.
Definition: inst_queue.hh:301
Cycles commitToIEWDelay
Delay between commit stage and the IQ.
Definition: inst_queue.hh:438
Stats::Scalar iqSquashedInstsIssued
Stat for number of squashed instructions that were ready to issue.
Definition: inst_queue.hh:494
Stats::Formula fuBusyRate
Number of times the FU was busy per instruction issued.
Definition: inst_queue.hh:531
std::list< ListOrderEntry >::iterator ListOrderIt
Definition: inst_queue.hh:380
Stats::Scalar vecAluAccesses
Definition: inst_queue.hh:545
void reschedule(const DynInstPtr &inst)
Reschedules an instruction to be re-executed.
std::string name() const
Returns the name of the IQ.
const FlagsType total
Print the total.
Definition: info.hh:49
std::list< DynInstPtr > retryMemInsts
List of instructions that were cache blocked, but a retry has been seen since, so they can now be ret...
Definition: inst_queue.hh:330
TimeBuffer< TimeStruct >::wire fromCommit
Wire to read information from timebuffer.
Definition: inst_queue.hh:304
void insert(const DynInstPtr &inst)
Inserts a memory instruction.
void deferMemInst(const DynInstPtr &deferred_inst)
Defers a memory instruction when its DTB translation incurs a hw page table walk. ...
DynInstPtr getBlockedMemInstToExecute()
Gets a memory instruction that was blocked on the cache.
void resetState()
Resets all instruction queue state.
Physical register ID.
Definition: reg_class.hh:223
~InstructionQueue()
Destructs the IQ.
DynInstPtr getDeferredMemInstToExecute()
Gets a memory instruction that was deferred due to a delayed DTB translation if it is now ready to ex...
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:276
void drainSanityCheck() const
Perform sanity checks after a drain.
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:225
void squash(ThreadID tid)
Squashes instructions for a thread.
Stats::Vector2d statIssuedInstType
Stat for total number issued for each instruction type.
Definition: inst_queue.hh:523
void init(DerivO3CPUParams *params, ThreadID tid)
Initializes the unit with parameters and a thread id.
SMTQueuePolicy iqPolicy
IQ sharing policy for SMT.
Definition: inst_queue.hh:406
DependencyGraph< DynInstPtr > dependGraph
Definition: inst_queue.hh:399
int countInsts()
Debugging function to count how many entries are in the IQ.
const PhysRegIndex & flatIndex() const
Flat index accessor.
Definition: reg_class.hh:305
bool freeFU
Should the FU be added to the list to be freed upon completing this event.
Definition: inst_queue.hh:111
Stats::Scalar iqSquashedOperandsExamined
Stat for number of squashed instruction operands examined when squashing.
Definition: inst_queue.hh:500
void blockMemInst(const DynInstPtr &blocked_inst)
Defers a memory instruction when it is cache blocked.
void resetEntries()
Resets max entries for all threads.
Stats::Scalar fpInstQueueReads
Definition: inst_queue.hh:536
Stats::Scalar vecInstQueueWrites
Definition: inst_queue.hh:540
static const OpClass Num_OpClasses
Definition: op_class.hh:105
Definition: eventq.hh:245
void completeMemInst(const DynInstPtr &completed_inst)
Completes a memory operation.
std::map< InstSeqNum, DynInstPtr >::iterator NonSpecMapIt
Definition: inst_queue.hh:363
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squashes all instructions up until a given sequence number for a specific thread. ...
int entryAmount(ThreadID num_threads)
Number of entries needed for given amount of threads.
int size
Definition: comm.hh:108
Stats::Scalar iqSquashedInstsExamined
Stat for number of squashed instructions examined when squashing.
Definition: inst_queue.hh:496
O3CPU * cpu
Pointer to the CPU.
Definition: inst_queue.hh:282
ListOrderIt readyIt[Num_OpClasses]
Iterators of each ready queue.
Definition: inst_queue.hh:388
void processFUCompletion(const DynInstPtr &inst, int fu_idx)
Process FU completion event.
Stats::Scalar iqIntInstsIssued
Stat for number of integer instructions issued.
Definition: inst_queue.hh:484
const RegIndex & index() const
Index accessors.
Definition: reg_class.hh:173
Stats::Vector fuBusy
Number of times the FU was busy.
Definition: inst_queue.hh:529
void completeBarrier(const DynInstPtr &inst)
Completes a barrier instruction.
Entry for the list age ordering by op class.
Definition: inst_queue.hh:366
std::list< DynInstPtr >::iterator ListIt
Definition: inst_queue.hh:94
T * access(int idx)
Definition: timebuf.hh:208
wire getWire(int idx)
Definition: timebuf.hh:229
unsigned maxEntries[Impl::MaxThreads]
Max IQ Entries Per Thread.
Definition: inst_queue.hh:418
Stats::Scalar vecInstQueueWakeupAccesses
Definition: inst_queue.hh:541
void setIssueToExecuteQueue(TimeBuffer< IssueStruct > *i2eQueue)
Sets the timer buffer between issue and execute.
void cacheUnblocked()
Notify instruction queue that a previous blockage has resolved.
void moveToYoungerInst(ListOrderIt age_order_it)
Called when the oldest instruction has been removed from a ready queue; this places that ready queue ...
Stats::Scalar iqNonSpecInstsAdded
Stat for number of non-speculative instructions added.
Definition: inst_queue.hh:480
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:309
int getUnit(OpClass capability)
Gets a FU providing the requested capability.
Definition: fu_pool.cc:158
Stats::Formula issueRate
Number of instructions issued per cycle.
Definition: inst_queue.hh:526
unsigned numPhysRegs
The number of physical registers in the CPU.
Definition: inst_queue.hh:430
Stats::Scalar intInstQueueReads
Definition: inst_queue.hh:533
MemDepUnit memDepUnit[Impl::MaxThreads]
The memory dependence unit, which tracks/predicts memory dependences between instructions.
Definition: inst_queue.hh:293
Derived & ysubnames(const char **names)
Definition: statistics.hh:451
void addToProducers(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a producer.
bool isPipelined(OpClass capability)
Returns whether the given capability is pipelined.
Definition: fu_pool.hh:167
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a barrier instruction.
FUPool * fuPool
Function unit pool.
Definition: inst_queue.hh:307
const FlagsType dist
Print the distribution.
Definition: info.hh:55
bool isFixedMapping() const
Returns true if this register is always associated to the same architectural register.
Definition: reg_class.hh:299
std::list< DynInstPtr > deferredMemInsts
List of instructions waiting for their DTB translation to complete (hw page table walk in progress)...
Definition: inst_queue.hh:322
bool isDrained() const
Determine if we are drained.
FUCompletion(const DynInstPtr &_inst, int fu_idx, InstructionQueue< Impl > *iq_ptr)
Construct a FU completion event.
InstructionQueue< Impl > * iqPtr
Pointer back to the instruction queue.
Definition: inst_queue.hh:106
A standard instruction queue class.
Definition: inst_queue.hh:81
unsigned totalWidth
The total number of instructions that can be issued in one cycle.
Definition: inst_queue.hh:427
void dumpLists()
Debugging function to dump all the list sizes, as well as print out the list of nonspeculative instru...
Stats::Distribution numIssuedDist
Distribution of number of instructions in the queue.
Definition: inst_queue.hh:511
unsigned freeEntries
Number of free IQ entries left.
Definition: inst_queue.hh:421
Stats::Vector statFuBusy
Distribution of the cycles it takes to issue an instruction.
Definition: inst_queue.hh:520
const char * className() const
Return a const char* with the register class name.
Definition: reg_class.hh:202
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1896
ThreadID numThreads
Number of Total Threads.
Definition: inst_queue.hh:409
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:152
static constexpr auto NoFreeFU
Definition: fu_pool.hh:136
void insert(const DynInstPtr &new_inst)
Inserts a new instruction into the IQ.

Generated on Fri Jul 3 2020 15:53:00 for gem5 by doxygen 1.8.13