gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
inst_queue_impl.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2014, 2017-2019 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved.
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  *
41  * Authors: Kevin Lim
42  * Korey Sewell
43  */
44 
45 #ifndef __CPU_O3_INST_QUEUE_IMPL_HH__
46 #define __CPU_O3_INST_QUEUE_IMPL_HH__
47 
48 #include <limits>
49 #include <vector>
50 
51 #include "base/logging.hh"
52 #include "cpu/o3/fu_pool.hh"
53 #include "cpu/o3/inst_queue.hh"
54 #include "debug/IQ.hh"
55 #include "enums/OpClass.hh"
56 #include "params/DerivO3CPU.hh"
57 #include "sim/core.hh"
58 
59 // clang complains about std::set being overloaded with Packet::set if
60 // we open up the entire namespace std
61 using std::list;
62 
// Constructor of the functional-unit completion event. It records the
// instruction, the FU index and the owning instruction queue, and starts
// with freeFU set to false. The base Event is built with Stat_Event_Pri
// priority and the AutoDelete flag.
// NOTE(review): listing line 64 (first line of the signature) is missing
// from this extract; presumably
// InstructionQueue<Impl>::FUCompletion::FUCompletion(const DynInstPtr &_inst,
// -- confirm against the real header.
63 template <class Impl>
65  int fu_idx, InstructionQueue<Impl> *iq_ptr)
66  : Event(Stat_Event_Pri, AutoDelete),
67  inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
68 {
69 }
70 
// Event callback: hands the stored instruction to the owning IQ through
// processFUCompletion(). The FU index is forwarded only when freeFU is set;
// otherwise -1 is passed, which processFUCompletion() treats as "nothing to
// free" (it only frees when fu_idx > -1).
// NOTE(review): listing line 73 (the function name) is missing from this
// extract; presumably FUCompletion::process() -- confirm.
71 template <class Impl>
72 void
74 {
75  iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
    // Clear the stored instruction pointer once it has been handed over.
76  inst = NULL;
77 }
78 
79 
// Returns the fixed, human-readable label of this event type.
// NOTE(review): listing line 82 (the function name) is missing from this
// extract; presumably FUCompletion::description() const -- confirm.
80 template <class Impl>
81 const char *
83 {
84  return "Functional unit completion";
85 }
86 
// Constructor of the instruction queue (IQ).
//
// Takes the CPU pointer (cpu_ptr), the IEW stage pointer (iew_ptr) and the
// DerivO3CPU parameter object. It sizes the dependency graph and register
// scoreboard to the total number of physical registers, initialises one
// memory dependence unit per possible thread, resets the queue state, and
// then derives the per-thread entry limit (maxEntries) from the SMT sharing
// policy.
// NOTE(review): listing lines 88 (start of the signature) and 96 (last
// member initialiser) are missing from this extract -- confirm against the
// real header before relying on the initialiser list shown here.
87 template <class Impl>
89  DerivO3CPUParams *params)
90  : cpu(cpu_ptr),
91  iewStage(iew_ptr),
92  fuPool(params->fuPool),
93  iqPolicy(params->smtIQPolicy),
94  numEntries(params->numIQEntries),
95  totalWidth(params->issueWidth),
97 {
98  assert(fuPool);
99 
100  numThreads = params->numThreads;
101 
102  // Set the number of total physical registers
103  // As the vector registers have two addressing modes, they are added twice
    // (once per whole register, once per vector element).
104  numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
105  params->numPhysVecRegs +
106  params->numPhysVecRegs * TheISA::NumVecElemPerVecReg +
107  params->numPhysVecPredRegs +
108  params->numPhysCCRegs;
109 
110  //Create an entry for each physical register within the
111  //dependency graph.
112  dependGraph.resize(numPhysRegs);
113 
114  // Resize the register scoreboard.
115  regScoreboard.resize(numPhysRegs);
116 
117  //Initialize Mem Dependence Units
    // All Impl::MaxThreads units are initialised, not only the numThreads
    // that are configured.
118  for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
119  memDepUnit[tid].init(params, tid);
120  memDepUnit[tid].setIQ(this);
121  }
122 
123  resetState();
124 
125  //Figure out resource sharing policy
126  if (iqPolicy == SMTQueuePolicy::Dynamic) {
127  //Set Max Entries to Total IQ Capacity
128  for (ThreadID tid = 0; tid < numThreads; tid++) {
129  maxEntries[tid] = numEntries;
130  }
131 
132  } else if (iqPolicy == SMTQueuePolicy::Partitioned) {
133  //@todo:make work if part_amt doesn't divide evenly.
    // Integer division: any remainder entries are left unassigned.
134  int part_amt = numEntries / numThreads;
135 
136  //Divide IQ up evenly
137  for (ThreadID tid = 0; tid < numThreads; tid++) {
138  maxEntries[tid] = part_amt;
139  }
140 
141  DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
142  "%i entries per thread.\n",part_amt);
143  } else if (iqPolicy == SMTQueuePolicy::Threshold) {
    // smtIQThreshold is divided by 100 here, i.e. it is used as a
    // percentage of the total number of entries.
144  double threshold = (double)params->smtIQThreshold / 100;
145 
146  int thresholdIQ = (int)((double)threshold * numEntries);
147 
148  //Divide up by threshold amount
149  for (ThreadID tid = 0; tid < numThreads; tid++) {
150  maxEntries[tid] = thresholdIQ;
151  }
152 
153  DPRINTF(IQ, "IQ sharing policy set to Threshold:"
154  "%i entries per thread.\n",thresholdIQ);
155  }
    // Threads beyond the configured count get no entries.
156  for (ThreadID tid = numThreads; tid < Impl::MaxThreads; tid++) {
157  maxEntries[tid] = 0;
158  }
159 }
160 
// Resets the dependency graph and, in DEBUG builds only, prints the graph's
// nodesTraversed / nodesRemoved counters.
// NOTE(review): listing line 162 (the function name) is missing from this
// extract; presumably the InstructionQueue destructor -- confirm.
161 template <class Impl>
163 {
164  dependGraph.reset();
165 #ifdef DEBUG
166  cprintf("Nodes traversed: %i, removed: %i\n",
167  dependGraph.nodesTraversed, dependGraph.nodesRemoved);
168 #endif
169 }
170 
// Returns the name of this object: the owning CPU's name with ".iq"
// appended. Used as the prefix for every statistic registered below.
// NOTE(review): listing line 173 (the function name) is missing from this
// extract; presumably InstructionQueue<Impl>::name() const -- confirm.
171 template <class Impl>
172 std::string
174 {
175  return cpu->name() + ".iq";
176 }
177 
// Registers the IQ's statistics: each one gets a name prefixed with name(),
// a description and, where shown, flags. Also asks every configured
// thread's memory dependence unit to register its own statistics.
// NOTE(review): this extract dropped many lines of the function -- the
// signature (listing line 180; presumably InstructionQueue<Impl>::regStats())
// and, for most statistics, the line naming the stat object and the closing
// lines of its call chain. A ".name(...)" line that follows a gap in the
// listing numbers therefore belongs to a stat object that is not visible
// here. Confirm against the real header before editing any chain.
178 template <class Impl>
179 void
181 {
182  using namespace Stats;
184  .name(name() + ".iqInstsAdded")
185  .desc("Number of instructions added to the IQ (excludes non-spec)")
187 
189  .name(name() + ".iqNonSpecInstsAdded")
190  .desc("Number of non-speculative instructions added to the IQ")
192 
194  .name(name() + ".iqInstsIssued")
195  .desc("Number of instructions issued")
197 
199  .name(name() + ".iqIntInstsIssued")
200  .desc("Number of integer instructions issued")
202 
204  .name(name() + ".iqFloatInstsIssued")
205  .desc("Number of float instructions issued")
207 
209  .name(name() + ".iqBranchInstsIssued")
210  .desc("Number of branch instructions issued")
212 
214  .name(name() + ".iqMemInstsIssued")
215  .desc("Number of memory instructions issued")
217 
219  .name(name() + ".iqMiscInstsIssued")
220  .desc("Number of miscellaneous instructions issued")
222 
224  .name(name() + ".iqSquashedInstsIssued")
225  .desc("Number of squashed instructions issued")
227 
229  .name(name() + ".iqSquashedInstsExamined")
230  .desc("Number of squashed instructions iterated over during squash;"
231  " mainly for profiling")
233 
235  .name(name() + ".iqSquashedOperandsExamined")
236  .desc("Number of squashed operands that are examined and possibly "
237  "removed from graph")
239 
241  .name(name() + ".iqSquashedNonSpecRemoved")
242  .desc("Number of squashed non-spec instructions that were removed")
244 /*
245  queueResDist
246  .init(Num_OpClasses, 0, 99, 2)
247  .name(name() + ".IQ:residence:")
248  .desc("cycles from dispatch to issue")
249  .flags(total | pdf | cdf )
250  ;
251  for (int i = 0; i < Num_OpClasses; ++i) {
252  queueResDist.subname(i, opClassStrings[i]);
253  }
254 */
    // Distribution of instructions issued per cycle, initialised over the
    // range 0..totalWidth with a bucket size of 1.
256  .init(0,totalWidth,1)
257  .name(name() + ".issued_per_cycle")
258  .desc("Number of insts issued each cycle")
259  .flags(pdf)
260  ;
261 /*
262  dist_unissued
263  .init(Num_OpClasses+2)
264  .name(name() + ".unissued_cause")
265  .desc("Reason ready instruction not issued")
266  .flags(pdf | dist)
267  ;
268  for (int i=0; i < (Num_OpClasses + 2); ++i) {
269  dist_unissued.subname(i, unissued_names[i]);
270  }
271 */
    // Two-dimensional stat: one row per thread, one column per op class.
273  .init(numThreads,Enums::Num_OpClass)
274  .name(name() + ".FU_type")
275  .desc("Type of FU issued")
276  .flags(total | pdf | dist)
277  ;
278  statIssuedInstType.ysubnames(Enums::OpClassStrings);
279 
280  //
281  // How long did instructions for a particular FU type wait prior to issue
282  //
283 /*
284  issueDelayDist
285  .init(Num_OpClasses,0,99,2)
286  .name(name() + ".")
287  .desc("cycles from operands ready to issue")
288  .flags(pdf | cdf)
289  ;
290 
291  for (int i=0; i<Num_OpClasses; ++i) {
292  std::stringstream subname;
293  subname << opClassStrings[i] << "_delay";
294  issueDelayDist.subname(i, subname.str());
295  }
296 */
297  issueRate
298  .name(name() + ".rate")
299  .desc("Inst issue rate")
300  .flags(total)
301  ;
    // Derived statistic: instructions issued divided by CPU cycles.
302  issueRate = iqInstsIssued / cpu->numCycles;
303 
304  statFuBusy
306  .name(name() + ".fu_full")
307  .desc("attempts to use FU when none available")
308  .flags(pdf | dist)
309  ;
    // Label each statFuBusy entry with its op-class name.
310  for (int i=0; i < Num_OpClasses; ++i) {
311  statFuBusy.subname(i, Enums::OpClassStrings[i]);
312  }
313 
314  fuBusy
315  .init(numThreads)
316  .name(name() + ".fu_busy_cnt")
317  .desc("FU busy when requested")
318  .flags(total)
319  ;
320 
321  fuBusyRate
322  .name(name() + ".fu_busy_rate")
323  .desc("FU busy rate (busy events/executed inst)")
324  .flags(total)
325  ;
327 
328  for (ThreadID tid = 0; tid < numThreads; tid++) {
329  // Tell mem dependence unit to reg stats as well.
330  memDepUnit[tid].regStats();
331  }
332 
334  .name(name() + ".int_inst_queue_reads")
335  .desc("Number of integer instruction queue reads")
336  .flags(total);
337 
339  .name(name() + ".int_inst_queue_writes")
340  .desc("Number of integer instruction queue writes")
341  .flags(total);
342 
344  .name(name() + ".int_inst_queue_wakeup_accesses")
345  .desc("Number of integer instruction queue wakeup accesses")
346  .flags(total);
347 
349  .name(name() + ".fp_inst_queue_reads")
350  .desc("Number of floating instruction queue reads")
351  .flags(total);
352 
354  .name(name() + ".fp_inst_queue_writes")
355  .desc("Number of floating instruction queue writes")
356  .flags(total);
357 
359  .name(name() + ".fp_inst_queue_wakeup_accesses")
360  .desc("Number of floating instruction queue wakeup accesses")
361  .flags(total);
362 
364  .name(name() + ".vec_inst_queue_reads")
365  .desc("Number of vector instruction queue reads")
366  .flags(total);
367 
369  .name(name() + ".vec_inst_queue_writes")
370  .desc("Number of vector instruction queue writes")
371  .flags(total);
372 
374  .name(name() + ".vec_inst_queue_wakeup_accesses")
375  .desc("Number of vector instruction queue wakeup accesses")
376  .flags(total);
377 
379  .name(name() + ".int_alu_accesses")
380  .desc("Number of integer alu accesses")
381  .flags(total);
382 
384  .name(name() + ".fp_alu_accesses")
385  .desc("Number of floating point alu accesses")
386  .flags(total);
387 
389  .name(name() + ".vec_alu_accesses")
390  .desc("Number of vector alu accesses")
391  .flags(total);
392 
393 }
394 
// Returns the queue to its empty state: zeroes the per-thread counts and
// instruction lists, marks every physical register as not ready in the
// scoreboard, clears the per-thread squash sequence numbers, empties every
// per-op-class ready queue and the issue-order list, drops all pending
// non-speculative / deferred / blocked / retry instructions and resets the
// outstanding-writeback counter.
// NOTE(review): listing line 397 (the function name) and line 406 (the
// statement under the "free IQ entries" comment) are missing from this
// extract; presumably resetState() and "freeEntries = numEntries;" --
// confirm.
395 template <class Impl>
396 void
398 {
399  //Initialize thread IQ counts
400  for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
401  count[tid] = 0;
402  instList[tid].clear();
403  }
404 
405  // Initialize the number of free IQ entries.
407 
408  // Note that in actuality, the registers corresponding to the logical
409  // registers start off as ready. However this doesn't matter for the
410  // IQ as the instruction should have been correctly told if those
411  // registers are ready in rename. Thus it can all be initialized as
412  // unready.
413  for (int i = 0; i < numPhysRegs; ++i) {
414  regScoreboard[i] = false;
415  }
416 
417  for (ThreadID tid = 0; tid < Impl::MaxThreads; ++tid) {
418  squashedSeqNum[tid] = 0;
419  }
420 
    // Drain each op class's ready queue one element at a time and detach
    // the class from the issue-order list.
421  for (int i = 0; i < Num_OpClasses; ++i) {
422  while (!readyInsts[i].empty())
423  readyInsts[i].pop();
424  queueOnList[i] = false;
425  readyIt[i] = listOrder.end();
426  }
427  nonSpecInsts.clear();
428  listOrder.clear();
429  deferredMemInsts.clear();
430  blockedMemInsts.clear();
431  retryMemInsts.clear();
432  wbOutstanding = 0;
433 }
434 
// Stores the pointer to the list of active threads (at_ptr); the list is
// read later when per-thread entry limits are recomputed.
// NOTE(review): listing line 437 (the signature) is missing from this
// extract; presumably setActiveThreads(list<ThreadID> *at_ptr) -- confirm.
435 template <class Impl>
436 void
438 {
439  activeThreads = at_ptr;
440 }
441 
// Stores the pointer to the issue-to-execute queue (i2e_ptr), which the
// scheduling code accesses to report how many instructions were issued.
// NOTE(review): listing line 444 (the signature) is missing from this
// extract; presumably setIssueToExecuteQueue(...) -- confirm.
442 template <class Impl>
443 void
445 {
446  issueToExecuteQueue = i2e_ptr;
447 }
448 
// Stores the pointer to the main time buffer (tb_ptr).
// NOTE(review): listing line 451 (the signature) and line 455 (a second
// statement in the body) are missing from this extract; presumably
// setTimeBuffer(...) and the setup of the fromCommit wire that squash()
// reads -- confirm against the real header.
449 template <class Impl>
450 void
452 {
453  timeBuffer = tb_ptr;
454 
456 }
457 
// Reports whether the IQ holds no in-flight work: the dependency graph is
// empty, no instructions are waiting in instsToExecute, no writebacks are
// outstanding, and every configured thread's memory dependence unit
// reports itself drained.
// NOTE(review): listing line 460 (the function name) is missing from this
// extract; presumably isDrained() const -- confirm.
458 template <class Impl>
459 bool
461 {
462  bool drained = dependGraph.empty() &&
463  instsToExecute.empty() &&
464  wbOutstanding == 0;
465  for (ThreadID tid = 0; tid < numThreads; ++tid)
466  drained = drained && memDepUnit[tid].isDrained();
467 
468  return drained;
469 }
470 
// Asserts that the IQ is in a drained state: the dependency graph and the
// instsToExecute list must both be empty.
// NOTE(review): listing line 473 (the function name) and line 478 (the body
// of the per-thread loop) are missing from this extract; presumably
// drainSanityCheck() const, with the loop forwarding the check to each
// thread's memDepUnit -- confirm.
471 template <class Impl>
472 void
474 {
475  assert(dependGraph.empty());
476  assert(instsToExecute.empty());
477  for (ThreadID tid = 0; tid < numThreads; ++tid)
479 }
480 
// Resets the queue to its empty state by calling resetState().
// NOTE(review): listing line 483 (the function name) is missing from this
// extract; presumably takeOverFrom() -- confirm.
481 template <class Impl>
482 void
484 {
485  resetState();
486 }
487 
// Returns the per-thread share of IQ entries for num_threads threads:
// numEntries / num_threads (integer division) under the Partitioned
// policy, and 0 under every other policy.
// NOTE(review): listing line 490 (the signature) is missing from this
// extract; presumably entryAmount(ThreadID num_threads). A num_threads of 0
// would divide by zero under the Partitioned policy -- confirm callers never
// pass it.
488 template <class Impl>
489 int
491 {
492  if (iqPolicy == SMTQueuePolicy::Partitioned) {
493  return numEntries / num_threads;
494  } else {
495  return 0;
496  }
497 }
498 
499 
// Recomputes maxEntries for every currently active thread. Nothing is done
// when the policy is Dynamic and only one thread is configured. Otherwise:
//  - Partitioned: each active thread gets numEntries / active_threads;
//  - Threshold with exactly one active thread: that thread gets the whole
//    queue;
//  - any other combination leaves maxEntries unchanged.
// NOTE(review): listing line 502 (the function name) and line 508 (the
// declaration of `end`, used by the loop below) are missing from this
// extract; presumably resetEntries() and an iterator set to
// activeThreads->end() -- confirm.
500 template <class Impl>
501 void
503 {
504  if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
505  int active_threads = activeThreads->size();
506 
507  list<ThreadID>::iterator threads = activeThreads->begin();
509 
510  while (threads != end) {
511  ThreadID tid = *threads++;
512 
513  if (iqPolicy == SMTQueuePolicy::Partitioned) {
514  maxEntries[tid] = numEntries / active_threads;
515  } else if (iqPolicy == SMTQueuePolicy::Threshold &&
516  active_threads == 1) {
517  maxEntries[tid] = numEntries;
518  }
519  }
520  }
521 }
522 
// Returns the number of free entries in the whole IQ (the freeEntries
// counter).
// NOTE(review): listing line 525 (the function name) is missing from this
// extract; presumably numFreeEntries() -- confirm.
523 template <class Impl>
524 unsigned
526 {
527  return freeEntries;
528 }
529 
// Returns how many more entries thread `tid` may occupy: its limit
// (maxEntries[tid]) minus the number it currently holds (count[tid]).
// NOTE(review): listing line 532 (the signature) is missing from this
// extract; presumably numFreeEntries(ThreadID tid) -- confirm.
530 template <class Impl>
531 unsigned
533 {
534  return maxEntries[tid] - count[tid];
535 }
536 
537 // Might want to do something more complex if it knows how many instructions
538 // will be issued this cycle.
// Returns true when the IQ as a whole has no free entries left.
// NOTE(review): listing line 541 (the function name) is missing from this
// extract; presumably isFull() -- confirm.
539 template <class Impl>
540 bool
542 {
543  if (freeEntries == 0) {
544  return(true);
545  } else {
546  return(false);
547  }
548 }
549 
// Returns true when thread `tid` has used up its entry allowance, i.e.
// numFreeEntries(tid) is zero.
// NOTE(review): listing line 552 (the signature) is missing from this
// extract; presumably isFull(ThreadID tid) -- confirm.
550 template <class Impl>
551 bool
553 {
554  if (numFreeEntries(tid) == 0) {
555  return(true);
556  } else {
557  return(false);
558  }
559 }
560 
// Returns true if any instruction is ready to issue: either the issue-order
// list is non-empty or at least one per-op-class ready queue holds an
// instruction.
// NOTE(review): listing line 563 (the function name) is missing from this
// extract; presumably hasReadyInsts() -- confirm.
561 template <class Impl>
562 bool
564 {
565  if (!listOrder.empty()) {
566  return true;
567  }
568 
569  for (int i = 0; i < Num_OpClasses; ++i) {
570  if (!readyInsts[i].empty()) {
571  return true;
572  }
573  }
574 
575  return false;
576 }
577 
// Adds an ordinary instruction to the IQ. The instruction is appended to
// its thread's instruction list, one free entry is consumed, it is flagged
// as being in the IQ, its source-register dependencies and its role as
// producer of its destination registers are recorded, and it is then either
// handed to the thread's memory dependence unit (memory references) or
// offered to the ready list through addIfReady().
// NOTE(review): listing line 580 (the signature) and lines 583/585/587 (the
// bodies of the float/vector/other branches) are missing from this extract;
// presumably insert(const DynInstPtr &new_inst) with per-type queue-write
// counters -- confirm.
578 template <class Impl>
579 void
581 {
582  if (new_inst->isFloating()) {
584  } else if (new_inst->isVector()) {
586  } else {
588  }
589  // Make sure the instruction is valid
    // NOTE(review): new_inst has already been dereferenced above, so this
    // assert cannot catch a null pointer before it is used.
590  assert(new_inst);
591 
592  DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n",
593  new_inst->seqNum, new_inst->pcState());
594 
595  assert(freeEntries != 0);
596 
597  instList[new_inst->threadNumber].push_back(new_inst);
598 
599  --freeEntries;
600 
601  new_inst->setInIQ();
602 
603  // Look through its source registers (physical regs), and mark any
604  // dependencies.
605  addToDependents(new_inst);
606 
607  // Have this instruction set itself as the producer of its destination
608  // register(s).
609  addToProducers(new_inst);
610 
611  if (new_inst->isMemRef()) {
612  memDepUnit[new_inst->threadNumber].insert(new_inst);
613  } else {
614  addIfReady(new_inst);
615  }
616 
617  ++iqInstsAdded;
618 
619  count[new_inst->threadNumber]++;
620 
    // Consistency check: free entries plus counted instructions must equal
    // the queue size.
621  assert(freeEntries == (numEntries - countInsts()));
622 }
623 
// Adds a non-speculative instruction to the IQ. Besides the bookkeeping
// done for ordinary instructions (instruction list, free-entry count,
// in-IQ flag, producer registration), the instruction is recorded in
// nonSpecInsts under its sequence number. Unlike insert(), this path
// neither registers source dependencies nor calls addIfReady(); memory
// references go to the thread's memory dependence unit via insertNonSpec().
// NOTE(review): listing line 626 (the signature), lines 631/633/635 (the
// branch bodies) and line 664 (a statement before the count update) are
// missing from this extract; presumably
// insertNonSpec(const DynInstPtr &new_inst) with stat increments --
// confirm.
624 template <class Impl>
625 void
627 {
628  // @todo: Clean up this code; can do it by setting inst as unable
629  // to issue, then calling normal insert on the inst.
630  if (new_inst->isFloating()) {
632  } else if (new_inst->isVector()) {
634  } else {
636  }
637 
    // NOTE(review): new_inst has already been dereferenced above, so this
    // assert cannot catch a null pointer before it is used.
638  assert(new_inst);
639 
640  nonSpecInsts[new_inst->seqNum] = new_inst;
641 
642  DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s "
643  "to the IQ.\n",
644  new_inst->seqNum, new_inst->pcState());
645 
646  assert(freeEntries != 0);
647 
648  instList[new_inst->threadNumber].push_back(new_inst);
649 
650  --freeEntries;
651 
652  new_inst->setInIQ();
653 
654  // Have this instruction set itself as the producer of its destination
655  // register(s).
656  addToProducers(new_inst);
657 
658  // If it's a memory instruction, add it to the memory dependency
659  // unit.
660  if (new_inst->isMemRef()) {
661  memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
662  }
663 
665 
666  count[new_inst->threadNumber]++;
667 
668  assert(freeEntries == (numEntries - countInsts()));
669 }
670 
// Adds a barrier instruction: it is first registered with its thread's
// memory dependence unit via insertBarrier(), then inserted into the IQ
// through the non-speculative path.
// NOTE(review): listing line 673 (the signature) is missing from this
// extract; presumably insertBarrier(const DynInstPtr &barr_inst) --
// confirm.
671 template <class Impl>
672 void
674 {
675  memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
676 
677  insertNonSpec(barr_inst);
678 }
679 
// Removes and returns the instruction at the front of instsToExecute. The
// list must not be empty (asserted).
// NOTE(review): listing line 682 (the function name) and lines 688/690/692
// (the branch bodies) are missing from this extract; presumably
// getInstToExecute() with per-type queue-read counters -- confirm.
680 template <class Impl>
681 typename Impl::DynInstPtr
683 {
684  assert(!instsToExecute.empty());
    // Move the pointer out of the list node before the node is removed.
685  DynInstPtr inst = std::move(instsToExecute.front());
686  instsToExecute.pop_front();
687  if (inst->isFloating()) {
689  } else if (inst->isVector()) {
691  } else {
693  }
694  return inst;
695 }
696 
// Inserts an entry for op class `op_class` into the issue-order list, which
// is kept sorted by the sequence number of each class's oldest ready
// instruction. The new entry goes in front of the first existing entry
// whose oldest instruction is younger (larger sequence number). The class's
// ready queue must not be empty (asserted).
// NOTE(review): listing line 699 (the signature) is missing from this
// extract; presumably addToOrderList(OpClass op_class) -- confirm.
697 template <class Impl>
698 void
700 {
701  assert(!readyInsts[op_class].empty());
702 
703  ListOrderEntry queue_entry;
704 
705  queue_entry.queueType = op_class;
706 
707  queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
708 
709  ListOrderIt list_it = listOrder.begin();
710  ListOrderIt list_end_it = listOrder.end();
711 
    // Linear scan for the insertion point.
712  while (list_it != list_end_it) {
713  if ((*list_it).oldestInst > queue_entry.oldestInst) {
714  break;
715  }
716 
717  list_it++;
718  }
719 
    // Remember where this class sits in the list and flag it as present.
720  readyIt[op_class] = listOrder.insert(list_it, queue_entry);
721  queueOnList[op_class] = true;
722 }
723 
// Re-inserts the op class referenced by `list_order_it` further down the
// issue-order list, at the position that matches the sequence number of
// the class's current oldest ready instruction. The original entry is NOT
// erased here; within this file the callers erase it right after this call
// returns.
// NOTE(review): listing line 726 (the signature) is missing from this
// extract; presumably moveToYoungerInst(ListOrderIt list_order_it) --
// confirm. The class's ready queue must be non-empty because top() is read
// without a check; the visible callers test that first.
724 template <class Impl>
725 void
727 {
728  // Get iterator of next item on the list
729  // Delete the original iterator
730  // Determine if the next item is either the end of the list or younger
731  // than the new instruction. If so, then add in a new iterator right here.
732  // If not, then move along.
733  ListOrderEntry queue_entry;
734  OpClass op_class = (*list_order_it).queueType;
735  ListOrderIt next_it = list_order_it;
736 
737  ++next_it;
738 
739  queue_entry.queueType = op_class;
740  queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
741 
742  while (next_it != listOrder.end() &&
743  (*next_it).oldestInst < queue_entry.oldestInst) {
744  ++next_it;
745  }
746 
747  readyIt[op_class] = listOrder.insert(next_it, queue_entry);
748 }
749 
// Called when a multi-cycle functional-unit operation finishes. It
// decrements the outstanding-writeback count, wakes the CPU through the IEW
// stage, frees the functional unit for the next cycle when a valid index
// (> -1) was supplied, and queues the instruction in instsToExecute.
// NOTE(review): listing line 752 (the signature) and line 767 (a statement
// just before the push_back) are missing from this extract; presumably
// processFUCompletion(const DynInstPtr &inst, int fu_idx) -- confirm.
750 template <class Impl>
751 void
753 {
754  DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
755  assert(!cpu->switchedOut());
756  // The CPU could have been sleeping until this op completed (*extremely*
757  // long latency op). Wake it if it was. This may be overkill.
758  --wbOutstanding;
759  iewStage->wakeCPU();
760 
761  if (fu_idx > -1)
762  fuPool->freeUnitNextCycle(fu_idx);
763 
764  // @todo: Ensure that these FU Completions happen at the beginning
765  // of a cycle, otherwise they could add too many instructions to
766  // the queue.
768  instsToExecute.push_back(inst);
769 }
770 
771 // @todo: Figure out a better way to remove the squashed items from the
772 // lists. Checking the top item of each list to see if it's squashed
773 // wastes time and forces jumps.
// Issues ready instructions for this cycle.
//
// First, every deferred and blocked/retry memory instruction that can now
// execute is moved onto the ready lists. Then the issue-order list is
// walked from its head (oldest ready instruction first) until either
// totalWidth instructions have been issued or the list is exhausted. For
// each entry:
//  - a squashed instruction is dropped from its ready queue;
//  - otherwise a functional unit is requested (unless the op class is
//    No_OpClass); if none is free the busy counters are bumped and the
//    entry is skipped;
//  - a single-cycle op is placed directly on instsToExecute, a longer op
//    gets an FUCompletion event scheduled op_latency - 1 cycles ahead.
// Non-memory instructions release their IQ entry at issue; memory
// instructions keep theirs and are reported to the memory dependence unit.
// NOTE(review): listing line 776 (the function name) and lines
// 813/815/817, 834, 867 and 884 are missing from this extract; presumably
// scheduleReadyInsts() -- confirm before editing the affected branches.
774 template <class Impl>
775 void
777 {
778  DPRINTF(IQ, "Attempting to schedule ready instructions from "
779  "the IQ.\n");
780 
781  IssueStruct *i2e_info = issueToExecuteQueue->access(0);
782 
783  DynInstPtr mem_inst;
    // Drain deferred memory instructions whose translation completed or
    // that were squashed (see getDeferredMemInstToExecute()).
784  while (mem_inst = std::move(getDeferredMemInstToExecute())) {
785  addReadyMemInst(mem_inst);
786  }
787 
788  // See if any cache blocked instructions are able to be executed
789  while (mem_inst = std::move(getBlockedMemInstToExecute())) {
790  addReadyMemInst(mem_inst);
791  }
792 
793  // Have iterator to head of the list
794  // While I haven't exceeded bandwidth or reached the end of the list,
795  // Try to get a FU that can do what this op needs.
796  // If successful, change the oldestInst to the new top of the list, put
797  // the queue in the proper place in the list.
798  // Increment the iterator.
799  // This will avoid trying to schedule a certain op class if there are no
800  // FUs that handle it.
801  int total_issued = 0;
802  ListOrderIt order_it = listOrder.begin();
803  ListOrderIt order_end_it = listOrder.end();
804 
805  while (total_issued < totalWidth && order_it != order_end_it) {
806  OpClass op_class = (*order_it).queueType;
807 
808  assert(!readyInsts[op_class].empty());
809 
810  DynInstPtr issuing_inst = readyInsts[op_class].top();
811 
812  if (issuing_inst->isFloating()) {
814  } else if (issuing_inst->isVector()) {
816  } else {
818  }
819 
820  assert(issuing_inst->seqNum == (*order_it).oldestInst);
821 
    // Squashed instruction at the head of its class: discard it and
    // reposition (or remove) the class in the order list.
822  if (issuing_inst->isSquashed()) {
823  readyInsts[op_class].pop();
824 
825  if (!readyInsts[op_class].empty()) {
826  moveToYoungerInst(order_it);
827  } else {
828  readyIt[op_class] = listOrder.end();
829  queueOnList[op_class] = false;
830  }
831 
    // Post-increment: advance first, then erase the old entry.
832  listOrder.erase(order_it++);
833 
835 
836  continue;
837  }
838 
839  int idx = FUPool::NoCapableFU;
840  Cycles op_latency = Cycles(1);
841  ThreadID tid = issuing_inst->threadNumber;
842 
843  if (op_class != No_OpClass) {
844  idx = fuPool->getUnit(op_class);
845  if (issuing_inst->isFloating()) {
846  fpAluAccesses++;
847  } else if (issuing_inst->isVector()) {
848  vecAluAccesses++;
849  } else {
850  intAluAccesses++;
851  }
852  if (idx > FUPool::NoFreeFU) {
853  op_latency = fuPool->getOpLatency(op_class);
854  }
855  }
856 
857  // If we have an instruction that doesn't require a FU, or a
858  // valid FU, then schedule for execution.
859  if (idx != FUPool::NoFreeFU) {
860  if (op_latency == Cycles(1)) {
861  i2e_info->size++;
862  instsToExecute.push_back(issuing_inst);
863 
864  // Add the FU onto the list of FU's to be freed next
865  // cycle if we used one.
866  if (idx >= 0)
868  } else {
869  bool pipelined = fuPool->isPipelined(op_class);
870  // Generate completion event for the FU
871  ++wbOutstanding;
872  FUCompletion *execution = new FUCompletion(issuing_inst,
873  idx, this);
874 
    // The event fires op_latency - 1 cycles from now.
875  cpu->schedule(execution,
876  cpu->clockEdge(Cycles(op_latency - 1)));
877 
878  if (!pipelined) {
879  // If FU isn't pipelined, then it must be freed
880  // upon the execution completing.
881  execution->setFreeFU();
882  } else {
883  // Add the FU onto the list of FU's to be freed next cycle.
885  }
886  }
887 
888  DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
889  "[sn:%llu]\n",
890  tid, issuing_inst->pcState(),
891  issuing_inst->seqNum);
892 
893  readyInsts[op_class].pop();
894 
895  if (!readyInsts[op_class].empty()) {
896  moveToYoungerInst(order_it);
897  } else {
898  readyIt[op_class] = listOrder.end();
899  queueOnList[op_class] = false;
900  }
901 
902  issuing_inst->setIssued();
903  ++total_issued;
904 
905 #if TRACING_ON
906  issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
907 #endif
908 
909  if (!issuing_inst->isMemRef()) {
910  // Memory instructions can not be freed from the IQ until they
911  // complete.
912  ++freeEntries;
913  count[tid]--;
914  issuing_inst->clearInIQ();
915  } else {
916  memDepUnit[tid].issue(issuing_inst);
917  }
918 
919  listOrder.erase(order_it++);
920  statIssuedInstType[tid][op_class]++;
921  } else {
    // No free functional unit: record the stall and try the next
    // op class in the order list.
922  statFuBusy[op_class]++;
923  fuBusy[tid]++;
924  ++order_it;
925  }
926  }
927 
928  numIssuedDist.sample(total_issued);
929  iqInstsIssued+= total_issued;
930 
931  // If we issued any instructions, tell the CPU we had activity.
932  // @todo If the way deferred memory instructions are handled due to
933  // translation changes then the deferredMemInsts condition should be removed
934  // from the code below.
935  if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
936  cpu->activityThisCycle();
937  } else {
938  DPRINTF(IQ, "Not able to schedule any instructions.\n");
939  }
940 }
941 
// Releases a non-speculative instruction for issue. `inst` is the
// instruction's sequence number, used as the key into nonSpecInsts; the
// entry must exist (asserted). The instruction is marked as at-commit and
// issuable, then either offered to the ready list (non-memory) or reported
// to its thread's memory dependence unit, and finally removed from
// nonSpecInsts.
// NOTE(review): listing line 944 (the signature) is missing from this
// extract; presumably scheduleNonSpec(const InstSeqNum &inst) -- confirm.
942 template <class Impl>
943 void
945 {
946  DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready "
947  "to execute.\n", inst);
948 
949  NonSpecMapIt inst_it = nonSpecInsts.find(inst);
950 
951  assert(inst_it != nonSpecInsts.end());
952 
953  ThreadID tid = (*inst_it).second->threadNumber;
954 
955  (*inst_it).second->setAtCommit();
956 
957  (*inst_it).second->setCanIssue();
958 
959  if (!(*inst_it).second->isMemRef()) {
960  addIfReady((*inst_it).second);
961  } else {
962  memDepUnit[tid].nonSpecInstReady((*inst_it).second);
963  }
964 
    // Clear the stored pointer before the map entry itself is erased.
965  (*inst_it).second = NULL;
966 
967  nonSpecInsts.erase(inst_it);
968 }
969 
// Removes from the front of thread `tid`'s instruction list every
// instruction whose sequence number is less than or equal to `inst`
// (a sequence number). Afterwards the free-entry invariant is asserted.
// NOTE(review): listing line 972 (the signature) is missing from this
// extract; presumably commit(const InstSeqNum &inst, ThreadID tid) --
// confirm.
970 template <class Impl>
971 void
973 {
974  DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n",
975  tid,inst);
976 
977  ListIt iq_it = instList[tid].begin();
978 
    // The iterator is advanced before pop_front() so it never refers to
    // the element being removed.
979  while (iq_it != instList[tid].end() &&
980  (*iq_it)->seqNum <= inst) {
981  ++iq_it;
982  instList[tid].pop_front();
983  }
984 
985  assert(freeEntries == (numEntries - countInsts()));
986 }
987 
// Wakes every instruction waiting on the destination registers of
// `completed_inst` and returns how many dependents were woken.
//
// Memory references are also reported to the memory dependence unit and
// completed via completeMemInst(); memory/write barriers are completed in
// the memory dependence unit. For each destination register that is neither
// fixed-mapped nor still awaiting pinned writes, all dependents are popped
// from the dependency graph, told that one source is ready and offered to
// the ready list; the register is then marked ready in the scoreboard. The
// completed instruction must not be squashed (asserted).
// NOTE(review): listing line 990 (the signature) and lines 996/998/1000 (the
// branch bodies) are missing from this extract; presumably
// wakeDependents(const DynInstPtr &completed_inst) with per-type wakeup
// counters -- confirm.
988 template <class Impl>
989 int
991 {
992  int dependents = 0;
993 
994  // The instruction queue here takes care of both floating and int ops
995  if (completed_inst->isFloating()) {
997  } else if (completed_inst->isVector()) {
999  } else {
1001  }
1002 
1003  DPRINTF(IQ, "Waking dependents of completed instruction.\n");
1004 
1005  assert(!completed_inst->isSquashed());
1006 
1007  // Tell the memory dependence unit to wake any dependents on this
1008  // instruction if it is a memory instruction. Also complete the memory
1009  // instruction at this point since we know it executed without issues.
1010  // @todo: Might want to rename "completeMemInst" to something that
1011  // indicates that it won't need to be replayed, and call this
1012  // earlier. Might not be a big deal.
1013  if (completed_inst->isMemRef()) {
1014  memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
1015  completeMemInst(completed_inst);
1016  } else if (completed_inst->isMemBarrier() ||
1017  completed_inst->isWriteBarrier()) {
1018  memDepUnit[completed_inst->threadNumber].completeBarrier(completed_inst);
1019  }
1020 
1021  for (int dest_reg_idx = 0;
1022  dest_reg_idx < completed_inst->numDestRegs();
1023  dest_reg_idx++)
1024  {
1025  PhysRegIdPtr dest_reg =
1026  completed_inst->renamedDestRegIdx(dest_reg_idx);
1027 
1028  // Special case of uniq or control registers. They are not
1029  // handled by the IQ and thus have no dependency graph entry.
1030  if (dest_reg->isFixedMapping()) {
1031  DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n",
1032  dest_reg->index(), dest_reg->className());
1033  continue;
1034  }
1035 
1036  // Avoid waking up dependents if the register is pinned
1037  dest_reg->decrNumPinnedWritesToComplete();
1038  if (dest_reg->isPinned())
1039  completed_inst->setPinnedRegsWritten();
1040 
    // Pinned writes are still outstanding for this register, so its
    // dependents stay asleep.
1041  if (dest_reg->getNumPinnedWritesToComplete() != 0) {
1042  DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n",
1043  dest_reg->index(), dest_reg->className());
1044  continue;
1045  }
1046 
1047  DPRINTF(IQ, "Waking any dependents on register %i (%s).\n",
1048  dest_reg->index(),
1049  dest_reg->className());
1050 
1051  //Go through the dependency chain, marking the registers as
1052  //ready within the waiting instructions.
1053  DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
1054 
1055  while (dep_inst) {
1056  DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
1057  "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
1058 
1059  // Might want to give more information to the instruction
1060  // so that it knows which of its source registers is
1061  // ready. However that would mean that the dependency
1062  // graph entries would need to hold the src_reg_idx.
1063  dep_inst->markSrcRegReady();
1064 
1065  addIfReady(dep_inst);
1066 
1067  dep_inst = dependGraph.pop(dest_reg->flatIndex());
1068 
1069  ++dependents;
1070  }
1071 
1072  // Reset the head node now that all of its dependents have
1073  // been woken up.
1074  assert(dependGraph.empty(dest_reg->flatIndex()));
1075  dependGraph.clearInst(dest_reg->flatIndex());
1076 
1077  // Mark the scoreboard as having that register ready.
1078  regScoreboard[dest_reg->flatIndex()] = true;
1079  }
1080  return dependents;
1081 }
1082 
// Puts a memory instruction that is ready to issue onto the ready queue of
// its op class and keeps the issue-order list consistent: the class is
// added to the list if absent, or re-inserted if the new instruction is
// older than the one the list entry currently records.
// NOTE(review): listing line 1085 (the signature) is missing from this
// extract; presumably addReadyMemInst(const DynInstPtr &ready_inst) --
// confirm.
1083 template <class Impl>
1084 void
1086 {
1087  OpClass op_class = ready_inst->opClass();
1088 
1089  readyInsts[op_class].push(ready_inst);
1090 
1091  // Will need to reorder the list if either a queue is not on the list,
1092  // or it has an older instruction than last time.
1093  if (!queueOnList[op_class]) {
1094  addToOrderList(op_class);
1095  } else if (readyInsts[op_class].top()->seqNum <
1096  (*readyIt[op_class]).oldestInst) {
1097  listOrder.erase(readyIt[op_class]);
1098  addToOrderList(op_class);
1099  }
1100 
1101  DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1102  "the ready list, PC %s opclass:%i [sn:%llu].\n",
1103  ready_inst->pcState(), op_class, ready_inst->seqNum);
1104 }
1105 
// Prepares a memory instruction for another attempt: clears its
// translation-started and translation-completed flags, withdraws its
// permission to issue, and hands it to its thread's memory dependence unit
// via reschedule().
// NOTE(review): listing line 1108 (the signature) is missing from this
// extract; presumably rescheduleMemInst(const DynInstPtr &resched_inst) --
// confirm.
1106 template <class Impl>
1107 void
1109 {
1110  DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
1111 
1112  // Reset DTB translation state
1113  resched_inst->translationStarted(false);
1114  resched_inst->translationCompleted(false);
1115 
1116  resched_inst->clearCanIssue();
1117  memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
1118 }
1119 
// Asks the memory dependence unit of the instruction's thread to replay.
// Only the thread number of `replay_inst` is used; the instruction itself
// is not passed on.
// NOTE(review): listing line 1122 (the signature) is missing from this
// extract; presumably replayMemInst(const DynInstPtr &replay_inst) --
// confirm.
1120 template <class Impl>
1121 void
1123 {
1124  memDepUnit[replay_inst->threadNumber].replay();
1125 }
1126 
// Finishes a memory instruction: releases its IQ entry (memory instructions
// keep their entry at issue time), flags its memory operation as done,
// notifies its thread's memory dependence unit, and decrements the thread's
// instruction count.
// NOTE(review): listing line 1129 (the signature) is missing from this
// extract; presumably completeMemInst(const DynInstPtr &completed_inst) --
// confirm.
1127 template <class Impl>
1128 void
1130 {
1131  ThreadID tid = completed_inst->threadNumber;
1132 
1133  DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n",
1134  completed_inst->pcState(), completed_inst->seqNum);
1135 
1136  ++freeEntries;
1137 
1138  completed_inst->memOpDone(true);
1139 
1140  memDepUnit[tid].completed(completed_inst);
1141  count[tid]--;
1142 }
1143 
// Appends a memory instruction to the deferred list; it is picked up again
// by getDeferredMemInstToExecute() once its translation has completed or it
// has been squashed.
// NOTE(review): listing line 1146 (the signature) is missing from this
// extract; presumably deferMemInst(const DynInstPtr &deferred_inst) --
// confirm.
1144 template <class Impl>
1145 void
1147 {
1148  deferredMemInsts.push_back(deferred_inst);
1149 }
1150 
// Parks a memory instruction on the blocked list after clearing its issued
// and can-issue flags.
// NOTE(review): listing line 1153 (the signature) is missing from this
// extract; presumably blockMemInst(const DynInstPtr &blocked_inst) --
// confirm.
1151 template <class Impl>
1152 void
1154 {
1155  blocked_inst->clearIssued();
1156  blocked_inst->clearCanIssue();
1157  blockedMemInsts.push_back(blocked_inst);
1158 }
1159 
// Wakes the CPU so that it starts ticking again.
// NOTE(review): listing line 1162 (the function name) and line 1164 (the
// first statement of the body) are missing from this extract; presumably
// cacheUnblocked(), with the missing statement moving blockedMemInsts onto
// retryMemInsts -- confirm against the real header.
1160 template <class Impl>
1161 void
1163 {
1165  // Get the CPU ticking again
1166  cpu->wakeCPU();
1167 }
1168 
// Returns the first deferred memory instruction whose translation has
// completed or that has been squashed, removing it from the deferred list.
// Returns nullptr when no such instruction exists.
// NOTE(review): listing line 1171 (the function name) is missing from this
// extract; presumably getDeferredMemInstToExecute() -- confirm.
1169 template <class Impl>
1170 typename Impl::DynInstPtr
1172 {
1173  for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
1174  ++it) {
1175  if ((*it)->translationCompleted() || (*it)->isSquashed()) {
    // Move the pointer out of the node, then erase the node; the
    // function returns immediately so `it` is not reused.
1176  DynInstPtr mem_inst = std::move(*it);
1177  deferredMemInsts.erase(it);
1178  return mem_inst;
1179  }
1180  }
1181  return nullptr;
1182 }
1183 
// Removes and returns the instruction at the front of retryMemInsts, or
// nullptr when that list is empty. Note that this reads the retry list,
// not blockedMemInsts.
// NOTE(review): listing line 1186 (the function name) is missing from this
// extract; presumably getBlockedMemInstToExecute() -- confirm.
1184 template <class Impl>
1185 typename Impl::DynInstPtr
1187 {
1188  if (retryMemInsts.empty()) {
1189  return nullptr;
1190  } else {
1191  DynInstPtr mem_inst = std::move(retryMemInsts.front());
1192  retryMemInsts.pop_front();
1193  return mem_inst;
1194  }
1195 }
1196 
// Reports a memory-ordering violation between `store` and `faulting_load`
// to the memory dependence unit of the store's thread.
// NOTE(review): listing line 1199 (first line of the signature) and line
// 1202 (a statement before the memDepUnit call) are missing from this
// extract; presumably violation(const DynInstPtr &store, ...) -- confirm.
1197 template <class Impl>
1198 void
1200  const DynInstPtr &faulting_load)
1201 {
1203  memDepUnit[store->threadNumber].violation(store, faulting_load);
1204 }
1205 
// Squashes thread `tid`: records the sequence number reported by commit
// (doneSeqNum) as the squash point, removes younger instructions from the
// IQ through doSquash(), and tells the thread's memory dependence unit to
// squash from the same point.
// NOTE(review): listing line 1208 (the signature) is missing from this
// extract; presumably squash(ThreadID tid) -- confirm.
1206 template <class Impl>
1207 void
1209 {
1210  DPRINTF(IQ, "[tid:%i] Starting to squash instructions in "
1211  "the IQ.\n", tid);
1212 
1213  // Read instruction sequence number of last instruction out of the
1214  // time buffer.
1215  squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
1216 
1217  doSquash(tid);
1218 
1219  // Also tell the memory dependence unit to squash.
1220  memDepUnit[tid].squash(squashedSeqNum[tid], tid);
1221 }
1222 
1223 template <class Impl>
1224 void
1226 {
1227  // Start at the tail.
1228  ListIt squash_it = instList[tid].end();
1229  --squash_it;
1230 
1231  DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n",
1232  tid, squashedSeqNum[tid]);
1233 
1234  // Squash any instructions younger than the squashed sequence number
1235  // given.
1236  while (squash_it != instList[tid].end() &&
1237  (*squash_it)->seqNum > squashedSeqNum[tid]) {
1238 
1239  DynInstPtr squashed_inst = (*squash_it);
1240  if (squashed_inst->isFloating()) {
1242  } else if (squashed_inst->isVector()) {
1244  } else {
1246  }
1247 
1248  // Only handle the instruction if it actually is in the IQ and
1249  // hasn't already been squashed in the IQ.
1250  if (squashed_inst->threadNumber != tid ||
1251  squashed_inst->isSquashedInIQ()) {
1252  --squash_it;
1253  continue;
1254  }
1255 
1256  if (!squashed_inst->isIssued() ||
1257  (squashed_inst->isMemRef() &&
1258  !squashed_inst->memOpDone())) {
1259 
1260  DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n",
1261  tid, squashed_inst->seqNum, squashed_inst->pcState());
1262 
1263  bool is_acq_rel = squashed_inst->isMemBarrier() &&
1264  (squashed_inst->isLoad() ||
1265  (squashed_inst->isStore() &&
1266  !squashed_inst->isStoreConditional()));
1267 
1268  // Remove the instruction from the dependency list.
1269  if (is_acq_rel ||
1270  (!squashed_inst->isNonSpeculative() &&
1271  !squashed_inst->isStoreConditional() &&
1272  !squashed_inst->isAtomic() &&
1273  !squashed_inst->isMemBarrier() &&
1274  !squashed_inst->isWriteBarrier())) {
1275 
1276  for (int src_reg_idx = 0;
1277  src_reg_idx < squashed_inst->numSrcRegs();
1278  src_reg_idx++)
1279  {
1280  PhysRegIdPtr src_reg =
1281  squashed_inst->renamedSrcRegIdx(src_reg_idx);
1282 
1283  // Only remove it from the dependency graph if it
1284  // was placed there in the first place.
1285 
1286  // Instead of doing a linked list traversal, we
1287  // can just remove these squashed instructions
1288  // either at issue time, or when the register is
1289  // overwritten. The only downside to this is it
1290  // leaves more room for error.
1291 
1292  if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
1293  !src_reg->isFixedMapping()) {
1294  dependGraph.remove(src_reg->flatIndex(),
1295  squashed_inst);
1296  }
1297 
1299  }
1300 
1301  } else if (!squashed_inst->isStoreConditional() ||
1302  !squashed_inst->isCompleted()) {
1303  NonSpecMapIt ns_inst_it =
1304  nonSpecInsts.find(squashed_inst->seqNum);
1305 
1306  // we remove non-speculative instructions from
1307  // nonSpecInsts already when they are ready, and so we
1308  // cannot always expect to find them
1309  if (ns_inst_it == nonSpecInsts.end()) {
1310  // loads that became ready but stalled on a
1311  // blocked cache are already removed from
1312  // nonSpecInsts, and have not faulted
1313  assert(squashed_inst->getFault() != NoFault ||
1314  squashed_inst->isMemRef());
1315  } else {
1316 
1317  (*ns_inst_it).second = NULL;
1318 
1319  nonSpecInsts.erase(ns_inst_it);
1320 
1322  }
1323  }
1324 
1325  // Might want to also clear out the head of the dependency graph.
1326 
1327  // Mark it as squashed within the IQ.
1328  squashed_inst->setSquashedInIQ();
1329 
1330  // @todo: Remove this hack where several statuses are set so the
1331  // inst will flow through the rest of the pipeline.
1332  squashed_inst->setIssued();
1333  squashed_inst->setCanCommit();
1334  squashed_inst->clearInIQ();
1335 
1336  //Update Thread IQ Count
1337  count[squashed_inst->threadNumber]--;
1338 
1339  ++freeEntries;
1340  }
1341 
1342  // IQ clears out the heads of the dependency graph only when
1343  // instructions reach writeback stage. If an instruction is squashed
1344  // before writeback stage, its head of dependency graph would not be
1345  // cleared out; it holds the instruction's DynInstPtr. This prevents
1346  // freeing the squashed instruction's DynInst.
1347  // Thus, we need to manually clear out the squashed instructions' heads
1348  // of dependency graph.
1349  for (int dest_reg_idx = 0;
1350  dest_reg_idx < squashed_inst->numDestRegs();
1351  dest_reg_idx++)
1352  {
1353  PhysRegIdPtr dest_reg =
1354  squashed_inst->renamedDestRegIdx(dest_reg_idx);
1355  if (dest_reg->isFixedMapping()){
1356  continue;
1357  }
1358  assert(dependGraph.empty(dest_reg->flatIndex()));
1359  dependGraph.clearInst(dest_reg->flatIndex());
1360  }
1361  instList[tid].erase(squash_it--);
1363  }
1364 }
1365 
1366 template <class Impl>
1367 bool
1369 {
1370  // Loop through the instruction's source registers, adding
1371  // them to the dependency list if they are not ready.
1372  int8_t total_src_regs = new_inst->numSrcRegs();
1373  bool return_val = false;
1374 
1375  for (int src_reg_idx = 0;
1376  src_reg_idx < total_src_regs;
1377  src_reg_idx++)
1378  {
1379  // Only add it to the dependency graph if it's not ready.
1380  if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
1381  PhysRegIdPtr src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
1382 
1383  // Check the IQ's scoreboard to make sure the register
1384  // hasn't become ready while the instruction was in flight
1385  // between stages. Only if it really isn't ready should
1386  // it be added to the dependency graph.
1387  if (src_reg->isFixedMapping()) {
1388  continue;
1389  } else if (!regScoreboard[src_reg->flatIndex()]) {
1390  DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1391  "is being added to the dependency chain.\n",
1392  new_inst->pcState(), src_reg->index(),
1393  src_reg->className());
1394 
1395  dependGraph.insert(src_reg->flatIndex(), new_inst);
1396 
1397  // Change the return value to indicate that something
1398  // was added to the dependency graph.
1399  return_val = true;
1400  } else {
1401  DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1402  "became ready before it reached the IQ.\n",
1403  new_inst->pcState(), src_reg->index(),
1404  src_reg->className());
1405  // Mark a register ready within the instruction.
1406  new_inst->markSrcRegReady(src_reg_idx);
1407  }
1408  }
1409  }
1410 
1411  return return_val;
1412 }
1413 
1414 template <class Impl>
1415 void
1417 {
1418  // Nothing really needs to be marked when an instruction becomes
1419  // the producer of a register's value, but for convenience a ptr
1420  // to the producing instruction will be placed in the head node of
1421  // the dependency links.
1422  int8_t total_dest_regs = new_inst->numDestRegs();
1423 
1424  for (int dest_reg_idx = 0;
1425  dest_reg_idx < total_dest_regs;
1426  dest_reg_idx++)
1427  {
1428  PhysRegIdPtr dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
1429 
1430  // Some registers have fixed mapping, and there is no need to track
1431  // dependencies as these instructions must be executed at commit.
1432  if (dest_reg->isFixedMapping()) {
1433  continue;
1434  }
1435 
1436  if (!dependGraph.empty(dest_reg->flatIndex())) {
1437  dependGraph.dump();
1438  panic("Dependency graph %i (%s) (flat: %i) not empty!",
1439  dest_reg->index(), dest_reg->className(),
1440  dest_reg->flatIndex());
1441  }
1442 
1443  dependGraph.setInst(dest_reg->flatIndex(), new_inst);
1444 
1445  // Mark the scoreboard to say it's not yet ready.
1446  regScoreboard[dest_reg->flatIndex()] = false;
1447  }
1448 }
1449 
1450 template <class Impl>
1451 void
1453 {
1454  // If the instruction now has all of its source registers
1455  // available, then add it to the list of ready instructions.
1456  if (inst->readyToIssue()) {
1457 
1458  //Add the instruction to the proper ready list.
1459  if (inst->isMemRef()) {
1460 
1461  DPRINTF(IQ, "Checking if memory instruction can issue.\n");
1462 
1463  // Message to the mem dependence unit that this instruction has
1464  // its registers ready.
1465  memDepUnit[inst->threadNumber].regsReady(inst);
1466 
1467  return;
1468  }
1469 
1470  OpClass op_class = inst->opClass();
1471 
1472  DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1473  "the ready list, PC %s opclass:%i [sn:%llu].\n",
1474  inst->pcState(), op_class, inst->seqNum);
1475 
1476  readyInsts[op_class].push(inst);
1477 
1478  // Will need to reorder the list if either a queue is not on the list,
1479  // or it has an older instruction than last time.
1480  if (!queueOnList[op_class]) {
1481  addToOrderList(op_class);
1482  } else if (readyInsts[op_class].top()->seqNum <
1483  (*readyIt[op_class]).oldestInst) {
1484  listOrder.erase(readyIt[op_class]);
1485  addToOrderList(op_class);
1486  }
1487  }
1488 }
1489 
1490 template <class Impl>
1491 int
1493 {
1494  return numEntries - freeEntries;
1495 }
1496 
1497 template <class Impl>
1498 void
1500 {
1501  for (int i = 0; i < Num_OpClasses; ++i) {
1502  cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
1503 
1504  cprintf("\n");
1505  }
1506 
1507  cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
1508 
1509  NonSpecMapIt non_spec_it = nonSpecInsts.begin();
1510  NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
1511 
1512  cprintf("Non speculative list: ");
1513 
1514  while (non_spec_it != non_spec_end_it) {
1515  cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(),
1516  (*non_spec_it).second->seqNum);
1517  ++non_spec_it;
1518  }
1519 
1520  cprintf("\n");
1521 
1522  ListOrderIt list_order_it = listOrder.begin();
1523  ListOrderIt list_order_end_it = listOrder.end();
1524  int i = 1;
1525 
1526  cprintf("List order: ");
1527 
1528  while (list_order_it != list_order_end_it) {
1529  cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType,
1530  (*list_order_it).oldestInst);
1531 
1532  ++list_order_it;
1533  ++i;
1534  }
1535 
1536  cprintf("\n");
1537 }
1538 
1539 
1540 template <class Impl>
1541 void
1543 {
1544  for (ThreadID tid = 0; tid < numThreads; ++tid) {
1545  int num = 0;
1546  int valid_num = 0;
1547  ListIt inst_list_it = instList[tid].begin();
1548 
1549  while (inst_list_it != instList[tid].end()) {
1550  cprintf("Instruction:%i\n", num);
1551  if (!(*inst_list_it)->isSquashed()) {
1552  if (!(*inst_list_it)->isIssued()) {
1553  ++valid_num;
1554  cprintf("Count:%i\n", valid_num);
1555  } else if ((*inst_list_it)->isMemRef() &&
1556  !(*inst_list_it)->memOpDone()) {
1557  // Loads that have not been marked as executed
1558  // still count towards the total instructions.
1559  ++valid_num;
1560  cprintf("Count:%i\n", valid_num);
1561  }
1562  }
1563 
1564  cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1565  "Issued:%i\nSquashed:%i\n",
1566  (*inst_list_it)->pcState(),
1567  (*inst_list_it)->seqNum,
1568  (*inst_list_it)->threadNumber,
1569  (*inst_list_it)->isIssued(),
1570  (*inst_list_it)->isSquashed());
1571 
1572  if ((*inst_list_it)->isMemRef()) {
1573  cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1574  }
1575 
1576  cprintf("\n");
1577 
1578  inst_list_it++;
1579  ++num;
1580  }
1581  }
1582 
1583  cprintf("Insts to Execute list:\n");
1584 
1585  int num = 0;
1586  int valid_num = 0;
1587  ListIt inst_list_it = instsToExecute.begin();
1588 
1589  while (inst_list_it != instsToExecute.end())
1590  {
1591  cprintf("Instruction:%i\n",
1592  num);
1593  if (!(*inst_list_it)->isSquashed()) {
1594  if (!(*inst_list_it)->isIssued()) {
1595  ++valid_num;
1596  cprintf("Count:%i\n", valid_num);
1597  } else if ((*inst_list_it)->isMemRef() &&
1598  !(*inst_list_it)->memOpDone()) {
1599  // Loads that have not been marked as executed
1600  // still count towards the total instructions.
1601  ++valid_num;
1602  cprintf("Count:%i\n", valid_num);
1603  }
1604  }
1605 
1606  cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n"
1607  "Issued:%i\nSquashed:%i\n",
1608  (*inst_list_it)->pcState(),
1609  (*inst_list_it)->seqNum,
1610  (*inst_list_it)->threadNumber,
1611  (*inst_list_it)->isIssued(),
1612  (*inst_list_it)->isSquashed());
1613 
1614  if ((*inst_list_it)->isMemRef()) {
1615  cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1616  }
1617 
1618  cprintf("\n");
1619 
1620  inst_list_it++;
1621  ++num;
1622  }
1623 }
1624 
1625 #endif//__CPU_O3_INST_QUEUE_IMPL_HH__
Stats::Scalar iqMiscInstsIssued
Stat for number of miscellaneous instructions issued.
Definition: inst_queue.hh:494
int wbOutstanding
Number of instructions currently in flight to FUs.
Definition: inst_queue.hh:435
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:167
#define DPRINTF(x,...)
Definition: trace.hh:229
void regStats()
Registers statistics.
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition: info.hh:53
std::list< ThreadID > * activeThreads
Pointer to list of active threads.
Definition: inst_queue.hh:414
Stats::Scalar iqFloatInstsIssued
Stat for number of floating point instructions issued.
Definition: inst_queue.hh:488
void completed(const DynInstPtr &inst)
Completes a memory instruction.
void replayMemInst(const DynInstPtr &replay_inst)
Replays a memory instruction.
Stats::Scalar vecInstQueueReads
Definition: inst_queue.hh:541
Derived & init(size_type _x, size_type _y)
Definition: statistics.hh:1288
decltype(nullptr) constexpr NoFault
Definition: types.hh:245
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation...
Definition: statistics.hh:379
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
void insertNonSpec(const DynInstPtr &new_inst)
Inserts a new, non-speculative instruction into the IQ.
Stats::Scalar fpAluAccesses
Definition: inst_queue.hh:546
void doSquash(ThreadID tid)
Does the actual squashing.
void scheduleReadyInsts()
Schedules ready instructions, adding the ready ones (oldest first) to the queue to execute...
int getNumPinnedWritesToComplete() const
Definition: reg_class.hh:338
Definition: test.h:61
Stats::Scalar intAluAccesses
Definition: inst_queue.hh:545
Bitfield< 7 > i
void violation(const DynInstPtr &store_inst, const DynInstPtr &violating_load)
Indicates an ordering violation between a store and a younger load.
Impl::CPUPol::IEW IEW
Definition: inst_queue.hh:90
Stats::Scalar iqInstsAdded
Stat for number of instructions added.
Definition: inst_queue.hh:480
InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
Constructs an IQ.
bool queueOnList[Num_OpClasses]
Tracks if each ready queue is on the age order list.
Definition: inst_queue.hh:385
Stats::Scalar fpInstQueueWakeupAccesses
Definition: inst_queue.hh:540
std::list< DynInstPtr > instsToExecute
List of instructions that are ready to be executed.
Definition: inst_queue.hh:319
void regStats()
Registers statistics.
void dumpInsts()
Debugging function to dump out all instructions that are in the IQ.
void replay()
Replays all instructions that have been rescheduled by moving them to the ready list.
InstSeqNum squashedSeqNum[Impl::MaxThreads]
The sequence number of the squashed instruction.
Definition: inst_queue.hh:443
Impl::DynInstPtr DynInstPtr
Definition: inst_queue.hh:88
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load)
Indicates an ordering violation between a store and a load.
void scheduleNonSpec(const InstSeqNum &inst)
Schedules a single specific non-speculative instruction.
DynInstPtr getInstToExecute()
Returns the oldest scheduled instruction, and removes it from the list of instructions waiting to exe...
Cycles getOpLatency(OpClass capability)
Returns the operation execution latency of the given capability.
Definition: fu_pool.hh:164
Stats::Scalar iqInstsIssued
Definition: inst_queue.hh:484
FU completion event class.
Definition: inst_queue.hh:99
Stats::Scalar iqMemInstsIssued
Stat for number of memory instructions issued.
Definition: inst_queue.hh:492
void freeUnitNextCycle(int fu_idx)
Frees a FU at the end of this cycle.
Definition: fu_pool.cc:188
void addToOrderList(OpClass op_class)
Add an op class to the age order list.
void setActiveThreads(std::list< ThreadID > *at_ptr)
Sets active threads list.
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a memory or write barrier into the IQ to make sure loads and stores are ordered properly...
void setIQ(InstructionQueue< Impl > *iq_ptr)
Sets the pointer to the IQ.
unsigned numEntries
The number of entries in the instruction queue.
Definition: inst_queue.hh:426
static constexpr auto NoCapableFU
Definition: fu_pool.hh:137
std::list< DynInstPtr > instList[Impl::MaxThreads]
List of all the instructions in the IQ (some of which may be issued).
Definition: inst_queue.hh:316
bool addToDependents(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a consumer.
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Definition: statistics.hh:336
void decrNumPinnedWritesToComplete()
Definition: reg_class.hh:348
DynInstPtr inst
Executing instruction.
Definition: inst_queue.hh:102
void wakeDependents(const DynInstPtr &inst)
Wakes any dependents of a memory instruction.
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1152
void rescheduleMemInst(const DynInstPtr &resched_inst)
Reschedules a memory instruction.
void setTimeBuffer(TimeBuffer< TimeStruct > *tb_ptr)
Sets the global time buffer.
std::vector< bool > regScoreboard
A cache of the recently woken registers.
Definition: inst_queue.hh:451
bool isPinned() const
Definition: reg_class.hh:336
Stats::Scalar fpInstQueueWrites
Definition: inst_queue.hh:539
void nonSpecInstReady(const DynInstPtr &inst)
Indicate that a non-speculative instruction is ready.
Impl::O3CPU O3CPU
Definition: inst_queue.hh:87
std::list< DynInstPtr > blockedMemInsts
List of instructions that have been cache blocked.
Definition: inst_queue.hh:327
bool isFull()
Returns whether or not the IQ is full.
unsigned numFreeEntries()
Returns total number of free entries.
Tick curTick()
The current simulated tick.
Definition: core.hh:47
int fuIdx
Index of the FU used for executing.
Definition: inst_queue.hh:105
bool hasReadyInsts()
Returns if there are any ready instructions in the IQ.
Stats::Scalar intInstQueueWakeupAccesses
Definition: inst_queue.hh:537
Stats::Scalar iqSquashedNonSpecRemoved
Stat for number of non-speculative instructions removed due to a squash.
Definition: inst_queue.hh:505
void insertNonSpec(const DynInstPtr &inst)
Inserts a non-speculative memory instruction.
void issue(const DynInstPtr &inst)
Issues the given instruction.
Derived & prereq(const Stat &prereq)
Set the prerequisite stat and marks this stat to print at the end of simulation.
Definition: statistics.hh:350
TimeBuffer< IssueStruct > * issueToExecuteQueue
The queue to the execute stage.
Definition: inst_queue.hh:300
int wakeDependents(const DynInstPtr &completed_inst)
Wakes all dependents of a completed instruction.
IEW * iewStage
Pointer to IEW stage.
Definition: inst_queue.hh:290
Stats::Scalar intInstQueueWrites
Definition: inst_queue.hh:536
void addReadyMemInst(const DynInstPtr &ready_inst)
Adds a ready memory instruction to the ready list.
ReadyInstQueue readyInsts[Num_OpClasses]
List of ready instructions, per op class.
Definition: inst_queue.hh:354
void regsReady(const DynInstPtr &inst)
Indicate that an instruction has its registers ready.
void commit(const InstSeqNum &inst, ThreadID tid=0)
Commits all instructions up to and including the given sequence number, for a specific thread...
uint64_t InstSeqNum
Definition: inst_seq.hh:40
std::map< InstSeqNum, DynInstPtr > nonSpecInsts
List of non-speculative instructions that will be scheduled once the IQ gets a signal from commit...
Definition: inst_queue.hh:363
std::list< ListOrderEntry > listOrder
List that contains the age order of the oldest instruction of each ready queue.
Definition: inst_queue.hh:380
void addIfReady(const DynInstPtr &inst)
Moves an instruction to the ready queue if it is ready.
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2609
Stats::Scalar iqBranchInstsIssued
Stat for number of branch instructions issued.
Definition: inst_queue.hh:490
STL list class.
Definition: stl.hh:54
static scfx_rep_node * list
Definition: scfx_rep.cc:336
virtual const char * description() const
Return a C string describing the event.
unsigned count[Impl::MaxThreads]
Per Thread IQ count.
Definition: inst_queue.hh:417
void takeOverFrom()
Takes over execution from another CPU's thread.
TimeBuffer< TimeStruct > * timeBuffer
The backwards time buffer.
Definition: inst_queue.hh:303
Cycles commitToIEWDelay
Delay between commit stage and the IQ.
Definition: inst_queue.hh:440
Stats::Scalar iqSquashedInstsIssued
Stat for number of squashed instructions that were ready to issue.
Definition: inst_queue.hh:496
Stats::Formula fuBusyRate
Number of times the FU was busy per instruction issued.
Definition: inst_queue.hh:533
std::list< ListOrderEntry >::iterator ListOrderIt
Definition: inst_queue.hh:382
Stats::Scalar vecAluAccesses
Definition: inst_queue.hh:547
void reschedule(const DynInstPtr &inst)
Reschedules an instruction to be re-executed.
std::string name() const
Returns the name of the IQ.
const FlagsType total
Print the total.
Definition: info.hh:51
std::list< DynInstPtr > retryMemInsts
List of instructions that were cache blocked, but a retry has been seen since, so they can now be ret...
Definition: inst_queue.hh:332
TimeBuffer< TimeStruct >::wire fromCommit
Wire to read information from timebuffer.
Definition: inst_queue.hh:306
void insert(const DynInstPtr &inst)
Inserts a memory instruction.
void deferMemInst(const DynInstPtr &deferred_inst)
Defers a memory instruction when its DTB translation incurs a hw page table walk. ...
DynInstPtr getBlockedMemInstToExecute()
Gets a memory instruction that was blocked on the cache.
void resetState()
Resets all instruction queue state.
Physical register ID.
Definition: reg_class.hh:229
~InstructionQueue()
Destructs the IQ.
DynInstPtr getDeferredMemInstToExecute()
Gets a memory instruction that was deferred due to a delayed DTB translation if it is now ready to ex...
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:279
void drainSanityCheck() const
Perform sanity checks after a drain.
int16_t ThreadID
Thread index/ID type.
Definition: types.hh:227
void squash(ThreadID tid)
Squashes instructions for a thread.
Stats::Vector2d statIssuedInstType
Stat for total number issued for each instruction type.
Definition: inst_queue.hh:525
void init(DerivO3CPUParams *params, ThreadID tid)
Initializes the unit with parameters and a thread id.
SMTQueuePolicy iqPolicy
IQ sharing policy for SMT.
Definition: inst_queue.hh:408
DependencyGraph< DynInstPtr > dependGraph
Definition: inst_queue.hh:401
int countInsts()
Debugging function to count how many entries are in the IQ.
const PhysRegIndex & flatIndex() const
Flat index accessor.
Definition: reg_class.hh:311
bool freeFU
Should the FU be added to the list to be freed upon completing this event.
Definition: inst_queue.hh:113
Stats::Scalar iqSquashedOperandsExamined
Stat for number of squashed instruction operands examined when squashing.
Definition: inst_queue.hh:502
void blockMemInst(const DynInstPtr &blocked_inst)
Defers a memory instruction when it is cache blocked.
void resetEntries()
Resets max entries for all threads.
Stats::Scalar fpInstQueueReads
Definition: inst_queue.hh:538
Stats::Scalar vecInstQueueWrites
Definition: inst_queue.hh:542
static const OpClass Num_OpClasses
Definition: op_class.hh:107
Definition: eventq.hh:189
void completeMemInst(const DynInstPtr &completed_inst)
Completes a memory operation.
std::map< InstSeqNum, DynInstPtr >::iterator NonSpecMapIt
Definition: inst_queue.hh:365
void squash(const InstSeqNum &squashed_num, ThreadID tid)
Squashes all instructions up until a given sequence number for a specific thread. ...
int entryAmount(ThreadID num_threads)
Number of entries needed for the given number of threads.
int size
Definition: comm.hh:111
Stats::Scalar iqSquashedInstsExamined
Stat for number of squashed instructions examined when squashing.
Definition: inst_queue.hh:498
O3CPU * cpu
Pointer to the CPU.
Definition: inst_queue.hh:284
ListOrderIt readyIt[Num_OpClasses]
Iterators of each ready queue.
Definition: inst_queue.hh:390
void processFUCompletion(const DynInstPtr &inst, int fu_idx)
Process FU completion event.
Stats::Scalar iqIntInstsIssued
Stat for number of integer instructions issued.
Definition: inst_queue.hh:486
const RegIndex & index() const
Index accessors.
Definition: reg_class.hh:179
Stats::Vector fuBusy
Number of times the FU was busy.
Definition: inst_queue.hh:531
void completeBarrier(const DynInstPtr &inst)
Completes a barrier instruction.
Entry for the list age ordering by op class.
Definition: inst_queue.hh:368
std::list< DynInstPtr >::iterator ListIt
Definition: inst_queue.hh:96
T * access(int idx)
Definition: timebuf.hh:211
wire getWire(int idx)
Definition: timebuf.hh:232
unsigned maxEntries[Impl::MaxThreads]
Max IQ Entries Per Thread.
Definition: inst_queue.hh:420
Stats::Scalar vecInstQueueWakeupAccesses
Definition: inst_queue.hh:543
void setIssueToExecuteQueue(TimeBuffer< IssueStruct > *i2eQueue)
Sets the time buffer between issue and execute.
void cacheUnblocked()
Notify instruction queue that a previous blockage has resolved.
void moveToYoungerInst(ListOrderIt age_order_it)
Called when the oldest instruction has been removed from a ready queue; this places that ready queue ...
Stats::Scalar iqNonSpecInstsAdded
Stat for number of non-speculative instructions added.
Definition: inst_queue.hh:482
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:312
int getUnit(OpClass capability)
Gets a FU providing the requested capability.
Definition: fu_pool.cc:160
Stats::Formula issueRate
Number of instructions issued per cycle.
Definition: inst_queue.hh:528
unsigned numPhysRegs
The number of physical registers in the CPU.
Definition: inst_queue.hh:432
Stats::Scalar intInstQueueReads
Definition: inst_queue.hh:535
MemDepUnit memDepUnit[Impl::MaxThreads]
The memory dependence unit, which tracks/predicts memory dependences between instructions.
Definition: inst_queue.hh:295
Derived & ysubnames(const char **names)
Definition: statistics.hh:454
constexpr unsigned NumVecElemPerVecReg
Definition: registers.hh:54
void addToProducers(const DynInstPtr &new_inst)
Adds an instruction to the dependency graph, as a producer.
bool isPipelined(OpClass capability)
Returns whether the given capability is pipelined.
Definition: fu_pool.hh:169
void insertBarrier(const DynInstPtr &barr_inst)
Inserts a barrier instruction.
FUPool * fuPool
Function unit pool.
Definition: inst_queue.hh:309
const FlagsType dist
Print the distribution.
Definition: info.hh:57
bool isFixedMapping() const
Returns true if this register is always associated to the same architectural register.
Definition: reg_class.hh:305
std::list< DynInstPtr > deferredMemInsts
List of instructions waiting for their DTB translation to complete (hw page table walk in progress)...
Definition: inst_queue.hh:324
bool isDrained() const
Determine if we are drained.
FUCompletion(const DynInstPtr &_inst, int fu_idx, InstructionQueue< Impl > *iq_ptr)
Construct a FU completion event.
InstructionQueue< Impl > * iqPtr
Pointer back to the instruction queue.
Definition: inst_queue.hh:108
A standard instruction queue class.
Definition: inst_queue.hh:83
unsigned totalWidth
The total number of instructions that can be issued in one cycle.
Definition: inst_queue.hh:429
void dumpLists()
Debugging function to dump all the list sizes, as well as print out the list of nonspeculative instru...
Stats::Distribution numIssuedDist
Distribution of number of instructions in the queue.
Definition: inst_queue.hh:513
unsigned freeEntries
Number of free IQ entries left.
Definition: inst_queue.hh:423
Stats::Vector statFuBusy
Distribution of the cycles it takes to issue an instruction.
Definition: inst_queue.hh:522
const char * className() const
Return a const char* with the register class name.
Definition: reg_class.hh:208
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1899
ThreadID numThreads
Number of Total Threads.
Definition: inst_queue.hh:411
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:156
static constexpr auto NoFreeFU
Definition: fu_pool.hh:138
void insert(const DynInstPtr &new_inst)
Inserts a new instruction into the IQ.

Generated on Fri Feb 28 2020 16:26:59 for gem5 by doxygen 1.8.13