gem5 v21.0.1.0
schedule_stage.cc
/*
 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/schedule_stage.hh"

#include <unordered_set>

#include "debug/GPUSched.hh"
#include "debug/GPUVRF.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"

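// Overview: the schedule (SCH) stage sits between the scoreboard check
// (SCB) stage and the execute (EX) stage. Each cycle it selects ready
// wavefronts from the per-execution-resource ready lists, schedules their
// register file operand reads via schList, and fills the dispatch list
// that the EX stage consumes.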
ScheduleStage::ScheduleStage(const ComputeUnitParams &p, ComputeUnit &cu,
                             ScoreboardCheckToSchedule &from_scoreboard_check,
                             ScheduleToExecute &to_execute)
    : computeUnit(cu), fromScoreboardCheck(from_scoreboard_check),
      toExecute(to_execute),
      _name(cu.name() + ".ScheduleStage"),
      vectorAluRdy(false), scalarAluRdy(false), scalarMemBusRdy(false),
      scalarMemIssueRdy(false), glbMemBusRdy(false), glbMemIssueRdy(false),
      locMemBusRdy(false), locMemIssueRdy(false), stats(&cu, cu.numExeUnits())
{
    for (int j = 0; j < cu.numExeUnits(); ++j) {
        scheduler.emplace_back(p);
    }
    wavesInSch.clear();
    schList.resize(cu.numExeUnits());
    for (auto &dq : schList) {
        dq.clear();
    }
}

ScheduleStage::~ScheduleStage()
{
    scheduler.clear();
    wavesInSch.clear();
    schList.clear();
}

void
ScheduleStage::init()
{

    fatal_if(scheduler.size() != fromScoreboardCheck.numReadyLists(),
             "Scheduler should have same number of entries as CU's readyList");
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        scheduler[j].bindList(&fromScoreboardCheck.readyWFs(j));
    }

    assert(computeUnit.numVectorGlobalMemUnits == 1);
    assert(computeUnit.numVectorSharedMemUnits == 1);
}

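// exec() performs one cycle of the SCH stage. In order: refresh the ready
// lists, attempt to move one ready wave per execution resource into schList
// (scheduling its operand reads), promote waves whose operand reads have
// completed to RFREADY, fill the dispatch list, arbitrate the VRF->LDS bus,
// schedule destination register writes, and reserve execution resources.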
void
ScheduleStage::exec()
{
    toExecute.reset();

    // Update readyList
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        /**
         * Remove any wave that already has an instruction present in SCH
         * waiting for RF reads to complete. This prevents out of order
         * execution within a wave.
         */
        fromScoreboardCheck.updateReadyList(j);
        for (auto wIt = fromScoreboardCheck.readyWFs(j).begin();
             wIt != fromScoreboardCheck.readyWFs(j).end();) {
            if (wavesInSch.find((*wIt)->wfDynId) != wavesInSch.end()) {
                *wIt = nullptr;
                wIt = fromScoreboardCheck.readyWFs(j).erase(wIt);
            } else {
                wIt++;
            }
        }
    }

    // Attempt to add another wave for each EXE type to schList queues
    // VMEM resources are iterated first, effectively giving priority
    // to VMEM over VALU for scheduling read of operands to the RFs.
    // Scalar Memory is iterated after VMEM

    // Iterate VMEM and SMEM
    int firstMemUnit = computeUnit.firstMemUnit();
    int lastMemUnit = computeUnit.lastMemUnit();
    for (int j = firstMemUnit; j <= lastMemUnit; j++) {
        int readyListSize = fromScoreboardCheck.readyWFs(j).size();
        // If no wave is ready to be scheduled on the execution resource
        // then skip scheduling for this execution resource
        if (!readyListSize) {
            stats.rdyListEmpty[j]++;
            continue;
        }
        stats.rdyListNotEmpty[j]++;

        // Pick a wave and attempt to add it to schList
        Wavefront *wf = scheduler[j].chooseWave();
        GPUDynInstPtr &gpu_dyn_inst = wf->instructionBuffer.front();
        assert(gpu_dyn_inst);
        if (!addToSchList(j, gpu_dyn_inst)) {
            // For waves not added to schList, increment count of cycles
            // this wave spends in SCH stage.
            wf->stats.schCycles++;
            stats.addToSchListStalls[j]++;
        } else {
            if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
                wf->incLGKMInstsIssued();
            } else {
                wf->incVMemInstsIssued();
                if (gpu_dyn_inst->isFlat()) {
                    wf->incLGKMInstsIssued();
                }
            }
            if (gpu_dyn_inst->isStore() && gpu_dyn_inst->isGlobalSeg()) {
                wf->incExpInstsIssued();
            }
        }
    }

    // Iterate everything else
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        // skip the VMEM resources
        if (j >= firstMemUnit && j <= lastMemUnit) {
            continue;
        }
        int readyListSize = fromScoreboardCheck.readyWFs(j).size();
        // If no wave is ready to be scheduled on the execution resource
        // then skip scheduling for this execution resource
        if (!readyListSize) {
            stats.rdyListEmpty[j]++;
            continue;
        }
        stats.rdyListNotEmpty[j]++;

        // Pick a wave and attempt to add it to schList
        Wavefront *wf = scheduler[j].chooseWave();
        GPUDynInstPtr &gpu_dyn_inst = wf->instructionBuffer.front();
        assert(gpu_dyn_inst);
        if (!addToSchList(j, gpu_dyn_inst)) {
            // For waves not added to schList, increment count of cycles
            // this wave spends in SCH stage.
            wf->stats.schCycles++;
            stats.addToSchListStalls[j]++;
        }
    }

    // At this point, the schList queue per EXE type may contain
    // multiple waves, in order of age (oldest to youngest).
    // Waves may be in RFBUSY, indicating they are waiting for registers
    // to be read, or in RFREADY, indicating they are candidates for
    // the dispatchList and execution

    // Iterate schList queues and check if any of the waves have finished
    // reading their operands, moving those waves to RFREADY status
    checkRfOperandReadComplete();

    // Fill the dispatch list with the oldest wave of each EXE type that
    // is ready to execute
    // Wave is picked if status in schList is RFREADY and it passes resource
    // ready checks similar to those currently in SCB
    fillDispatchList();

    // Resource arbitration on waves in dispatchList
    // Losing waves are re-inserted to the schList at a location determined
    // by wave age

    // Arbitrate access to the VRF->LDS bus
    arbitrateVrfToLdsBus();

    // Schedule write operations to the register files
    scheduleRfDestOperands();

    // Lastly, reserve resources for waves that are ready to execute.
    reserveResources();
}

void
ScheduleStage::doDispatchListTransition(int unitId, DISPATCH_STATUS s,
                                        const GPUDynInstPtr &gpu_dyn_inst)
{
    toExecute.dispatchTransition(gpu_dyn_inst, unitId, s);
}

void
ScheduleStage::doDispatchListTransition(int unitId, DISPATCH_STATUS s)
{
    toExecute.dispatchTransition(unitId, s);
}

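// schedRfWrites checks whether the register files can accept the
// instruction's destination operand writes: the SRF always, and the VRF
// only for vector instructions. On success the writes are booked in the
// RFs and the function returns true; on failure the relevant RF-access
// stall statistics are incremented.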
bool
ScheduleStage::schedRfWrites(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
{
    assert(gpu_dyn_inst);
    Wavefront *wf = gpu_dyn_inst->wavefront();
    bool accessVrfWr = true;
    if (!gpu_dyn_inst->isScalar()) {
        accessVrfWr = computeUnit.vrf[wf->simdId]
            ->canScheduleWriteOperands(wf, gpu_dyn_inst);
    }
    bool accessSrfWr = computeUnit.srf[wf->simdId]
        ->canScheduleWriteOperands(wf, gpu_dyn_inst);
    bool accessRf = accessVrfWr && accessSrfWr;
    if (accessRf) {
        if (!gpu_dyn_inst->isScalar()) {
            computeUnit.vrf[wf->simdId]->scheduleWriteOperands(wf,
                gpu_dyn_inst);
        }
        computeUnit.srf[wf->simdId]->scheduleWriteOperands(wf, gpu_dyn_inst);
        return true;
    } else {
        stats.rfAccessStalls[SCH_RF_ACCESS_NRDY]++;
        if (!accessSrfWr) {
            stats.rfAccessStalls[SCH_SRF_WR_ACCESS_NRDY]++;
        }
        if (!accessVrfWr) {
            stats.rfAccessStalls[SCH_VRF_WR_ACCESS_NRDY]++;
        }

        // Increment stall counts for WF
        wf->stats.schStalls++;
        wf->stats.schRfAccessStalls++;
    }
    return false;
}

void
ScheduleStage::scheduleRfDestOperands()
{
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        if (toExecute.dispatchStatus(j) == EMPTY ||
            toExecute.dispatchStatus(j) == SKIP) {
            continue;
        }

        // get the wave on dispatch list and attempt to allocate write
        // resources in the RFs
        const GPUDynInstPtr &gpu_dyn_inst = toExecute.readyInst(j);
        assert(gpu_dyn_inst);
        Wavefront *wf = gpu_dyn_inst->wavefront();
        if (!schedRfWrites(j, gpu_dyn_inst)) {
            reinsertToSchList(j, gpu_dyn_inst);
            doDispatchListTransition(j, EMPTY);
            // if this is a flat inst, also transition the LM pipe to empty
            // Note: since FLAT/LM arbitration occurs before scheduling
            // destination operands to the RFs, it is possible that a LM
            // instruction lost arbitration, but would have been able to
            // pass the RF destination operand check here, and execute
            // instead of the FLAT.
            if (wf->instructionBuffer.front()->isFlat()) {
                assert(toExecute.dispatchStatus(wf->localMem)
                       == SKIP);
                doDispatchListTransition(wf->localMem, EMPTY);
            }
        }
    }
}

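// addToSchList admits a wave into the SCH stage only if both register
// files can schedule the source operand reads for its oldest instruction.
// Admitted waves enter schList in the RFBUSY state while the reads are in
// flight; rejected waves stay on the ready list and the denial is counted
// per cause.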
bool
ScheduleStage::addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
{
    // Attempt to add the wave to the schList if the VRF can support the
    // wave's next instruction
    assert(gpu_dyn_inst);
    Wavefront *wf = gpu_dyn_inst->wavefront();
    bool accessVrf = true;
    if (!gpu_dyn_inst->isScalar()) {
        accessVrf = computeUnit.vrf[wf->simdId]
            ->canScheduleReadOperands(wf, gpu_dyn_inst);
    }
    bool accessSrf = computeUnit.srf[wf->simdId]
        ->canScheduleReadOperands(wf, gpu_dyn_inst);
    // If RFs can support instruction, add to schList in RFBUSY state,
    // place wave in wavesInSch and pipeMap, and schedule Rd/Wr operands
    // to the VRF
    bool accessRf = accessVrf && accessSrf;
    if (accessRf) {
        DPRINTF(GPUSched, "schList[%d]: Adding: SIMD[%d] WV[%d]: %d: %s\n",
                exeType, wf->simdId, wf->wfDynId,
                gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());

        computeUnit.insertInPipeMap(wf);
        wavesInSch.emplace(wf->wfDynId);
        schList.at(exeType).push_back(std::make_pair(gpu_dyn_inst, RFBUSY));
        if (wf->isOldestInstBarrier() && wf->hasBarrier()) {
            wf->setStatus(Wavefront::S_BARRIER);
        }
        if (wf->isOldestInstWaitcnt()) {
            wf->setStatus(Wavefront::S_WAITCNT);
        }
        if (wf->isOldestInstSleep()) {
            wf->setStatus(Wavefront::S_STALLED_SLEEP);
        }
        if (!gpu_dyn_inst->isScalar()) {
            computeUnit.vrf[wf->simdId]
                ->scheduleReadOperands(wf, gpu_dyn_inst);
        }
        computeUnit.srf[wf->simdId]->scheduleReadOperands(wf, gpu_dyn_inst);

        DPRINTF(GPUSched, "schList[%d]: Added: SIMD[%d] WV[%d]: %d: %s\n",
                exeType, wf->simdId, wf->wfDynId,
                gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
        return true;
    } else {
        // Number of stall cycles due to RF access denied
        stats.rfAccessStalls[SCH_RF_ACCESS_NRDY]++;
        // Count number of denials due to each reason
        // Multiple items may contribute to the denied request
        if (!accessVrf) {
            stats.rfAccessStalls[SCH_VRF_RD_ACCESS_NRDY]++;
        }
        if (!accessSrf) {
            stats.rfAccessStalls[SCH_SRF_RD_ACCESS_NRDY]++;
        }

        // Increment stall counts for WF
        wf->stats.schStalls++;
        wf->stats.schRfAccessStalls++;
        DPRINTF(GPUSched, "schList[%d]: Could not add: "
                "SIMD[%d] WV[%d]: %d: %s\n",
                exeType, wf->simdId, wf->wfDynId,
                gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
    }
    return false;
}

void
ScheduleStage::reinsertToSchList(int exeType,
                                 const GPUDynInstPtr &gpu_dyn_inst)
{
    // Insert wave w into schList for specified exeType.
    // Wave is inserted in age order, with oldest wave being at the
    // front of the schList
    assert(gpu_dyn_inst);
    auto schIter = schList.at(exeType).begin();
    while (schIter != schList.at(exeType).end()
           && schIter->first->wfDynId < gpu_dyn_inst->wfDynId) {
        schIter++;
    }
    schList.at(exeType).insert(schIter, std::make_pair(gpu_dyn_inst, RFREADY));
}

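// checkMemResources samples, one cycle ahead, the availability of the
// memory issue slots (scalar, global, and local pipes) and of the
// RF-to-memory buses, caching the results in member flags that
// dispatchReady() consults below.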
void
ScheduleStage::checkMemResources()
{
    // Check for resource availability in the next cycle
    scalarMemBusRdy = false;
    scalarMemIssueRdy = false;
    // check if there is a SRF->Global Memory bus available
    if (computeUnit.srfToScalarMemPipeBus.rdy(Cycles(1))) {
        scalarMemBusRdy = true;
    }
    // check if we can issue a scalar memory instruction
    if (computeUnit.scalarMemUnit.rdy(Cycles(1))) {
        scalarMemIssueRdy = true;
    }

    glbMemBusRdy = false;
    glbMemIssueRdy = false;
    // check if there is a VRF->Global Memory bus available
    if (computeUnit.vrfToGlobalMemPipeBus.rdy(Cycles(1))) {
        glbMemBusRdy = true;
    }
    // check if we can issue a Global memory instruction
    if (computeUnit.vectorGlobalMemUnit.rdy(Cycles(1))) {
        glbMemIssueRdy = true;
    }

    locMemBusRdy = false;
    locMemIssueRdy = false;
    // check if there is a VRF->LDS bus available
    if (computeUnit.vrfToLocalMemPipeBus.rdy(Cycles(1))) {
        locMemBusRdy = true;
    }
    // check if we can issue a LDS instruction
    if (computeUnit.vectorSharedMemUnit.rdy(Cycles(1))) {
        locMemIssueRdy = true;
    }
}

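// dispatchReady applies per-instruction-type resource checks before a wave
// may leave schList: ALU-type instructions need the corresponding ALU,
// memory instructions need their pipe's issue slot, bus, and coalescer or
// request FIFO, and FLAT instructions must pass both the global and local
// memory checks, since they reserve both pipes.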
bool
ScheduleStage::dispatchReady(const GPUDynInstPtr &gpu_dyn_inst)
{
    assert(gpu_dyn_inst);
    Wavefront *wf = gpu_dyn_inst->wavefront();
    vectorAluRdy = false;
    scalarAluRdy = false;
    // check for available vector/scalar ALUs in the next cycle
    if (computeUnit.vectorALUs[wf->simdId].rdy(Cycles(1))) {
        vectorAluRdy = true;
    }
    if (computeUnit.scalarALUs[wf->scalarAlu].rdy(Cycles(1))) {
        scalarAluRdy = true;
    }

    if (gpu_dyn_inst->isNop()) {
        // S_NOP requires SALU. V_NOP requires VALU.
        // TODO: Scalar NOP does not require SALU in hardware,
        // and is executed out of IB directly.
        if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
            stats.dispNrdyStalls[SCH_SCALAR_ALU_NRDY]++;
            return false;
        } else if (!gpu_dyn_inst->isScalar() && !vectorAluRdy) {
            stats.dispNrdyStalls[SCH_VECTOR_ALU_NRDY]++;
            return false;
        }
    } else if (gpu_dyn_inst->isEndOfKernel()) {
        // EndPgm instruction
        if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
            stats.dispNrdyStalls[SCH_SCALAR_ALU_NRDY]++;
            return false;
        }
    } else if (gpu_dyn_inst->isBarrier() || gpu_dyn_inst->isBranch()
               || gpu_dyn_inst->isALU()) {
        // Barrier, Branch, or ALU instruction
        if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
            stats.dispNrdyStalls[SCH_SCALAR_ALU_NRDY]++;
            return false;
        } else if (!gpu_dyn_inst->isScalar() && !vectorAluRdy) {
            stats.dispNrdyStalls[SCH_VECTOR_ALU_NRDY]++;
            return false;
        }
    } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
        // Vector Global Memory instruction
        bool rdy = true;
        if (!glbMemIssueRdy) {
            rdy = false;
            stats.dispNrdyStalls[SCH_VECTOR_MEM_ISSUE_NRDY]++;
        }
        if (!glbMemBusRdy) {
            rdy = false;
            stats.dispNrdyStalls[SCH_VECTOR_MEM_BUS_BUSY_NRDY]++;
        }
        if (!computeUnit.globalMemoryPipe.coalescerReady(gpu_dyn_inst)) {
            rdy = false;
            stats.dispNrdyStalls[SCH_VECTOR_MEM_COALESCER_NRDY]++;
        }
        if (!computeUnit.globalMemoryPipe.outstandingReqsCheck(gpu_dyn_inst)) {
            rdy = false;
            stats.dispNrdyStalls[SCH_VECTOR_MEM_REQS_NRDY]++;
        }
        if (!rdy) {
            return false;
        }
    } else if (gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
        // Scalar Global Memory instruction
        bool rdy = true;
        if (!scalarMemIssueRdy) {
            rdy = false;
            stats.dispNrdyStalls[SCH_SCALAR_MEM_ISSUE_NRDY]++;
        }
        if (!scalarMemBusRdy) {
            rdy = false;
            stats.dispNrdyStalls[SCH_SCALAR_MEM_BUS_BUSY_NRDY]++;
        }
        if (!computeUnit.scalarMemoryPipe
            .isGMReqFIFOWrRdy(wf->scalarRdGmReqsInPipe
                              + wf->scalarWrGmReqsInPipe))
        {
            rdy = false;
            stats.dispNrdyStalls[SCH_SCALAR_MEM_FIFO_NRDY]++;
        }
        if (!rdy) {
            return false;
        }
    } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isLocalMem()) {
        // Vector Local Memory instruction
        bool rdy = true;
        if (!locMemIssueRdy) {
            rdy = false;
            stats.dispNrdyStalls[SCH_LOCAL_MEM_ISSUE_NRDY]++;
        }
        if (!locMemBusRdy) {
            rdy = false;
            stats.dispNrdyStalls[SCH_LOCAL_MEM_BUS_BUSY_NRDY]++;
        }
        if (!computeUnit.localMemoryPipe.
            isLMReqFIFOWrRdy(wf->rdLmReqsInPipe + wf->wrLmReqsInPipe)) {
            rdy = false;
            stats.dispNrdyStalls[SCH_LOCAL_MEM_FIFO_NRDY]++;
        }
        if (!rdy) {
            return false;
        }
    } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isFlat()) {
        // Vector Flat memory instruction
        bool rdy = true;
        if (!glbMemIssueRdy || !locMemIssueRdy) {
            rdy = false;
            stats.dispNrdyStalls[SCH_FLAT_MEM_ISSUE_NRDY]++;
        }
        if (!glbMemBusRdy || !locMemBusRdy) {
            rdy = false;
            stats.dispNrdyStalls[SCH_FLAT_MEM_BUS_BUSY_NRDY]++;
        }
        if (!computeUnit.globalMemoryPipe.coalescerReady(gpu_dyn_inst)) {
            rdy = false;
            stats.dispNrdyStalls[SCH_FLAT_MEM_COALESCER_NRDY]++;
        }
        if (!computeUnit.globalMemoryPipe.outstandingReqsCheck(gpu_dyn_inst)) {
            rdy = false;
            stats.dispNrdyStalls[SCH_FLAT_MEM_REQS_NRDY]++;
        }
        if (!computeUnit.localMemoryPipe.
            isLMReqFIFOWrRdy(wf->rdLmReqsInPipe + wf->wrLmReqsInPipe)) {
            rdy = false;
            stats.dispNrdyStalls[SCH_FLAT_MEM_FIFO_NRDY]++;
        }
        if (!rdy) {
            return false;
        }
    } else {
        panic("%s: unknown instr checked for readiness",
              gpu_dyn_inst->disassemble());
        return false;
    }
    stats.dispNrdyStalls[SCH_RDY]++;
    return true;
}

void
ScheduleStage::fillDispatchList()
{
    // update execution resource status
    checkMemResources();
    // iterate execution resources
    for (int j = 0; j < computeUnit.numExeUnits(); j++) {
        assert(toExecute.dispatchStatus(j) == EMPTY);

        // iterate waves in schList to pick one for dispatch
        auto schIter = schList.at(j).begin();
        bool dispatched = false;
        while (schIter != schList.at(j).end()) {
            // only attempt to dispatch if status is RFREADY
            if (schIter->second == RFREADY) {
                // Check if this wave is ready for dispatch
                bool dispRdy = dispatchReady(schIter->first);
                if (!dispatched && dispRdy) {
                    // No other wave has been dispatched for this exe
                    // resource, and this wave is ready. Place this wave
                    // on dispatchList and make it ready for execution
                    // next cycle.

                    // Acquire a coalescer token if it is a global mem
                    // operation.
                    GPUDynInstPtr mp = schIter->first;
                    if (!mp->isMemSync() && !mp->isScalar() &&
                        (mp->isGlobalMem() || mp->isFlat())) {
                        computeUnit.globalMemoryPipe.acqCoalescerToken(mp);
                    }

                    doDispatchListTransition(j, EXREADY, schIter->first);
                    DPRINTF(GPUSched, "dispatchList[%d]: fillDispatchList: "
                            "EMPTY->EXREADY\n", j);
                    schIter->first = nullptr;
                    schIter = schList.at(j).erase(schIter);
                    dispatched = true;
                } else {
                    // Either another wave has been dispatched, or this wave
                    // was not ready, so it is stalled this cycle
                    schIter->first->wavefront()->stats.schStalls++;
                    if (!dispRdy) {
                        // not ready for dispatch, increment stall stat
                        schIter->first->wavefront()->stats.schResourceStalls++;
                    }
                    // Examine next wave for this resource
                    schIter++;
                }
            } else {
                // Wave not in RFREADY, try next wave
                schIter++;
            }
        }

        // Increment stall count if no wave sent to dispatchList for
        // current execution resource
        if (!dispatched) {
            stats.schListToDispListStalls[j]++;
        } else {
            stats.schListToDispList[j]++;
        }
    }
}

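// A FLAT instruction needs both the VRF->Global Memory bus and the
// VRF->LDS bus (GFx8 behavior, as noted below), so when the GM pipe holds
// a ready FLAT wave, any wave the LM pipe selected must lose arbitration
// and be re-inserted into schList.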
void
ScheduleStage::arbitrateVrfToLdsBus()
{
    // Arbitrate the VRF->GM and VRF->LDS buses for Flat memory ops
    // Note: a Flat instruction in GFx8 reserves both VRF->Glb memory bus
    // and a VRF->LDS bus. In GFx9, this is not the case.

    // iterate the GM pipelines
    for (int i = 0; i < computeUnit.numVectorGlobalMemUnits; i++) {
        // get the GM pipe index in the dispatchList
        int gm_exe_unit = computeUnit.firstMemUnit() + i;
        // get the wave in the dispatchList
        GPUDynInstPtr &gpu_dyn_inst
            = toExecute.readyInst(gm_exe_unit);
        // If the WF is valid, ready to execute, and the instruction
        // is a flat access, arbitrate with the WF's assigned LM pipe
        if (gpu_dyn_inst && toExecute.dispatchStatus(gm_exe_unit)
            == EXREADY && gpu_dyn_inst->isFlat()) {
            Wavefront *wf = gpu_dyn_inst->wavefront();
            // If the associated LM pipe also has a wave selected, block
            // that wave and let the Flat instruction issue. The WF in the
            // LM pipe is added back to the schList for consideration next
            // cycle.
            if (toExecute.dispatchStatus(wf->localMem) == EXREADY) {
                reinsertToSchList(wf->localMem, toExecute
                                  .readyInst(wf->localMem));
                // Increment stall stats for LDS-VRF arbitration
                stats.ldsBusArbStalls++;
                toExecute.readyInst(wf->localMem)
                    ->wavefront()->stats.schLdsArbStalls++;
            }
            // With arbitration of LM pipe complete, transition the
            // LM pipe to SKIP state in the dispatchList to inform EX stage
            // that a Flat instruction is executing next cycle
            doDispatchListTransition(wf->localMem, SKIP, gpu_dyn_inst);
            DPRINTF(GPUSched, "dispatchList[%d]: arbVrfLds: "
                    "EXREADY->SKIP\n", wf->localMem);
        }
    }
}

void
ScheduleStage::checkRfOperandReadComplete()
{
    // Iterate the schList queues and check if operand reads
    // have completed in the RFs. If so, mark the wave as ready for
    // selection for dispatchList
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        for (auto &p : schList.at(j)) {
            const GPUDynInstPtr &gpu_dyn_inst = p.first;
            assert(gpu_dyn_inst);
            Wavefront *wf = gpu_dyn_inst->wavefront();

            // Increment the number of cycles the wave spends in the
            // SCH stage, since this loop visits every wave in SCH.
            wf->stats.schCycles++;

            bool vrfRdy = true;
            if (!gpu_dyn_inst->isScalar()) {
                vrfRdy = computeUnit.vrf[wf->simdId]
                    ->operandReadComplete(wf, gpu_dyn_inst);
            }
            bool srfRdy = computeUnit.srf[wf->simdId]
                ->operandReadComplete(wf, gpu_dyn_inst);
            bool operandsReady = vrfRdy && srfRdy;
            if (operandsReady) {
                DPRINTF(GPUSched, "schList[%d]: WV[%d] operands ready for: "
                        "%d: %s\n", j, wf->wfDynId, gpu_dyn_inst->seqNum(),
                        gpu_dyn_inst->disassemble());
                DPRINTF(GPUSched, "schList[%d]: WV[%d] RFBUSY->RFREADY\n",
                        j, wf->wfDynId);
                p.second = RFREADY;
            } else {
                DPRINTF(GPUSched, "schList[%d]: WV[%d] operands not ready "
                        "for: %d: %s\n", j, wf->wfDynId,
                        gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());

                // operands not ready yet, increment SCH stage stats
                // aggregate to all wavefronts on the CU
                p.second = RFBUSY;

                // Increment stall stats
                wf->stats.schStalls++;
                wf->stats.schOpdNrdyStalls++;

                stats.opdNrdyStalls[SCH_RF_OPD_NRDY]++;
                if (!vrfRdy) {
                    stats.opdNrdyStalls[SCH_VRF_OPD_NRDY]++;
                }
                if (!srfRdy) {
                    stats.opdNrdyStalls[SCH_SRF_OPD_NRDY]++;
                }
            }
        }
    }
}

void
ScheduleStage::reserveResources()
{
    std::vector<bool> exeUnitReservations;
    exeUnitReservations.resize(computeUnit.numExeUnits(), false);

    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        GPUDynInstPtr &gpu_dyn_inst = toExecute.readyInst(j);
        if (gpu_dyn_inst) {
            DISPATCH_STATUS s = toExecute.dispatchStatus(j);
            Wavefront *wf = gpu_dyn_inst->wavefront();
            if (s == EMPTY) {
                continue;
            } else if (s == EXREADY) {
                // Wave is ready for execution
                std::vector<int> execUnitIds = wf->reserveResources();

                if (!gpu_dyn_inst->isScalar()) {
                    computeUnit.vrf[wf->simdId]
                        ->dispatchInstruction(gpu_dyn_inst);
                }
                computeUnit.srf[wf->simdId]->dispatchInstruction(gpu_dyn_inst);

                std::stringstream ss;
                for (auto id : execUnitIds) {
                    ss << id << " ";
                }
                DPRINTF(GPUSched, "dispatchList[%d]: SIMD[%d] WV[%d]: %d: %s"
                        " Reserving ExeRes[ %s]\n",
                        j, wf->simdId, wf->wfDynId, gpu_dyn_inst->seqNum(),
                        gpu_dyn_inst->disassemble(), ss.str());
                // mark the resources as reserved for this cycle
                for (auto execUnitId : execUnitIds) {
                    panic_if(exeUnitReservations.at(execUnitId),
                             "Execution unit %d is reserved!!!\n"
                             "SIMD[%d] WV[%d]: %d: %s",
                             execUnitId, wf->simdId, wf->wfDynId,
                             gpu_dyn_inst->seqNum(),
                             gpu_dyn_inst->disassemble());
                    exeUnitReservations.at(execUnitId) = true;
                }

                // If wavefront::reserveResources reserved multiple resources,
                // then we're executing a flat memory instruction. This means
                // that we've reserved a global and local memory unit. Thus,
                // we need to mark the latter execution unit as not available.
                if (execUnitIds.size() > 1) {
                    M5_VAR_USED int lm_exec_unit = wf->localMem;
                    assert(toExecute.dispatchStatus(lm_exec_unit)
                           == SKIP);
                }
            } else if (s == SKIP) {
                // Shared Memory pipe reserved for FLAT instruction.
                // Verify the GM pipe for this wave is ready to execute
                // and the wave in the GM pipe is the same as the wave
                // in the LM pipe
                M5_VAR_USED int gm_exec_unit = wf->globalMem;
                assert(wf->wfDynId == toExecute
                       .readyInst(gm_exec_unit)->wfDynId);
                assert(toExecute.dispatchStatus(gm_exec_unit)
                       == EXREADY);
            }
        }
    }
}

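// deleteFromSch removes a wave from the wavesInSch tracking set. exec()
// uses that set to keep a wave with an instruction already in SCH off the
// ready lists, so erasing the wave here makes it schedulable again.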
void
ScheduleStage::deleteFromSch(Wavefront *w)
{
    wavesInSch.erase(w->wfDynId);
}

ScheduleStage::ScheduleStageStats::ScheduleStageStats(Stats::Group *parent,
                                                      int num_exec_units)
    : Stats::Group(parent, "ScheduleStage"),
      ADD_STAT(rdyListEmpty, "number of cycles no wave on ready list per "
               "execution resource"),
      ADD_STAT(rdyListNotEmpty, "number of cycles one or more wave on ready "
               "list per execution resource"),
      ADD_STAT(addToSchListStalls, "number of cycles a wave is not added to "
               "schList per execution resource when ready list is not empty"),
      ADD_STAT(schListToDispList, "number of cycles a wave is added to "
               "dispatchList per execution resource"),
      ADD_STAT(schListToDispListStalls, "number of cycles no wave is added to"
               " dispatchList per execution resource"),
      ADD_STAT(rfAccessStalls, "number of stalls due to RF access denied"),
      ADD_STAT(ldsBusArbStalls, "number of stalls due to VRF->LDS bus "
               "conflicts"),
      ADD_STAT(opdNrdyStalls, "number of stalls in SCH due to operands not "
               "ready"),
      ADD_STAT(dispNrdyStalls, "number of stalls in SCH due to resource not "
               "ready")
{
    rdyListNotEmpty.init(num_exec_units);
    rdyListEmpty.init(num_exec_units);
    addToSchListStalls.init(num_exec_units);
    schListToDispList.init(num_exec_units);
    schListToDispListStalls.init(num_exec_units);
    opdNrdyStalls.init(SCH_RF_OPD_NRDY_CONDITIONS);
    dispNrdyStalls.init(SCH_NRDY_CONDITIONS);
    rfAccessStalls.init(SCH_RF_ACCESS_NRDY_CONDITIONS);

    opdNrdyStalls.subname(SCH_VRF_OPD_NRDY, csprintf("VRF"));
    opdNrdyStalls.subname(SCH_SRF_OPD_NRDY, csprintf("SRF"));
    opdNrdyStalls.subname(SCH_RF_OPD_NRDY, csprintf("RF"));

    dispNrdyStalls.subname(SCH_SCALAR_ALU_NRDY, csprintf("ScalarAlu"));
    dispNrdyStalls.subname(SCH_VECTOR_ALU_NRDY, csprintf("VectorAlu"));
    dispNrdyStalls.subname(SCH_VECTOR_MEM_ISSUE_NRDY,
                           csprintf("VectorMemIssue"));
    dispNrdyStalls.subname(SCH_VECTOR_MEM_BUS_BUSY_NRDY,
                           csprintf("VectorMemBusBusy"));
    dispNrdyStalls.subname(SCH_VECTOR_MEM_COALESCER_NRDY,
                           csprintf("VectorMemCoalescer"));
    dispNrdyStalls.subname(SCH_CEDE_SIMD_NRDY, csprintf("CedeSimd"));
    dispNrdyStalls.subname(SCH_SCALAR_MEM_ISSUE_NRDY,
                           csprintf("ScalarMemIssue"));
    dispNrdyStalls.subname(SCH_SCALAR_MEM_BUS_BUSY_NRDY,
                           csprintf("ScalarMemBusBusy"));
    dispNrdyStalls.subname(SCH_SCALAR_MEM_FIFO_NRDY,
                           csprintf("ScalarMemFIFO"));
    dispNrdyStalls.subname(SCH_LOCAL_MEM_ISSUE_NRDY,
                           csprintf("LocalMemIssue"));
    dispNrdyStalls.subname(SCH_LOCAL_MEM_BUS_BUSY_NRDY,
                           csprintf("LocalMemBusBusy"));
    dispNrdyStalls.subname(SCH_LOCAL_MEM_FIFO_NRDY,
                           csprintf("LocalMemFIFO"));
    dispNrdyStalls.subname(SCH_FLAT_MEM_ISSUE_NRDY,
                           csprintf("FlatMemIssue"));
    dispNrdyStalls.subname(SCH_FLAT_MEM_BUS_BUSY_NRDY,
                           csprintf("FlatMemBusBusy"));
    dispNrdyStalls.subname(SCH_FLAT_MEM_COALESCER_NRDY,
                           csprintf("FlatMemCoalescer"));
    dispNrdyStalls.subname(SCH_FLAT_MEM_FIFO_NRDY,
                           csprintf("FlatMemFIFO"));
    dispNrdyStalls.subname(SCH_RDY, csprintf("Ready"));

    rfAccessStalls.subname(SCH_VRF_RD_ACCESS_NRDY, csprintf("VrfRd"));
    rfAccessStalls.subname(SCH_VRF_WR_ACCESS_NRDY, csprintf("VrfWr"));
    rfAccessStalls.subname(SCH_SRF_RD_ACCESS_NRDY, csprintf("SrfRd"));
    rfAccessStalls.subname(SCH_SRF_WR_ACCESS_NRDY, csprintf("SrfWr"));
    rfAccessStalls.subname(SCH_RF_ACCESS_NRDY, csprintf("Any"));
}