gem5  v21.1.0.2
schedule_stage.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include <unordered_set>
37 
38 #include "base/compiler.hh"
39 #include "debug/GPUSched.hh"
40 #include "debug/GPUVRF.hh"
45 #include "gpu-compute/wavefront.hh"
46 
47 namespace gem5
48 {
49 
// Construct the schedule stage of one compute unit.
//
// Stores references to the owning compute unit and to the two inter-stage
// interfaces, names the stage "<cu name>.ScheduleStage", starts with every
// resource-ready flag false, and builds the stats group with &cu as its
// parent.
//
// p                     - compute unit parameters, forwarded to each
//                         Scheduler that is created
// cu                    - owning compute unit; supplies the number of
//                         execution units and the name prefix
// from_scoreboard_check - interface whose per-unit ready lists this stage
//                         reads
// to_execute            - interface through which this stage hands
//                         instructions to the execute stage
50 ScheduleStage::ScheduleStage(const ComputeUnitParams &p, ComputeUnit &cu,
51  ScoreboardCheckToSchedule &from_scoreboard_check,
52  ScheduleToExecute &to_execute)
53  : computeUnit(cu), fromScoreboardCheck(from_scoreboard_check),
54  toExecute(to_execute),
55  _name(cu.name() + ".ScheduleStage"),
56  vectorAluRdy(false), scalarAluRdy(false), scalarMemBusRdy(false),
57  scalarMemIssueRdy(false), glbMemBusRdy(false), glbMemIssueRdy(false),
58  locMemBusRdy(false), locMemIssueRdy(false), stats(&cu, cu.numExeUnits())
59 {
  // One Scheduler per execution unit, each constructed from p.
60  for (int j = 0; j < cu.numExeUnits(); ++j) {
61  scheduler.emplace_back(p);
62  }
  // Begin with no wave recorded as being in this stage and with one
  // empty schList queue per execution unit.
63  wavesInSch.clear();
64  schList.resize(cu.numExeUnits());
65  for (auto &dq : schList) {
66  dq.clear();
67  }
68 }
69 
// Body of ScheduleStage::~ScheduleStage(). The cross-reference index at
// the end of this listing places the definition at listing line 70, which
// is not shown here. Empties the scheduler vector, the set of wave ids in
// the stage, and the per-unit schList queues.
71 {
72  scheduler.clear();
73  wavesInSch.clear();
74  schList.clear();
75 }
76 
// ScheduleStage::init(), per the cross-reference index (definition at
// listing line 78). The name line and listing lines 81, 84 and 87-88 are
// absent from this listing, so only the skeleton below is visible.
77 void
79 {
80 
  // Closing argument of a check whose condition (listing line 81) is not
  // shown; the message says the scheduler vector and the CU's ready list
  // are expected to have the same number of entries.
82  "Scheduler should have same number of entries as CU's readyList");
  // Loop over every execution unit; the loop body (listing line 84) is
  // not shown.
83  for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
85  }
86 
89 }
90 
// Main routine of the schedule stage. NOTE(review): the name line (listing
// line 92) is absent from this listing; presumably this is
// ScheduleStage::exec() - confirm against schedule_stage.hh.
//
// Visible steps, in order:
//  1. reset the interface to the execute stage;
//  2. remove from every ready list the waves already recorded in
//     wavesInSch;
//  3. for each memory execution unit, then for every other unit, pick one
//     wave and try to add its oldest buffered instruction to schList;
//  4. a sequence of follow-up calls described by the trailing comments.
//     The calls themselves (listing lines 192, 198, 205, 208, 211) are
//     not shown.
91 void
93 {
94  toExecute.reset();
95 
96  // Update readyList
97  for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
  // Listing lines 98-103 are not shown.
  // Erase while iterating: erase() returns the next valid iterator, so
  // the loop header deliberately has no increment clause.
104  for (auto wIt = fromScoreboardCheck.readyWFs(j).begin();
105  wIt != fromScoreboardCheck.readyWFs(j).end();) {
106  if (wavesInSch.find((*wIt)->wfDynId) != wavesInSch.end()) {
107  *wIt = nullptr;
108  wIt = fromScoreboardCheck.readyWFs(j).erase(wIt);
109  } else {
110  wIt++;
111  }
112  }
113  }
114 
115  // Attempt to add another wave for each EXE type to schList queues
116  // VMEM resources are iterated first, effectively giving priority
117  // to VMEM over VALU for scheduling read of operands to the RFs.
118  // Scalar Memory are iterated after VMEM
119 
120  // Iterate VMEM and SMEM
121  int firstMemUnit = computeUnit.firstMemUnit();
122  int lastMemUnit = computeUnit.lastMemUnit();
123  for (int j = firstMemUnit; j <= lastMemUnit; j++) {
124  int readyListSize = fromScoreboardCheck.readyWFs(j).size();
125  // If no wave is ready to be scheduled on the execution resource
126  // then skip scheduling for this execution resource
127  if (!readyListSize) {
128  stats.rdyListEmpty[j]++;
129  continue;
130  }
  // Listing line 131 is not shown.
132 
133  // Pick a wave and attempt to add it to schList
134  Wavefront *wf = scheduler[j].chooseWave();
135  GPUDynInstPtr &gpu_dyn_inst = wf->instructionBuffer.front();
136  assert(gpu_dyn_inst);
137  if (!addToSchList(j, gpu_dyn_inst)) {
138  // For waves not added to schList, increment count of cycles
139  // this wave spends in SCH stage.
140  wf->stats.schCycles++;
  // Listing line 141 is not shown.
142  } else {
  // The instruction was accepted: bump the wave's issued-instruction
  // counters by instruction class. Scalar or group-segment -> LGKM;
  // anything else -> VMEM, plus LGKM when the instruction is flat.
143  if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
144  wf->incLGKMInstsIssued();
145  } else {
146  wf->incVMemInstsIssued();
147  if (gpu_dyn_inst->isFlat()) {
148  wf->incLGKMInstsIssued();
149  }
150  }
  // Stores to the global segment additionally bump the EXP counter.
151  if (gpu_dyn_inst->isStore() && gpu_dyn_inst->isGlobalSeg()) {
152  wf->incExpInstsIssued();
153  }
154  }
155  }
156 
157  // Iterate everything else
158  for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
159  // skip the VMEM resources
160  if (j >= firstMemUnit && j <= lastMemUnit) {
161  continue;
162  }
163  int readyListSize = fromScoreboardCheck.readyWFs(j).size();
164  // If no wave is ready to be scheduled on the execution resource
165  // then skip scheduling for this execution resource
166  if (!readyListSize) {
167  stats.rdyListEmpty[j]++;
168  continue;
169  }
  // Listing line 170 is not shown.
171 
172  // Pick a wave and attempt to add it to schList
173  Wavefront *wf = scheduler[j].chooseWave();
174  GPUDynInstPtr &gpu_dyn_inst = wf->instructionBuffer.front();
175  assert(gpu_dyn_inst);
176  if (!addToSchList(j, gpu_dyn_inst)) {
177  // For waves not added to schList, increment count of cycles
178  // this wave spends in SCH stage.
179  wf->stats.schCycles++;
  // Listing line 180 is not shown.
181  }
182  }
183 
184  // At this point, the schList queue per EXE type may contain
185  // multiple waves, in order of age (oldest to youngest).
186  // Wave may be in RFBUSY, indicating they are waiting for registers
187  // to be read, or in RFREADY, indicating they are candidates for
188  // the dispatchList and execution
189 
  // NOTE: each comment group below introduces a call that is absent from
  // this listing (listing lines 192, 198, 205, 208 and 211).
190  // Iterate schList queues and check if any of the waves have finished
191  // reading their operands, moving those waves to RFREADY status
193 
194  // Fill the dispatch list with the oldest wave of each EXE type that
195  // is ready to execute
196  // Wave is picked if status in schList is RFREADY and it passes resource
197  // ready checks similar to those currently in SCB
199 
200  // Resource arbitration on waves in dispatchList
201  // Losing waves are re-inserted to the schList at a location determined
202  // by wave age
203 
204  // Arbitrate access to the VRF->LDS bus
206 
207  // Schedule write operations to the register files
209 
210  // Lastly, reserve resources for waves that are ready to execute.
212 }
213 
// Forwards a dispatch-list state change, together with the instruction it
// concerns, to the execute-stage interface.
// NOTE(review): the name line (listing line 215) is absent. The calls
// elsewhere in this file of the form
// doDispatchListTransition(unit, status, inst) suggest this is that
// function taking (unitId, s, gpu_dyn_inst) - confirm against the header.
214 void
216  const GPUDynInstPtr &gpu_dyn_inst)
217 {
218  toExecute.dispatchTransition(gpu_dyn_inst, unitId, s);
219 }
220 
// Forwards a dispatch-list state change that carries no instruction to
// the execute-stage interface.
// NOTE(review): the signature (listing line 222) is absent; this looks
// like a two-argument overload of the transition helper defined just
// above, taking (unitId, s) - confirm against the header.
221 void
223 {
224  toExecute.dispatchTransition(unitId, s);
225 }
226 
// Try to schedule the register-file writes of gpu_dyn_inst.
//
// Both register files of the wave's SIMD unit are asked whether the write
// operands can be scheduled. The vector RF is only consulted (and only
// scheduled) for non-scalar instructions; the scalar RF always is.
//
// exeType      - execution unit index; not referenced in the lines shown
// gpu_dyn_inst - instruction whose writes are scheduled; must be non-null
//
// Returns true when both files accepted and the writes were scheduled.
// Otherwise returns false after incrementing the wave's schStalls and
// schRfAccessStalls counters.
227 bool
228 ScheduleStage::schedRfWrites(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
229 {
230  assert(gpu_dyn_inst);
231  Wavefront *wf = gpu_dyn_inst->wavefront();
  // Defaults to true so a scalar instruction never fails on the VRF.
232  bool accessVrfWr = true;
233  if (!gpu_dyn_inst->isScalar()) {
234  accessVrfWr = computeUnit.vrf[wf->simdId]
235  ->canScheduleWriteOperands(wf, gpu_dyn_inst);
236  }
237  bool accessSrfWr = computeUnit.srf[wf->simdId]
238  ->canScheduleWriteOperands(wf, gpu_dyn_inst);
239  bool accessRf = accessVrfWr && accessSrfWr;
240  if (accessRf) {
241  if (!gpu_dyn_inst->isScalar()) {
242  computeUnit.vrf[wf->simdId]->scheduleWriteOperands(wf,
243  gpu_dyn_inst);
244  }
245  computeUnit.srf[wf->simdId]->scheduleWriteOperands(wf, gpu_dyn_inst);
246  return true;
247  } else {
  // Listing lines 248, 250 and 253 are not shown, so the two
  // per-register-file branches below appear empty here.
249  if (!accessSrfWr) {
251  }
252  if (!accessVrfWr) {
254  }
255 
256  // Increment stall counts for WF
257  wf->stats.schStalls++;
258  wf->stats.schRfAccessStalls++;
259  }
260  return false;
261 }
262 
// ScheduleStage::scheduleRfDestOperands(), per the cross-reference index
// (definition at listing line 264; the name line is not shown).
//
// For every execution unit that is not skipped by the guard below, takes
// the instruction on the dispatch list and tries to schedule its
// register-file writes with schedRfWrites(). An instruction that fails is
// put back into schList via reinsertToSchList().
263 void
265 {
266  for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
  // Skip condition: an EMPTY dispatch slot, or a second condition on
  // listing line 268 that is not shown.
267  if (toExecute.dispatchStatus(j) == EMPTY ||
269  continue;
270  }
271 
272  // get the wave on dispatch list and attempt to allocate write
273  // resources in the RFs
274  const GPUDynInstPtr &gpu_dyn_inst = toExecute.readyInst(j);
275  assert(gpu_dyn_inst);
276  Wavefront *wf = gpu_dyn_inst->wavefront();
277  if (!schedRfWrites(j, gpu_dyn_inst)) {
278  reinsertToSchList(j, gpu_dyn_inst);
  // Listing line 279 is not shown.
280  // if this is a flat inst, also transition the LM pipe to empty
281  // Note: since FLAT/LM arbitration occurs before scheduling
282  // destination operands to the RFs, it is possible that a LM
283  // instruction lost arbitration, but would have been able to
284  // pass the RF destination operand check here, and execute
285  // instead of the FLAT.
286  if (wf->instructionBuffer.front()->isFlat()) {
  // A flat instruction is expected to have left the wave's
  // local-memory pipe in SKIP state.
287  assert(toExecute.dispatchStatus(wf->localMem)
288  == SKIP);
  // Listing line 289 is not shown.
290  }
291  }
292  }
293 }
294 
// Try to admit gpu_dyn_inst into the schList queue of one execution unit.
//
// Admission requires that the register files of the wave's SIMD unit can
// schedule the instruction's operand reads. The vector RF is only
// consulted for non-scalar instructions; the scalar RF always is.
//
// exeType      - index of the schList queue / execution unit
// gpu_dyn_inst - instruction to admit; must be non-null
//
// Returns true when the instruction was appended to schList in RFBUSY
// state, its wave id was added to wavesInSch and its operand reads were
// scheduled. Otherwise returns false after incrementing the wave's
// schStalls and schRfAccessStalls counters.
295 bool
296 ScheduleStage::addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
297 {
298  // Attempt to add the wave to the schList if the VRF can support the
299  // wave's next instruction
300  assert(gpu_dyn_inst);
301  Wavefront *wf = gpu_dyn_inst->wavefront();
  // Defaults to true so a scalar instruction never fails on the VRF.
302  bool accessVrf = true;
303  if (!gpu_dyn_inst->isScalar()) {
304  accessVrf = computeUnit.vrf[wf->simdId]
305  ->canScheduleReadOperands(wf, gpu_dyn_inst);
306  }
307  bool accessSrf = computeUnit.srf[wf->simdId]
308  ->canScheduleReadOperands(wf, gpu_dyn_inst);
309  // If RFs can support instruction, add to schList in RFBUSY state,
310  // place wave in wavesInSch and pipeMap, and schedule Rd/Wr operands
311  // to the VRF
312  bool accessRf = accessVrf && accessSrf;
313  if (accessRf) {
314  DPRINTF(GPUSched, "schList[%d]: Adding: SIMD[%d] WV[%d]: %d: %s\n",
315  exeType, wf->simdId, wf->wfDynId,
316  gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
317 
  // Listing line 318 is not shown.
319  wavesInSch.emplace(wf->wfDynId);
320  schList.at(exeType).push_back(std::make_pair(gpu_dyn_inst, RFBUSY));
  // The bodies of the next three conditions (listing lines 322, 325
  // and 328) are not shown.
321  if (wf->isOldestInstBarrier() && wf->hasBarrier()) {
323  }
324  if (wf->isOldestInstWaitcnt()) {
326  }
327  if (wf->isOldestInstSleep()) {
329  }
330  if (!gpu_dyn_inst->isScalar()) {
331  computeUnit.vrf[wf->simdId]
332  ->scheduleReadOperands(wf, gpu_dyn_inst);
333  }
334  computeUnit.srf[wf->simdId]->scheduleReadOperands(wf, gpu_dyn_inst);
335 
336  DPRINTF(GPUSched, "schList[%d]: Added: SIMD[%d] WV[%d]: %d: %s\n",
337  exeType, wf->simdId, wf->wfDynId,
338  gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
339  return true;
340  } else {
341  // Number of stall cycles due to RF access denied
  // Listing lines 342, 346 and 349 are not shown, so the per-reason
  // branches below appear empty here.
343  // Count number of denials due to each reason
344  // Multiple items may contribute to the denied request
345  if (!accessVrf) {
347  }
348  if (!accessSrf) {
350  }
351 
352  // Increment stall counts for WF
353  wf->stats.schStalls++;
354  wf->stats.schRfAccessStalls++;
355  DPRINTF(GPUSched, "schList[%d]: Could not add: "
356  "SIMD[%d] WV[%d]: %d: %s\n",
357  exeType, wf->simdId, wf->wfDynId,
358  gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
359  }
360  return false;
361 }
362 
// ScheduleStage::reinsertToSchList(int exeType, const GPUDynInstPtr &),
// per the cross-reference index (definition at listing line 364; the name
// line is not shown).
//
// Puts gpu_dyn_inst back into the schList queue of exeType in RFREADY
// state, ahead of the first queued entry whose wfDynId is not smaller
// than the instruction's own wfDynId.
363 void
365  const GPUDynInstPtr &gpu_dyn_inst)
366 {
367  // Insert wave w into schList for specified exeType.
368  // Wave is inserted in age order, with oldest wave being at the
369  // front of the schList
370  assert(gpu_dyn_inst);
  // Linear scan for the insertion point; stops at end() when every
  // queued entry has a smaller wfDynId.
371  auto schIter = schList.at(exeType).begin();
372  while (schIter != schList.at(exeType).end()
373  && schIter->first->wfDynId < gpu_dyn_inst->wfDynId) {
374  schIter++;
375  }
376  schList.at(exeType).insert(schIter, std::make_pair(gpu_dyn_inst, RFREADY));
377 }
378 
// Refreshes the six memory-resource ready flags (bus and issue readiness
// for scalar, global and local memory). Each flag is first cleared and
// then set to true under a condition.
// NOTE(review): the name line (listing line 380) and every `if` line
// (listing lines 386, 390, 397, 401, 408, 412) are absent from this
// listing, so the conditions cannot be read here and the closing braces
// below appear unmatched. Presumably this is
// ScheduleStage::checkMemResources() - confirm against the header.
379 void
381 {
382  // Check for resource availability in the next cycle
383  scalarMemBusRdy = false;
384  scalarMemIssueRdy = false;
385  // check if there is a SRF->Global Memory bus available and
387  scalarMemBusRdy = true;
388  }
389  // check if we can issue a scalar memory instruction
391  scalarMemIssueRdy = true;
392  }
393 
394  glbMemBusRdy = false;
395  glbMemIssueRdy = false;
396  // check if there is a VRF->Global Memory bus available
398  glbMemBusRdy = true;
399  }
400  // check if we can issue a Global memory instruction
402  glbMemIssueRdy = true;
403  }
404 
405  locMemBusRdy = false;
406  locMemIssueRdy = false;
407  // check if there is a VRF->LDS bus available
409  locMemBusRdy = true;
410  }
411  // check if we can issue a LDS instruction
413  locMemIssueRdy = true;
414  }
415 }
416 
// ScheduleStage::dispatchReady(const GPUDynInstPtr &gpu_dyn_inst), per the
// cross-reference index (definition at listing line 418; the name line is
// not shown).
//
// Decides whether the execution resources required by gpu_dyn_inst are
// available. The vector and scalar ALU flags are recomputed on every call
// from the wave's SIMD unit and scalar ALU. The memory flags
// (scalarMem*, glbMem*, locMem*) are only read here.
//
// Instructions are classified in this order: NOP, end-of-kernel,
// barrier/branch/ALU, vector global memory, scalar global memory, vector
// local memory, vector flat memory. An instruction matching none of these
// triggers panic().
//
// Returns true when every resource for the matched class is ready, false
// otherwise.
//
// Many lines that sat between the visible statements are absent from this
// listing (for example listing lines 437, 440, 464, 468, 474, 492-493),
// including several `if` condition lines in the memory branches.
417 bool
419 {
420  assert(gpu_dyn_inst);
421  Wavefront *wf = gpu_dyn_inst->wavefront();
422  vectorAluRdy = false;
423  scalarAluRdy = false;
424  // check for available vector/scalar ALUs in the next cycle
425  if (computeUnit.vectorALUs[wf->simdId].rdy(Cycles(1))) {
426  vectorAluRdy = true;
427  }
428  if (computeUnit.scalarALUs[wf->scalarAlu].rdy(Cycles(1))) {
429  scalarAluRdy = true;
430  }
431 
432  if (gpu_dyn_inst->isNop()) {
433  // S_NOP requires SALU. V_NOP requires VALU.
434  // TODO: Scalar NOP does not require SALU in hardware,
435  // and is executed out of IB directly.
436  if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
438  return false;
439  } else if (!gpu_dyn_inst->isScalar() && !vectorAluRdy) {
441  return false;
442  }
443  } else if (gpu_dyn_inst->isEndOfKernel()) {
444  // EndPgm instruction
445  if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
447  return false;
448  }
449  } else if (gpu_dyn_inst->isBarrier() || gpu_dyn_inst->isBranch()
450  || gpu_dyn_inst->isALU()) {
451  // Barrier, Branch, or ALU instruction
452  if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
454  return false;
455  } else if (!gpu_dyn_inst->isScalar() && !vectorAluRdy) {
457  return false;
458  }
459  } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
460  // Vector Global Memory instruction
  // Every condition is evaluated before returning; rdy only records
  // whether any of them failed.
461  bool rdy = true;
462  if (!glbMemIssueRdy) {
463  rdy = false;
465  }
466  if (!glbMemBusRdy) {
467  rdy = false;
469  }
470  if (!computeUnit.globalMemoryPipe.coalescerReady(gpu_dyn_inst)) {
471  rdy = false;
473  }
  // The condition guarding the next assignment (listing line 474) is
  // not shown.
475  rdy = false;
477  }
478  if (!rdy) {
479  return false;
480  }
481  } else if (gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
482  // Scalar Global Memory instruction
483  bool rdy = true;
484  if (!scalarMemIssueRdy) {
485  rdy = false;
487  }
488  if (!scalarMemBusRdy) {
489  rdy = false;
491  }
  // Start of the next condition (listing lines 492-493) is not shown;
  // its visible tail adds the wave's scalar write requests in the pipe.
494  + wf->scalarWrGmReqsInPipe))
495  {
496  rdy = false;
498  }
499  if (!rdy) {
500  return false;
501  }
502  } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isLocalMem()) {
503  // Vector Local Memory instruction
504  bool rdy = true;
505  if (!locMemIssueRdy) {
506  rdy = false;
508  }
509  if (!locMemBusRdy) {
510  rdy = false;
512  }
  // Start of the next condition (listing line 513) is not shown; the
  // visible tail passes the wave's in-flight local-memory read plus
  // write request count to isLMReqFIFOWrRdy().
514  isLMReqFIFOWrRdy(wf->rdLmReqsInPipe + wf->wrLmReqsInPipe)) {
515  rdy = false;
517  }
518  if (!rdy) {
519  return false;
520  }
521  } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isFlat()) {
522  // Vector Flat memory instruction
  // A flat instruction needs both the global and the local memory
  // issue and bus flags to be ready.
523  bool rdy = true;
524  if (!glbMemIssueRdy || !locMemIssueRdy) {
525  rdy = false;
527  }
528  if (!glbMemBusRdy || !locMemBusRdy) {
529  rdy = false;
531  }
532  if (!computeUnit.globalMemoryPipe.coalescerReady(gpu_dyn_inst)) {
533  rdy = false;
535  }
  // The condition guarding the next assignment (listing line 536) is
  // not shown.
537  rdy = false;
539  }
  // Start of the next condition (listing line 540) is not shown.
541  isLMReqFIFOWrRdy(wf->rdLmReqsInPipe + wf->wrLmReqsInPipe)) {
542  rdy = false;
544  }
545  if (!rdy) {
546  return false;
547  }
548  } else {
549  panic("%s: unknown instr checked for readiness",
550  gpu_dyn_inst->disassemble());
551  return false;
552  }
  // Listing line 553 is not shown.
554  return true;
555 }
556 
// ScheduleStage::fillDispatchList(), per the cross-reference index
// (definition at listing line 558; the name line is not shown).
//
// For each execution unit, walks its schList queue from the front. The
// first entry that is RFREADY and passes dispatchReady() is moved to the
// dispatch list in EXREADY state and erased from the queue. Every other
// RFREADY entry visited has its wave's schStalls counter incremented, and
// also schResourceStalls when it failed dispatchReady(). Entries not in
// RFREADY are skipped untouched.
557 void
559 {
560  // update execution resource status
  // Listing line 561 is not shown.
562  // iterate execution resources
563  for (int j = 0; j < computeUnit.numExeUnits(); j++) {
  // Every dispatch slot is expected to be EMPTY on entry.
564  assert(toExecute.dispatchStatus(j) == EMPTY);
565 
566  // iterate waves in schList to pick one for dispatch
567  auto schIter = schList.at(j).begin();
568  bool dispatched = false;
569  while (schIter != schList.at(j).end()) {
570  // only attempt to dispatch if status is RFREADY
571  if (schIter->second == RFREADY) {
572  // Check if this wave is ready for dispatch
  // Evaluated even after a wave has already been dispatched, so
  // that the resource-stall counter below stays accurate.
573  bool dispRdy = dispatchReady(schIter->first);
574  if (!dispatched && dispRdy) {
575  // No other wave has been dispatched for this exe
576  // resource, and this wave is ready. Place this wave
577  // on dispatchList and make it ready for execution
578  // next cycle.
579 
580  // Acquire a coalescer token if it is a global mem
581  // operation.
582  GPUDynInstPtr mp = schIter->first;
583  if (!mp->isMemSync() && !mp->isScalar() &&
584  (mp->isGlobalMem() || mp->isFlat())) {
  // Body (listing line 585) is not shown.
586  }
587 
588  // Set instruction's exec_mask if it's a mem operation
589  if (mp->isMemRef()) {
590  mp->exec_mask = mp->wavefront()->execMask();
591  }
592 
593  doDispatchListTransition(j, EXREADY, schIter->first);
594  DPRINTF(GPUSched, "dispatchList[%d]: fillDispatchList: "
595  "EMPTY->EXREADY\n", j);
  // Clear the queue's pointer, then erase the entry; erase()
  // returns the iterator to continue from.
596  schIter->first = nullptr;
597  schIter = schList.at(j).erase(schIter);
598  dispatched = true;
599  } else {
600  // Either another wave has been dispatched, or this wave
601  // was not ready, so it is stalled this cycle
602  schIter->first->wavefront()->stats.schStalls++;
603  if (!dispRdy) {
604  // not ready for dispatch, increment stall stat
605  schIter->first->wavefront()->stats.schResourceStalls++;
606  }
607  // Examine next wave for this resource
608  schIter++;
609  }
610  } else {
611  // Wave not in RFREADY, try next wave
612  schIter++;
613  }
614  }
615 
616  // Increment stall count if no wave sent to dispatchList for
617  // current execution resource
  // The bodies of both branches (listing lines 619 and 621) are not
  // shown.
618  if (!dispatched) {
620  } else {
622  }
623  }
624 }
625 
// ScheduleStage::arbitrateVrfToLdsBus(), per the cross-reference index
// (definition at listing line 627; the name line is not shown).
//
// For each vector global-memory unit whose dispatch slot holds an EXREADY
// flat instruction, marks the owning wave's local-memory pipe as SKIP in
// the dispatch list. The handling of a wave already selected on that
// local-memory pipe is only partly visible: listing lines 649-650 and
// 653-654 are absent.
626 void
628 {
629  // Arbitrate the VRF->GM and VRF->LDS buses for Flat memory ops
630  // Note: a Flat instruction in GFx8 reserves both VRF->Glb memory bus
631  // and a VRF->LDS bus. In GFx9, this is not the case.
632 
633  // iterate the GM pipelines
634  for (int i = 0; i < computeUnit.numVectorGlobalMemUnits; i++) {
635  // get the GM pipe index in the dispatchList
636  int gm_exe_unit = computeUnit.firstMemUnit() + i;
637  // get the wave in the dispatchList
638  GPUDynInstPtr &gpu_dyn_inst
639  = toExecute.readyInst(gm_exe_unit);
640  // If the WF is valid, ready to execute, and the instruction
641  // is a flat access, arbitrate with the WF's assigned LM pipe
642  if (gpu_dyn_inst && toExecute.dispatchStatus(gm_exe_unit)
643  == EXREADY && gpu_dyn_inst->isFlat()) {
644  Wavefront *wf = gpu_dyn_inst->wavefront();
645  // If the associated LM pipe also has a wave selected, block
646  // that wave and let the Flat instruction issue. The WF in the
647  // LM pipe is added back to the schList for consideration next
648  // cycle.
  // The condition and the start of the call that ends on the next
  // line (listing lines 649-650) are not shown.
651  .readyInst(wf->localMem));
652  // Increment stall stats for LDS-VRF arbitration
  // Listing lines 653-654 are not shown; the visible tail increments
  // a wave's schLdsArbStalls counter.
655  ->wavefront()->stats.schLdsArbStalls++;
656  }
657  // With arbitration of LM pipe complete, transition the
658  // LM pipe to SKIP state in the dispatchList to inform EX stage
659  // that a Flat instruction is executing next cycle
660  doDispatchListTransition(wf->localMem, SKIP, gpu_dyn_inst);
661  DPRINTF(GPUSched, "dispatchList[%d]: arbVrfLds: "
662  "EXREADY->SKIP\n", wf->localMem);
663  }
664  }
665 }
666 
// ScheduleStage::checkRfOperandReadComplete(), per the cross-reference
// index (definition at listing line 668; the name line is not shown).
//
// Visits every entry of every schList queue, increments the owning wave's
// schCycles counter, and sets the entry's status from the register files'
// answer: RFREADY when all operand reads are complete, RFBUSY otherwise.
// A busy entry also increments the wave's schStalls and schOpdNrdyStalls
// counters.
667 void
669 {
670  // Iterate the schList queues and check if operand reads
671  // have completed in the RFs. If so, mark the wave as ready for
672  // selection for dispatchList
673  for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
674  for (auto &p : schList.at(j)) {
675  const GPUDynInstPtr &gpu_dyn_inst = p.first;
676  assert(gpu_dyn_inst);
677  Wavefront *wf = gpu_dyn_inst->wavefront();
678 
679  // Increment the number of cycles the wave spends in the
680  // SCH stage, since this loop visits every wave in SCH.
681  wf->stats.schCycles++;
682 
  // Defaults to true so a scalar instruction never waits on the VRF.
683  bool vrfRdy = true;
684  if (!gpu_dyn_inst->isScalar()) {
685  vrfRdy = computeUnit.vrf[wf->simdId]
686  ->operandReadComplete(wf, gpu_dyn_inst);
687  }
688  bool srfRdy = computeUnit.srf[wf->simdId]
689  ->operandReadComplete(wf, gpu_dyn_inst);
690  bool operandsReady = vrfRdy && srfRdy;
691  if (operandsReady) {
692  DPRINTF(GPUSched, "schList[%d]: WV[%d] operands ready for: "
693  "%d: %s\n", j, wf->wfDynId, gpu_dyn_inst->seqNum(),
694  gpu_dyn_inst->disassemble());
695  DPRINTF(GPUSched, "schList[%d]: WV[%d] RFBUSY->RFREADY\n",
696  j, wf->wfDynId);
697  p.second = RFREADY;
698  } else {
699  DPRINTF(GPUSched, "schList[%d]: WV[%d] operands not ready "
700  "for: %d: %s\n", j, wf->wfDynId,
701  gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
702 
703  // operands not ready yet, increment SCH stage stats
704  // aggregate to all wavefronts on the CU
705  p.second = RFBUSY;
706 
707  // Increment stall stats
708  wf->stats.schStalls++;
709  wf->stats.schOpdNrdyStalls++;
710 
  // Listing lines 711, 713 and 716 are not shown, so the two
  // per-register-file branches below appear empty here.
712  if (!vrfRdy) {
714  }
715  if (!srfRdy) {
717  }
718  }
719  }
720  }
721 }
722 
// ScheduleStage::reserveResources(), per the cross-reference index
// (definition at listing line 724; the name line is not shown).
//
// For every execution unit whose dispatch slot holds an instruction:
//  - EXREADY: asks the wave to reserve its execution resources, notifies
//    the register files of the dispatch, and panics if any returned
//    execution unit id has already been reserved earlier in this call.
//  - SKIP: asserts that the wave's global-memory slot holds the same wave
//    in EXREADY state.
//  - EMPTY: nothing to do.
// The declaration of the status variable `s` (listing line 732) is not
// shown.
723 void
725 {
  // Per-call record of which execution units have been reserved.
726  std::vector<bool> exeUnitReservations;
727  exeUnitReservations.resize(computeUnit.numExeUnits(), false);
728 
729  for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
730  GPUDynInstPtr &gpu_dyn_inst = toExecute.readyInst(j);
731  if (gpu_dyn_inst) {
733  Wavefront *wf = gpu_dyn_inst->wavefront();
734  if (s == EMPTY) {
735  continue;
736  } else if (s == EXREADY) {
737  // Wave is ready for execution
738  std::vector<int> execUnitIds = wf->reserveResources();
739 
740  if (!gpu_dyn_inst->isScalar()) {
741  computeUnit.vrf[wf->simdId]
742  ->dispatchInstruction(gpu_dyn_inst);
743  }
744  computeUnit.srf[wf->simdId]->dispatchInstruction(gpu_dyn_inst);
745 
  // Build a space-separated list of the reserved unit ids for the
  // debug message only.
746  std::stringstream ss;
747  for (auto id : execUnitIds) {
748  ss << id << " ";
749  }
750  DPRINTF(GPUSched, "dispatchList[%d]: SIMD[%d] WV[%d]: %d: %s"
751  " Reserving ExeRes[ %s]\n",
752  j, wf->simdId, wf->wfDynId, gpu_dyn_inst->seqNum(),
753  gpu_dyn_inst->disassemble(), ss.str());
754  // mark the resources as reserved for this cycle
755  for (auto execUnitId : execUnitIds) {
756  panic_if(exeUnitReservations.at(execUnitId),
757  "Execution unit %d is reserved!!!\n"
758  "SIMD[%d] WV[%d]: %d: %s",
759  execUnitId, wf->simdId, wf->wfDynId,
760  gpu_dyn_inst->seqNum(),
761  gpu_dyn_inst->disassemble());
762  exeUnitReservations.at(execUnitId) = true;
763  }
764 
765  // If wavefront::reserveResources reserved multiple resources,
766  // then we're executing a flat memory instruction. This means
767  // that we've reserved a global and local memory unit. Thus,
768  // we need to mark the latter execution unit as not available.
769  if (execUnitIds.size() > 1) {
  // lm_exec_unit is only read by the assert below; the
  // GEM5_VAR_USED marker is presumably there to avoid an
  // unused-variable warning when asserts are compiled out.
770  GEM5_VAR_USED int lm_exec_unit = wf->localMem;
771  assert(toExecute.dispatchStatus(lm_exec_unit)
772  == SKIP);
773  }
774  } else if (s == SKIP) {
775  // Shared Memory pipe reserved for FLAT instruction.
776  // Verify the GM pipe for this wave is ready to execute
777  // and the wave in the GM pipe is the same as the wave
778  // in the LM pipe
779  GEM5_VAR_USED int gm_exec_unit = wf->globalMem;
780  assert(wf->wfDynId == toExecute
781  .readyInst(gm_exec_unit)->wfDynId);
782  assert(toExecute.dispatchStatus(gm_exec_unit)
783  == EXREADY);
784  }
785  }
786  }
787 }
788 
// ScheduleStage::deleteFromSch(Wavefront *w), per the cross-reference
// index (definition at listing line 790; the name line is not shown).
// Removes the dynamic id of wave w from wavesInSch, the set this stage
// consults when it filters waves out of the ready lists.
789 void
791 {
792  wavesInSch.erase(w->wfDynId);
793 }
794 
// Constructor of ScheduleStage::ScheduleStageStats, per the
// cross-reference index; its first line (listing line 795) is not shown.
//
// Registers the stage's statistics under a group named "ScheduleStage"
// whose parent is `parent`, and sizes the per-execution-unit vector stats
// to num_exec_units entries.
//
// parent         - statistics group that owns this one
// num_exec_units - number of execution units, used as the length of the
//                  vector stats initialised below
796  statistics::Group *parent, int num_exec_units)
797  : statistics::Group(parent, "ScheduleStage"),
798  ADD_STAT(rdyListEmpty ,"number of cycles no wave on ready list per "
799  "execution resource"),
800  ADD_STAT(rdyListNotEmpty, "number of cycles one or more wave on ready "
801  "list per execution resource"),
802  ADD_STAT(addToSchListStalls, "number of cycles a wave is not added to "
803  "schList per execution resource when ready list is not empty"),
804  ADD_STAT(schListToDispList, "number of cycles a wave is added to "
805  "dispatchList per execution resource"),
806  ADD_STAT(schListToDispListStalls, "number of cycles no wave is added to"
807  " dispatchList per execution resource"),
808  ADD_STAT(rfAccessStalls, "number of stalls due to RF access denied"),
809  ADD_STAT(ldsBusArbStalls, "number of stalls due to VRF->LDS bus "
810  "conflicts"),
811  ADD_STAT(opdNrdyStalls, "number of stalls in SCH due to operands not "
812  "ready"),
813  ADD_STAT(dispNrdyStalls, "number of stalls in SCH due to resource not "
814  "ready")
815 {
816  rdyListNotEmpty.init(num_exec_units);
817  rdyListEmpty.init(num_exec_units);
818  addToSchListStalls.init(num_exec_units);
819  schListToDispList.init(num_exec_units);
820  schListToDispListStalls.init(num_exec_units);
  // Listing lines 821-823 and 825-827 are not shown.
824 
828 
  // Each line below is the second half of a statement whose first half is
  // absent from this listing; only the label strings passed through
  // csprintf() are visible.
832  csprintf("VectorMemIssue"));
834  csprintf("VectorMemBusBusy"));
836  csprintf("VectorMemCoalescer"));
839  csprintf("ScalarMemIssue"));
841  csprintf("ScalarMemBusBusy"));
843  csprintf("ScalarMemFIFO"));
845  csprintf("LocalMemIssue"));
847  csprintf("LocalMemBusBusy"));
849  csprintf("LocalMemFIFO"));
851  csprintf("FlatMemIssue"));
853  csprintf("FlatMemBusBusy"));
855  csprintf("FlatMemCoalescer"));
857  csprintf("FlatMemFIFO"));
859 
  // Listing lines 860-864 are not shown.
865 }
866 
867 } // namespace gem5
gem5::ScheduleStage::wavesInSch
std::unordered_set< uint64_t > wavesInSch
Definition: schedule_stage.hh:176
gem5::ScheduleStage::fromScoreboardCheck
ScoreboardCheckToSchedule & fromScoreboardCheck
Definition: schedule_stage.hh:128
gem5::ScheduleStage::scheduleRfDestOperands
void scheduleRfDestOperands()
Definition: schedule_stage.cc:264
gem5::ScheduleStage::reinsertToSchList
void reinsertToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
Definition: schedule_stage.cc:364
gem5::ScheduleStage::ScheduleStageStats::addToSchListStalls
statistics::Vector addToSchListStalls
Definition: schedule_stage.hh:202
gem5::ScheduleStage::ScheduleStageStats::ldsBusArbStalls
statistics::Scalar ldsBusArbStalls
Definition: schedule_stage.hh:223
gem5::ScheduleStage::SCH_FLAT_MEM_FIFO_NRDY
@ SCH_FLAT_MEM_FIFO_NRDY
Definition: schedule_stage.hh:95
gem5::MipsISA::w
Bitfield< 0 > w
Definition: pra_constants.hh:281
gem5::ComputeUnit::localMemoryPipe
LocalMemPipeline localMemoryPipe
Definition: compute_unit.hh:287
gem5::ScoreboardCheckToSchedule::updateReadyList
void updateReadyList(int func_unit_id)
Delete all wavefronts that have been marked as ready at scoreboard stage but are found to have empty ...
Definition: comm.cc:91
gem5::Wavefront::WavefrontStats::schStalls
statistics::Scalar schStalls
Definition: wavefront.hh:348
gem5::Wavefront::isOldestInstBarrier
bool isOldestInstBarrier()
Definition: wavefront.cc:651
gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:299
gem5::ScheduleStage::SCH_NRDY_CONDITIONS
@ SCH_NRDY_CONDITIONS
Definition: schedule_stage.hh:97
gem5::ScheduleStage::RFREADY
@ RFREADY
Definition: schedule_stage.hh:123
gem5::ScheduleStage::SCH_SRF_RD_ACCESS_NRDY
@ SCH_SRF_RD_ACCESS_NRDY
Definition: schedule_stage.hh:110
gem5::ScheduleStage::SCH_RF_ACCESS_NRDY
@ SCH_RF_ACCESS_NRDY
Definition: schedule_stage.hh:112
gem5::ScheduleToExecute
Communication interface between Schedule and Execute stages.
Definition: comm.hh:100
gem5::ScheduleStage::SCH_RDY
@ SCH_RDY
Definition: schedule_stage.hh:96
gem5::ScheduleStage::scheduler
std::vector< Scheduler > scheduler
Definition: schedule_stage.hh:133
gem5::Wavefront
Definition: wavefront.hh:62
gem5::ScheduleStage::scalarMemIssueRdy
bool scalarMemIssueRdy
Definition: schedule_stage.hh:148
gem5::ScheduleStage::reserveResources
void reserveResources()
Definition: schedule_stage.cc:724
gem5::ScheduleStage::schList
std::vector< std::deque< std::pair< GPUDynInstPtr, SCH_STATUS > > > schList
Definition: schedule_stage.hh:187
gem5::ScheduleStage::SCH_FLAT_MEM_REQS_NRDY
@ SCH_FLAT_MEM_REQS_NRDY
Definition: schedule_stage.hh:94
compute_unit.hh
gem5::ComputeUnit::firstMemUnit
int firstMemUnit() const
Definition: compute_unit.cc:241
gem5::Wavefront::S_STALLED_SLEEP
@ S_STALLED_SLEEP
Definition: wavefront.hh:76
gem5::Wavefront::WavefrontStats::schOpdNrdyStalls
statistics::Scalar schOpdNrdyStalls
Definition: wavefront.hh:359
gem5::ScheduleStage::locMemIssueRdy
bool locMemIssueRdy
Definition: schedule_stage.hh:152
gpu_static_inst.hh
gem5::SKIP
@ SKIP
Definition: exec_stage.hh:65
gem5::ScalarMemPipeline::isGMReqFIFOWrRdy
bool isGMReqFIFOWrRdy(uint32_t pendReqs=0) const
Definition: scalar_memory_pipeline.hh:85
gem5::ScheduleStage::SCH_VECTOR_ALU_NRDY
@ SCH_VECTOR_ALU_NRDY
Definition: schedule_stage.hh:79
gem5::ScheduleStage::ScheduleStageStats::rfAccessStalls
statistics::Vector rfAccessStalls
Definition: schedule_stage.hh:218
gem5::ArmISA::mp
Bitfield< 11 > mp
Definition: misc_types.hh:768
gem5::statistics::DataWrapVec::subname
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
Definition: statistics.hh:399
gem5::EXREADY
@ EXREADY
Definition: exec_stage.hh:64
gem5::ComputeUnit::srfToScalarMemPipeBus
WaitClass srfToScalarMemPipeBus
Definition: compute_unit.hh:241
gem5::ScheduleToExecute::dispatchStatus
DISPATCH_STATUS dispatchStatus(int func_unit_id) const
Definition: comm.cc:152
std::vector< bool >
gem5::csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
gem5::ScheduleStage::dispatchReady
bool dispatchReady(const GPUDynInstPtr &gpu_dyn_inst)
Definition: schedule_stage.cc:418
gem5::ScheduleStage::SCH_FLAT_MEM_ISSUE_NRDY
@ SCH_FLAT_MEM_ISSUE_NRDY
Definition: schedule_stage.hh:91
gem5::Wavefront::WavefrontStats::schCycles
statistics::Scalar schCycles
Definition: wavefront.hh:345
gem5::ComputeUnit::vrfToGlobalMemPipeBus
WaitClass vrfToGlobalMemPipeBus
Definition: compute_unit.hh:225
gem5::Wavefront::scalarRdGmReqsInPipe
int scalarRdGmReqsInPipe
Definition: wavefront.hh:190
gem5::ScheduleStage::vectorAluRdy
bool vectorAluRdy
Definition: schedule_stage.hh:145
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::ScheduleStage::scalarMemBusRdy
bool scalarMemBusRdy
Definition: schedule_stage.hh:147
gem5::ComputeUnit::numVectorSharedMemUnits
int numVectorSharedMemUnits
Definition: compute_unit.hh:229
gem5::ScheduleStage::scalarAluRdy
bool scalarAluRdy
Definition: schedule_stage.hh:146
gem5::ScheduleStage::~ScheduleStage
~ScheduleStage()
Definition: schedule_stage.cc:70
gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:297
wavefront.hh
gem5::ScheduleStage::SCH_LOCAL_MEM_FIFO_NRDY
@ SCH_LOCAL_MEM_FIFO_NRDY
Definition: schedule_stage.hh:90
gem5::ScheduleStage::ScheduleStageStats::rdyListEmpty
statistics::Vector rdyListEmpty
Definition: schedule_stage.hh:196
gem5::Wavefront::isOldestInstSleep
bool isOldestInstSleep()
Definition: wavefront.cc:590
gem5::ScheduleStage::toExecute
ScheduleToExecute & toExecute
Definition: schedule_stage.hh:129
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::Wavefront::wrLmReqsInPipe
int wrLmReqsInPipe
Definition: wavefront.hh:188
gem5::Wavefront::setStatus
void setStatus(status_e newStatus)
Definition: wavefront.cc:520
gem5::ArmISA::j
Bitfield< 24 > j
Definition: misc_types.hh:57
gem5::ComputeUnit
Definition: compute_unit.hh:203
gem5::ScheduleStage::fillDispatchList
void fillDispatchList()
Definition: schedule_stage.cc:558
gem5::Wavefront::WavefrontStats::schRfAccessStalls
statistics::Scalar schRfAccessStalls
Definition: wavefront.hh:355
vector_register_file.hh
gem5::ScheduleToExecute::readyInst
GPUDynInstPtr & readyInst(int func_unit_id)
Definition: comm.cc:129
gem5::ScheduleStage::SCH_RF_OPD_NRDY_CONDITIONS
@ SCH_RF_OPD_NRDY_CONDITIONS
Definition: schedule_stage.hh:104
gem5::ScheduleStage::init
void init()
Definition: schedule_stage.cc:78
gem5::ScheduleStage::ScheduleStageStats::schListToDispList
statistics::Vector schListToDispList
Definition: schedule_stage.hh:207
gem5::ComputeUnit::vectorSharedMemUnit
WaitClass vectorSharedMemUnit
Definition: compute_unit.hh:235
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::ComputeUnit::scalarMemUnit
WaitClass scalarMemUnit
Definition: compute_unit.hh:243
gem5::Wavefront::rdLmReqsInPipe
int rdLmReqsInPipe
Definition: wavefront.hh:186
gem5::Wavefront::incLGKMInstsIssued
void incLGKMInstsIssued()
Definition: wavefront.cc:1341
gem5::ScheduleStage::SCH_RF_OPD_NRDY
@ SCH_RF_OPD_NRDY
Definition: schedule_stage.hh:103
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Wavefront::scalarWrGmReqsInPipe
int scalarWrGmReqsInPipe
Definition: wavefront.hh:191
gem5::ScheduleStage::addToSchList
bool addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
Definition: schedule_stage.cc:296
gem5::ScheduleStage::SCH_VECTOR_MEM_ISSUE_NRDY
@ SCH_VECTOR_MEM_ISSUE_NRDY
Definition: schedule_stage.hh:80
gem5::ArmISA::s
Bitfield< 4 > s
Definition: misc_types.hh:561
gem5::ScheduleStage::ScheduleStage
ScheduleStage(const ComputeUnitParams &p, ComputeUnit &cu, ScoreboardCheckToSchedule &from_scoreboard_check, ScheduleToExecute &to_execute)
Definition: schedule_stage.cc:50
gem5::ComputeUnit::globalMemoryPipe
GlobalMemPipeline globalMemoryPipe
Definition: compute_unit.hh:286
gem5::EMPTY
@ EMPTY
Definition: exec_stage.hh:63
gem5::ScheduleStage::SCH_CEDE_SIMD_NRDY
@ SCH_CEDE_SIMD_NRDY
Definition: schedule_stage.hh:84
gem5::Wavefront::S_BARRIER
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:94
gem5::ScheduleToExecute::reset
void reset() override
Reset the pipe stage interface.
Definition: comm.cc:117
scalar_register_file.hh
gem5::ScheduleStage::arbitrateVrfToLdsBus
void arbitrateVrfToLdsBus()
Definition: schedule_stage.cc:627
gem5::ScheduleStage::deleteFromSch
void deleteFromSch(Wavefront *w)
Definition: schedule_stage.cc:790
gem5::ScheduleStage::checkRfOperandReadComplete
void checkRfOperandReadComplete()
Definition: schedule_stage.cc:668
ss
std::stringstream ss
Definition: trace.test.cc:45
gem5::Wavefront::isOldestInstWaitcnt
bool isOldestInstWaitcnt()
Definition: wavefront.cc:604
compiler.hh
gem5::ScheduleStage::SCH_FLAT_MEM_BUS_BUSY_NRDY
@ SCH_FLAT_MEM_BUS_BUSY_NRDY
Definition: schedule_stage.hh:92
gem5::ScheduleStage::ScheduleStageStats::ScheduleStageStats
ScheduleStageStats(statistics::Group *parent, int num_exec_units)
Definition: schedule_stage.cc:795
gem5::ScheduleStage::ScheduleStageStats::schListToDispListStalls
statistics::Vector schListToDispListStalls
Definition: schedule_stage.hh:211
gem5::ScheduleStage::ScheduleStageStats::rdyListNotEmpty
statistics::Vector rdyListNotEmpty
Definition: schedule_stage.hh:197
gem5::ScheduleStage::ScheduleStageStats::opdNrdyStalls
statistics::Vector opdNrdyStalls
Definition: schedule_stage.hh:227
gem5::ComputeUnit::scalarALUs
std::vector< WaitClass > scalarALUs
Definition: compute_unit.hh:251
gem5::GlobalMemPipeline::acqCoalescerToken
void acqCoalescerToken(GPUDynInstPtr mp)
Definition: global_memory_pipeline.cc:84
gem5::ComputeUnit::lastMemUnit
int lastMemUnit() const
Definition: compute_unit.cc:248
gem5::Wavefront::wfDynId
uint64_t wfDynId
Definition: wavefront.hh:228
gem5::DISPATCH_STATUS
DISPATCH_STATUS
Definition: exec_stage.hh:61
name
const std::string & name()
Definition: trace.cc:49
gem5::ScheduleStage::SCH_VECTOR_MEM_REQS_NRDY
@ SCH_VECTOR_MEM_REQS_NRDY
Definition: schedule_stage.hh:83
gem5::GlobalMemPipeline::outstandingReqsCheck
bool outstandingReqsCheck(GPUDynInstPtr mp) const
Definition: global_memory_pipeline.cc:96
schedule_stage.hh
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:51
gem5::ScheduleStage::SCH_LOCAL_MEM_BUS_BUSY_NRDY
@ SCH_LOCAL_MEM_BUS_BUSY_NRDY
Definition: schedule_stage.hh:89
gem5::ScheduleStage::SCH_SCALAR_MEM_ISSUE_NRDY
@ SCH_SCALAR_MEM_ISSUE_NRDY
Definition: schedule_stage.hh:85
gem5::Wavefront::hasBarrier
bool hasBarrier() const
Definition: wavefront.cc:1430
gem5::ScheduleStage::SCH_LOCAL_MEM_ISSUE_NRDY
@ SCH_LOCAL_MEM_ISSUE_NRDY
Definition: schedule_stage.hh:88
gem5::ScheduleStage::SCH_VRF_WR_ACCESS_NRDY
@ SCH_VRF_WR_ACCESS_NRDY
Definition: schedule_stage.hh:109
gem5::Wavefront::incExpInstsIssued
void incExpInstsIssued()
Definition: wavefront.cc:1335
gem5::ScoreboardCheckToSchedule::readyWFs
std::vector< Wavefront * > & readyWFs(int func_unit_id)
TODO: These methods expose this class' implementation too much by returning references to its interna...
Definition: comm.cc:81
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:203
gem5::ComputeUnit::numVectorGlobalMemUnits
int numVectorGlobalMemUnits
Definition: compute_unit.hh:221
gem5::ScheduleStage::checkMemResources
void checkMemResources()
Definition: schedule_stage.cc:380
gem5::GlobalMemPipeline::coalescerReady
bool coalescerReady(GPUDynInstPtr mp) const
Definition: global_memory_pipeline.cc:65
gem5::ScheduleStage::SCH_SCALAR_MEM_FIFO_NRDY
@ SCH_SCALAR_MEM_FIFO_NRDY
Definition: schedule_stage.hh:87
gem5::Wavefront::reserveResources
std::vector< int > reserveResources()
Definition: wavefront.cc:810
gem5::ScoreboardCheckToSchedule
Communication interface between ScoreboardCheck and Schedule stages.
Definition: comm.hh:64
gem5::ScheduleStage::stats
gem5::ScheduleStage::ScheduleStageStats stats
gem5::ScheduleStage::computeUnit
ComputeUnit & computeUnit
Definition: schedule_stage.hh:127
gem5::ScheduleStage::glbMemBusRdy
bool glbMemBusRdy
Definition: schedule_stage.hh:149
gem5::Wavefront::globalMem
int globalMem
Definition: wavefront.hh:128
gem5::ScheduleStage::SCH_SRF_OPD_NRDY
@ SCH_SRF_OPD_NRDY
Definition: schedule_stage.hh:102
gem5::ScheduleStage::exec
void exec()
Definition: schedule_stage.cc:92
gem5::ScheduleStage::RFBUSY
@ RFBUSY
Definition: schedule_stage.hh:122
gem5::ComputeUnit::insertInPipeMap
void insertInPipeMap(Wavefront *w)
Definition: compute_unit.cc:500
gem5::Wavefront::scalarAlu
int scalarAlu
Definition: wavefront.hh:123
gem5::ScheduleStage::ScheduleStageStats::dispNrdyStalls
statistics::Vector dispNrdyStalls
Definition: schedule_stage.hh:232
gem5::Wavefront::S_WAITCNT
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:90
gem5::ScheduleStage::SCH_SRF_WR_ACCESS_NRDY
@ SCH_SRF_WR_ACCESS_NRDY
Definition: schedule_stage.hh:111
gem5::ScheduleToExecute::dispatchTransition
void dispatchTransition(const GPUDynInstPtr &gpu_dyn_inst, int func_unit_id, DISPATCH_STATUS disp_status)
Once the scheduler has chosen a winning WF for execution, and after the WF's oldest instruction's ope...
Definition: comm.cc:135
gem5::ScheduleStage::SCH_VRF_OPD_NRDY
@ SCH_VRF_OPD_NRDY
Definition: schedule_stage.hh:101
gem5::ComputeUnit::vrfToLocalMemPipeBus
WaitClass vrfToLocalMemPipeBus
Definition: compute_unit.hh:233
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::ScheduleStage::glbMemIssueRdy
bool glbMemIssueRdy
Definition: schedule_stage.hh:150
gem5::ComputeUnit::vectorALUs
std::vector< WaitClass > vectorALUs
Definition: compute_unit.hh:247
gem5::Wavefront::instructionBuffer
std::deque< GPUDynInstPtr > instructionBuffer
Definition: wavefront.hh:111
gem5::Wavefront::stats
gem5::Wavefront::WavefrontStats stats
gem5::ScheduleStage::SCH_VRF_RD_ACCESS_NRDY
@ SCH_VRF_RD_ACCESS_NRDY
Definition: schedule_stage.hh:108
gem5::ScheduleStage::SCH_VECTOR_MEM_BUS_BUSY_NRDY
@ SCH_VECTOR_MEM_BUS_BUSY_NRDY
Definition: schedule_stage.hh:81
gem5::ScheduleStage::SCH_VECTOR_MEM_COALESCER_NRDY
@ SCH_VECTOR_MEM_COALESCER_NRDY
Definition: schedule_stage.hh:82
gem5::MipsISA::dq
Bitfield< 2 > dq
Definition: dt_constants.hh:131
gem5::ScheduleStage::SCH_RF_ACCESS_NRDY_CONDITIONS
@ SCH_RF_ACCESS_NRDY_CONDITIONS
Definition: schedule_stage.hh:113
gem5::Wavefront::incVMemInstsIssued
void incVMemInstsIssued()
Definition: wavefront.cc:1329
gem5::ScheduleStage::SCH_SCALAR_MEM_BUS_BUSY_NRDY
@ SCH_SCALAR_MEM_BUS_BUSY_NRDY
Definition: schedule_stage.hh:86
gem5::ComputeUnit::scalarMemoryPipe
ScalarMemPipeline scalarMemoryPipe
Definition: compute_unit.hh:288
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:225
gem5::ScheduleStage::schedRfWrites
bool schedRfWrites(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
Definition: schedule_stage.cc:228
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::ComputeUnit::vectorGlobalMemUnit
WaitClass vectorGlobalMemUnit
Definition: compute_unit.hh:227
gem5::ScheduleStage::locMemBusRdy
bool locMemBusRdy
Definition: schedule_stage.hh:151
gem5::statistics::VectorBase::init
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1037
gem5::ScheduleStage::doDispatchListTransition
void doDispatchListTransition(int unitId, DISPATCH_STATUS s, const GPUDynInstPtr &gpu_dyn_inst)
Definition: schedule_stage.cc:215
gem5::WaitClass::rdy
bool rdy(Cycles cycles=Cycles(0)) const
Definition: misc.hh:95
gem5::ScheduleStage::SCH_SCALAR_ALU_NRDY
@ SCH_SCALAR_ALU_NRDY
Definition: schedule_stage.hh:78
gem5::Wavefront::localMem
int localMem
Definition: wavefront.hh:129
gem5::ScheduleStage::SCH_FLAT_MEM_COALESCER_NRDY
@ SCH_FLAT_MEM_COALESCER_NRDY
Definition: schedule_stage.hh:93
gem5::ScoreboardCheckToSchedule::numReadyLists
int numReadyLists() const
Returns the number of ready lists (i.e., the number of functional units).
Definition: comm.cc:75
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:177
gem5::ComputeUnit::numExeUnits
int numExeUnits() const
Definition: compute_unit.cc:233
gem5::Wavefront::simdId
const int simdId
Definition: wavefront.hh:101

Generated on Tue Sep 21 2021 12:25:25 for gem5 by doxygen 1.8.17