gem5  v22.1.0.0
scoreboard_check_stage.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include "debug/GPUExec.hh"
35 #include "debug/GPUSched.hh"
36 #include "debug/GPUSync.hh"
40 #include "gpu-compute/shader.hh"
42 #include "gpu-compute/wavefront.hh"
43 #include "params/ComputeUnit.hh"
44 
45 namespace gem5
46 {
47 
49  ComputeUnit &cu,
51  &to_schedule)
52  : computeUnit(cu), toSchedule(to_schedule),
53  _name(cu.name() + ".ScoreboardCheckStage"), stats(&cu)
54 {
55 }
56 
58 {
59 }
60 
61 void
63 {
64  panic_if(rdyStatus == NRDY_ILLEGAL || rdyStatus >= NRDY_CONDITIONS,
65  "Instruction ready status %d is illegal!!!", rdyStatus);
66  stats.stallCycles[rdyStatus]++;
67 }
68 
69 // Return true if this wavefront is ready
70 // to execute an instruction of the specified type.
71 // It also returns the reason (in rdyStatus) if the instruction is not
72 // ready. Finally it sets the execution resource type (in exeResType)
73 // of the instruction, only if it is ready.
74 bool
76  int *exeResType, int wfSlot)
77 {
87  // waitCnt instruction has been dispatched or executed: next
88  // instruction should be blocked until waitCnts are satisfied.
89  if (w->getStatus() == Wavefront::S_WAITCNT) {
90  if (!w->waitCntsSatisfied()) {
91  *rdyStatus = NRDY_WAIT_CNT;
92  return false;
93  }
94  }
95 
96  // sleep instruction has been dispatched or executed: next
97  // instruction should be blocked until the sleep period expires.
98  if (w->getStatus() == Wavefront::S_STALLED_SLEEP) {
99  if (!w->sleepDone()) {
100  *rdyStatus = NRDY_SLEEP;
101  return false;
102  }
103  }
104 
105  // Is the wave waiting at a barrier? Check this condition BEFORE checking
106  // for instruction buffer occupancy to avoid a deadlock when the barrier is
107  // the last instruction in the instruction buffer.
108  if (w->getStatus() == Wavefront::S_BARRIER) {
109  assert(w->hasBarrier());
110  int bar_id = w->barrierId();
111  if (!computeUnit.allAtBarrier(bar_id)) {
112  DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalled at "
113  "barrier Id%d. %d waves remain.\n", w->computeUnit->cu_id,
114  w->simdId, w->wfSlotId, w->wfDynId, bar_id,
115  w->computeUnit->numYetToReachBarrier(bar_id));
116  // Are all threads at barrier?
117  *rdyStatus = NRDY_BARRIER_WAIT;
118  return false;
119  }
120  DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves at barrier "
121  "Id%d. Resetting barrier resources.\n", w->computeUnit->cu_id,
122  w->simdId, w->wfSlotId, w->wfDynId, bar_id);
123  computeUnit.resetBarrier(bar_id);
125  }
126 
127  // Check WF status: it has to be running
128  if (w->getStatus() == Wavefront::S_STOPPED ||
129  w->getStatus() == Wavefront::S_RETURNING ||
130  w->getStatus() == Wavefront::S_STALLED) {
131  *rdyStatus = NRDY_WF_STOP;
132  return false;
133  }
134 
135  // is the Instruction buffer empty
136  if ( w->instructionBuffer.empty()) {
137  *rdyStatus = NRDY_IB_EMPTY;
138  return false;
139  }
140 
141  // Check next instruction from instruction buffer
142  GPUDynInstPtr ii = w->nextInstr();
143  // The only instruction in the instruction buffer has been dispatched.
144  // No need to check it again for readiness.
145  if (!ii) {
146  *rdyStatus = NRDY_IB_EMPTY;
147  return false;
148  }
149 
150  // The following code is very error prone and the entire process for
151  // checking readiness will be fixed eventually. In the meantime, let's
152  // make sure that we do not silently let an instruction type slip
153  // through this logic and always return not ready.
154  if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
155  ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
156  ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat() ||
157  ii->isFlatGlobal() || ii->isSleep() || ii->isLocalMem())) {
158  panic("next instruction: %s is of unknown type\n", ii->disassemble());
159  }
160 
161  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Ready for Inst : %s\n",
162  computeUnit.cu_id, w->simdId, w->wfSlotId, ii->disassemble());
163 
164  // Non-scalar (i.e., vector) instructions may use VGPRs
165  if (!ii->isScalar()) {
166  if (!computeUnit.vrf[w->simdId]->operandsReady(w, ii)) {
167  *rdyStatus = NRDY_VGPR_NRDY;
168  return false;
169  }
170  }
171  // Scalar and non-scalar instructions may use SGPR
172  if (!computeUnit.srf[w->simdId]->operandsReady(w, ii)) {
173  *rdyStatus = NRDY_SGPR_NRDY;
174  return false;
175  }
176 
177  // The hardware implicitly executes S_WAITCNT 0 before executing
178  // the S_ENDPGM instruction. This code implements that implicit S_WAITCNT.
179  // isEndOfKernel() is used to identify the S_ENDPGM instruction.
180  // On identifying it, we do the following:
181  // 1. Wait for all older instructions to execute.
182  // 2. Once all the older instructions are executed, we add a wait
183  // count for the executed instruction(s) to complete.
184  if (ii->isEndOfKernel()) {
185  // Waiting for older instructions to execute
186  if (w->instructionBuffer.front()->seqNum() != ii->seqNum()) {
187  *rdyStatus = NRDY_WAIT_CNT;
188  return false;
189  }
190  // Older instructions have executed, adding implicit wait count
191  w->setStatus(Wavefront::S_WAITCNT);
192  w->setWaitCnts(0, 0, 0);
193  if (!w->waitCntsSatisfied()) {
194  *rdyStatus = NRDY_WAIT_CNT;
195  return false;
196  }
197  }
198  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit.cu_id,
199  w->simdId, w->wfSlotId, ii->disassemble());
200  *exeResType = mapWaveToExeUnit(w);
201  *rdyStatus = INST_RDY;
202  return true;
203 }
204 
205 int
207 {
208  GPUDynInstPtr ii = w->nextInstr();
209  assert(ii);
210  if (ii->isFlat()) {
220  return w->globalMem;
221  } else if (ii->isLocalMem()) {
222  return w->localMem;
223  } else if (ii->isGlobalMem()) {
224  if (!ii->isScalar()) {
225  return w->globalMem;
226  } else {
227  return w->scalarMem;
228  }
229  } else if (ii->isBranch() ||
230  ii->isALU() ||
231  (ii->isKernArgSeg() && ii->isLoad()) ||
232  ii->isArgSeg() ||
233  ii->isReturn() ||
234  ii->isEndOfKernel() ||
235  ii->isNop() ||
236  ii->isBarrier()) {
237  if (!ii->isScalar()) {
238  return w->simdId;
239  } else {
240  return w->scalarAluGlobalIdx;
241  }
242  }
243  panic("%s: unmapped to an execution resource", ii->disassemble());
244  return computeUnit.numExeUnits();
245 }
246 
247 void
249 {
254  toSchedule.reset();
255 
256  // Iterate over all WF slots across all SIMDs.
257  for (int simdId = 0; simdId < computeUnit.numVectorALUs; ++simdId) {
258  for (int wfSlot = 0; wfSlot < computeUnit.shader->n_wf; ++wfSlot) {
259  // reset the ready status of each wavefront
260  Wavefront *curWave = computeUnit.wfList[simdId][wfSlot];
261  nonrdytype_e rdyStatus = NRDY_ILLEGAL;
262  int exeResType = -1;
263  // check WF readiness: If the WF's oldest
264  // instruction is ready to issue then add the WF to the ready list
265  if (ready(curWave, &rdyStatus, &exeResType, wfSlot)) {
266  assert(curWave->simdId == simdId);
267  DPRINTF(GPUSched,
268  "Adding to readyList[%d]: SIMD[%d] WV[%d]: %d: %s\n",
269  exeResType,
270  curWave->simdId, curWave->wfDynId,
271  curWave->nextInstr()->seqNum(),
272  curWave->nextInstr()->disassemble());
273  toSchedule.markWFReady(curWave, exeResType);
274  }
275  collectStatistics(rdyStatus);
276  }
277  }
278 }
279 
282  : statistics::Group(parent, "ScoreboardCheckStage"),
283  ADD_STAT(stallCycles, "number of cycles wave stalled in SCB")
284 {
286 
293  stallCycles.subname(INST_RDY, csprintf("InstrReady"));
294 }
295 
296 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
void releaseWFsFromBarrier(int bar_id)
void resetBarrier(int bar_id)
int numExeUnits() const
std::vector< ScalarRegisterFile * > srf
std::vector< std::vector< Wavefront * > > wfList
std::vector< VectorRegisterFile * > vrf
bool allAtBarrier(int bar_id)
bool ready(Wavefront *w, nonrdytype_e *rdyStatus, int *exeResType, int wfSlot)
ScoreboardCheckStage(const ComputeUnitParams &p, ComputeUnit &cu, ScoreboardCheckToSchedule &to_schedule)
gem5::ScoreboardCheckStage::ScoreboardCheckStageStats stats
void collectStatistics(nonrdytype_e rdyStatus)
ScoreboardCheckToSchedule & toSchedule
Interface between scoreboard check and schedule stages.
Communication interface between ScoreboardCheck and Schedule stages.
Definition: comm.hh:63
void reset() override
Reset the pipe stage interface.
Definition: comm.cc:59
void markWFReady(Wavefront *wf, int func_unit_id)
Mark the WF as ready for execution on a particular functional unit.
Definition: comm.cc:67
const int simdId
Definition: wavefront.hh:99
GPUDynInstPtr nextInstr()
Definition: wavefront.cc:1191
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:92
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:88
uint64_t wfDynId
Definition: wavefront.hh:226
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
Definition: statistics.hh:402
Statistics container.
Definition: group.hh:94
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1040
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
Bitfield< 6 > w
Definition: pagetable.hh:59
Bitfield< 54 > p
Definition: pagetable.hh:70
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
const std::string & name()
Definition: trace.cc:49

Generated on Wed Dec 21 2022 10:22:35 for gem5 by doxygen 1.9.1