gem5  v20.1.0.0
scoreboard_check_stage.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include "debug/GPUExec.hh"
37 #include "debug/GPUSched.hh"
38 #include "debug/GPUSync.hh"
42 #include "gpu-compute/shader.hh"
44 #include "gpu-compute/wavefront.hh"
45 #include "params/ComputeUnit.hh"
46 
48  ComputeUnit &cu,
50  &to_schedule)
51  : computeUnit(cu), toSchedule(to_schedule),
52  _name(cu.name() + ".ScoreboardCheckStage")
53 {
    // Nothing to do in the body: the initializer list binds the owning
    // compute unit and the scoreboard-check-to-schedule interface, and
    // builds this stage's name from the compute unit's name.
54 }
55 
57 {
    // Empty body: no explicit teardown is performed here.
58 }
59 
// Count one scoreboard-check outcome in the stallCycles vector.
// rdyStatus selects the entry to increment. NRDY_ILLEGAL, or any value at
// or beyond NRDY_CONDITIONS, is treated as a programming error and panics
// before the counter is touched.
60 void
62 {
63  panic_if(rdyStatus == NRDY_ILLEGAL || rdyStatus >= NRDY_CONDITIONS,
64  "Instruction ready status %d is illegal!!!", rdyStatus);
65  stallCycles[rdyStatus]++;
66 }
67 
68 // Return true if this wavefront is ready
69 // to execute an instruction of the specified type.
70 // It also returns the reason (in rdyStatus) if the instruction is not
71 // ready. Finally it sets the execution resource type (in exeResType)
72 // of the instruction, only if it is ready.
//
// In the code below, a false return leaves *rdyStatus set to one of
// NRDY_WAIT_CNT, NRDY_BARRIER_WAIT, NRDY_WF_STOP, NRDY_IB_EMPTY,
// NRDY_VGPR_NRDY or NRDY_SGPR_NRDY and does not write *exeResType.
// A true return sets *rdyStatus to INST_RDY and *exeResType to the value
// returned by mapWaveToExeUnit(w).
73 bool
75  int *exeResType, int wfSlot)
76 {
86  // waitCnt instruction has been dispatched or executed: next
87  // instruction should be blocked until waitCnts are satisfied.
88  if (w->getStatus() == Wavefront::S_WAITCNT) {
89  if (!w->waitCntsSatisfied()) {
90  *rdyStatus = NRDY_WAIT_CNT;
91  return false;
92  }
93  }
94 
95  // Is the wave waiting at a barrier? Check this condition BEFORE checking
96  // for instruction buffer occupancy to avoid a deadlock when the barrier is
97  // the last instruction in the instruction buffer.
98  if (w->getStatus() == Wavefront::S_BARRIER) {
99  assert(w->hasBarrier());
100  int bar_id = w->barrierId();
101  if (!computeUnit.allAtBarrier(bar_id)) {
102  DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalled at "
103  "barrier Id%d. %d waves remain.\n", w->computeUnit->cu_id,
104  w->simdId, w->wfSlotId, w->wfDynId, bar_id,
105  w->computeUnit->numYetToReachBarrier(bar_id));
106  // Not every wave has reached the barrier yet: keep this one waiting.
107  *rdyStatus = NRDY_BARRIER_WAIT;
108  return false;
109  }
110  DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves at barrier "
111  "Id%d. Resetting barrier resources.\n", w->computeUnit->cu_id,
112  w->simdId, w->wfSlotId, w->wfDynId, bar_id);
113  computeUnit.resetBarrier(bar_id);
115  }
116 
117  // Check WF status: it has to be running
118  if (w->getStatus() == Wavefront::S_STOPPED ||
119  w->getStatus() == Wavefront::S_RETURNING ||
120  w->getStatus() == Wavefront::S_STALLED) {
121  *rdyStatus = NRDY_WF_STOP;
122  return false;
123  }
124 
125  // Is the instruction buffer empty?
126  if ( w->instructionBuffer.empty()) {
127  *rdyStatus = NRDY_IB_EMPTY;
128  return false;
129  }
130 
131  // Check next instruction from instruction buffer
132  GPUDynInstPtr ii = w->nextInstr();
133  // The only instruction in the instruction buffer has been dispatched.
134  // No need to check it again for readiness; report it as an empty buffer.
135  if (!ii) {
136  *rdyStatus = NRDY_IB_EMPTY;
137  return false;
138  }
139 
140  // The following code is very error prone and the entire process for
141  // checking readiness will be fixed eventually. In the meantime, let's
142  // make sure that we do not silently let an instruction type slip
143  // through this logic and always return not ready.
144  if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
145  ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
146  ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat())) {
147  panic("next instruction: %s is of unknown type\n", ii->disassemble());
148  }
149 
150  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Ready for Inst : %s\n",
151  computeUnit.cu_id, w->simdId, w->wfSlotId, ii->disassemble());
152 
153  // Non-scalar (i.e., vector) instructions may use VGPRs
154  if (!ii->isScalar()) {
155  if (!computeUnit.vrf[w->simdId]->operandsReady(w, ii)) {
156  *rdyStatus = NRDY_VGPR_NRDY;
157  return false;
158  }
159  }
160  // Scalar and non-scalar instructions may use SGPR
161  if (!computeUnit.srf[w->simdId]->operandsReady(w, ii)) {
162  *rdyStatus = NRDY_SGPR_NRDY;
163  return false;
164  }
165 
166  // The hardware implicitly executes S_WAITCNT 0 before executing
167  // the S_ENDPGM instruction. Implementing this implicit S_WAITCNT.
168  // isEndOfKernel() is used to identify the S_ENDPGM instruction
169  // On identifying it, we do the following:
170  // 1. Wait for all older instructions to execute
171  // 2. Once all the older instructions are executed, we add a wait
172  // count for the executed instruction(s) to complete.
173  if (ii->isEndOfKernel()) {
174  // Waiting for older instructions to execute: the end-of-kernel
    // instruction must be the one at the front of the instruction buffer.
175  if (w->instructionBuffer.front()->seqNum() != ii->seqNum()) {
176  *rdyStatus = NRDY_WAIT_CNT;
177  return false;
178  }
179  // Older instructions have executed, adding implicit wait count
180  w->setStatus(Wavefront::S_WAITCNT);
181  w->setWaitCnts(0, 0, 0);
182  if (!w->waitCntsSatisfied()) {
183  *rdyStatus = NRDY_WAIT_CNT;
184  return false;
185  }
186  }
187  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit.cu_id,
188  w->simdId, w->wfSlotId, ii->disassemble());
189  *exeResType = mapWaveToExeUnit(w);
190  *rdyStatus = INST_RDY;
191  return true;
192 }
193 
// Pick the execution resource index for the wavefront's next instruction.
// The wavefront must have a next instruction (asserted). Mapping, in the
// order tested:
//   - flat                          -> w->globalMem
//   - local memory                  -> w->localMem
//   - global memory                 -> w->globalMem (vector) or
//                                      w->scalarMem (scalar)
//   - branch, ALU, kernarg-segment load, arg segment, return,
//     end of kernel, nop, barrier   -> w->simdId (vector) or
//                                      w->scalarAluGlobalIdx (scalar)
// Any other instruction causes a panic.
194 int
196 {
197  GPUDynInstPtr ii = w->nextInstr();
198  assert(ii);
199  if (ii->isFlat()) {
209  return w->globalMem;
210  } else if (ii->isLocalMem()) {
211  return w->localMem;
212  } else if (ii->isGlobalMem()) {
213  if (!ii->isScalar()) {
214  return w->globalMem;
215  } else {
216  return w->scalarMem;
217  }
218  } else if (ii->isBranch() ||
219  ii->isALU() ||
220  (ii->isKernArgSeg() && ii->isLoad()) ||
221  ii->isArgSeg() ||
222  ii->isReturn() ||
223  ii->isEndOfKernel() ||
224  ii->isNop() ||
225  ii->isBarrier()) {
226  if (!ii->isScalar()) {
227  return w->simdId;
228  } else {
229  return w->scalarAluGlobalIdx;
230  }
231  }
    // No category matched: report the instruction and abort.
232  panic("%s: unmapped to an execution resource", ii->disassemble());
    // NOTE(review): presumably unreachable after panic(); it looks like it
    // is kept only so every path returns a value - confirm.
233  return computeUnit.numExeUnits();
234 }
235 
// Run one pass of the scoreboard check: clear the interface to the
// schedule stage, then test every wavefront slot on every SIMD unit.
// Wavefronts whose next instruction is ready are marked ready on the
// execution resource chosen by ready(). The outcome of every slot, ready
// or not, is counted via collectStatistics().
236 void
238 {
243  toSchedule.reset();
244 
245  // Iterate over all WF slots across all SIMDs.
246  for (int simdId = 0; simdId < computeUnit.numVectorALUs; ++simdId) {
247  for (int wfSlot = 0; wfSlot < computeUnit.shader->n_wf; ++wfSlot) {
248  // Start from an illegal status so that ready() has to set a real
    // one; collectStatistics() panics if it is still NRDY_ILLEGAL.
249  Wavefront *curWave = computeUnit.wfList[simdId][wfSlot];
250  nonrdytype_e rdyStatus = NRDY_ILLEGAL;
251  int exeResType = -1;
252  // check WF readiness: If the WF's oldest
253  // instruction is ready to issue then add the WF to the ready list
254  if (ready(curWave, &rdyStatus, &exeResType, wfSlot)) {
255  assert(curWave->simdId == simdId);
256  DPRINTF(GPUSched,
257  "Adding to readyList[%d]: SIMD[%d] WV[%d]: %d: %s\n",
258  exeResType,
259  curWave->simdId, curWave->wfDynId,
260  curWave->nextInstr()->seqNum(),
261  curWave->nextInstr()->disassemble());
262  toSchedule.markWFReady(curWave, exeResType);
263  }
264  collectStatistics(rdyStatus);
265  }
266  }
267 }
268 
// Register this stage's statistics. The visible statements name a
// statistic "<stage name>.stall_cycles", give it a description, and label
// the INST_RDY entry of stallCycles "InstrReady".
// NOTE(review): the object receiving .name()/.desc() is not visible in
// this listing; presumably it is stallCycles - confirm.
269 void
271 {
274  .name(name() + ".stall_cycles")
275  .desc("number of cycles wave stalled in SCB")
276  ;
283  stallCycles.subname(INST_RDY, csprintf("InstrReady"));
284 }
ScoreboardCheckStage::NRDY_ILLEGAL
@ NRDY_ILLEGAL
Definition: scoreboard_check_stage.hh:63
ScoreboardCheckStage::NRDY_IB_EMPTY
@ NRDY_IB_EMPTY
Definition: scoreboard_check_stage.hh:65
ComputeUnit::resetBarrier
void resetBarrier(int bar_id)
Definition: compute_unit.cc:666
ScoreboardCheckStage::ScoreboardCheckStage
ScoreboardCheckStage(const ComputeUnitParams *p, ComputeUnit &cu, ScoreboardCheckToSchedule &to_schedule)
Definition: scoreboard_check_stage.cc:47
shader.hh
compute_unit.hh
gpu_static_inst.hh
Shader::n_wf
int n_wf
Definition: shader.hh:228
ScoreboardCheckToSchedule::reset
void reset() override
Reset the pipe stage interface.
Definition: comm.cc:60
ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:289
ScoreboardCheckStage::toSchedule
ScoreboardCheckToSchedule & toSchedule
Interface between scoreboard check and schedule stages.
Definition: scoreboard_check_stage.hh:95
ComputeUnit::numExeUnits
int numExeUnits() const
Definition: compute_unit.cc:228
scoreboard_check_stage.hh
ScoreboardCheckStage::mapWaveToExeUnit
int mapWaveToExeUnit(Wavefront *w)
Definition: scoreboard_check_stage.cc:195
ScoreboardCheckStage::exec
void exec()
Definition: scoreboard_check_stage.cc:237
ScoreboardCheckStage::nonrdytype_e
nonrdytype_e
Definition: scoreboard_check_stage.hh:62
ComputeUnit::allAtBarrier
bool allAtBarrier(int bar_id)
Definition: compute_unit.cc:638
Wavefront::S_BARRIER
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:85
ScoreboardCheckToSchedule::markWFReady
void markWFReady(Wavefront *wf, int func_unit_id)
Mark the WF as ready for execution on a particular functional unit.
Definition: comm.cc:68
wavefront.hh
ScoreboardCheckStage::INST_RDY
@ INST_RDY
Definition: scoreboard_check_stage.hh:70
ScoreboardCheckStage::NRDY_SGPR_NRDY
@ NRDY_SGPR_NRDY
Definition: scoreboard_check_stage.hh:69
ComputeUnit
Definition: compute_unit.hh:198
Wavefront::S_STALLED
@ S_STALLED
Definition: wavefront.hh:68
ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:294
Wavefront::S_RETURNING
@ S_RETURNING
Definition: wavefront.hh:64
vector_register_file.hh
ScoreboardCheckStage::NRDY_VGPR_NRDY
@ NRDY_VGPR_NRDY
Definition: scoreboard_check_stage.hh:68
MipsISA::w
Bitfield< 0 > w
Definition: pra_constants.hh:278
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:292
Wavefront::S_WAITCNT
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:81
ComputeUnit::wfList
std::vector< std::vector< Wavefront * > > wfList
Definition: compute_unit.hh:288
ScoreboardCheckStage::name
const std::string & name() const
Definition: scoreboard_check_stage.hh:80
scalar_register_file.hh
ScoreboardCheckStage::~ScoreboardCheckStage
~ScoreboardCheckStage()
Definition: scoreboard_check_stage.cc:56
Wavefront::simdId
const int simdId
Definition: wavefront.hh:92
Stats::DataWrap::name
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:274
ScoreboardCheckStage::ready
bool ready(Wavefront *w, nonrdytype_e *rdyStatus, int *exeResType, int wfSlot)
Definition: scoreboard_check_stage.cc:74
Wavefront::S_STOPPED
@ S_STOPPED
Definition: wavefront.hh:62
name
const std::string & name()
Definition: trace.cc:50
Stats::VectorBase::init
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1177
Wavefront::nextInstr
GPUDynInstPtr nextInstr()
Definition: wavefront.cc:1216
ScoreboardCheckStage::NRDY_BARRIER_WAIT
@ NRDY_BARRIER_WAIT
Definition: scoreboard_check_stage.hh:67
ScoreboardCheckStage::NRDY_WAIT_CNT
@ NRDY_WAIT_CNT
Definition: scoreboard_check_stage.hh:66
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:197
ScoreboardCheckToSchedule
Communication interface between ScoreboardCheck and Schedule stages.
Definition: comm.hh:63
Wavefront
Definition: wavefront.hh:57
ScoreboardCheckStage::NRDY_CONDITIONS
@ NRDY_CONDITIONS
Definition: scoreboard_check_stage.hh:71
ScoreboardCheckStage::stallCycles
Stats::Vector stallCycles
Definition: scoreboard_check_stage.hh:98
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
ScoreboardCheckStage::collectStatistics
void collectStatistics(nonrdytype_e rdyStatus)
Definition: scoreboard_check_stage.cc:61
Stats::DataWrapVec::subname
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
Definition: statistics.hh:374
ScoreboardCheckStage::computeUnit
ComputeUnit & computeUnit
Definition: scoreboard_check_stage.hh:88
ScoreboardCheckStage::regStats
void regStats()
Definition: scoreboard_check_stage.cc:270
ComputeUnit::releaseWFsFromBarrier
void releaseWFsFromBarrier(int bar_id)
Definition: compute_unit.cc:688
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
Stats::DataWrap::desc
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:307
csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:158
ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:356
ScoreboardCheckStage::NRDY_WF_STOP
@ NRDY_WF_STOP
Definition: scoreboard_check_stage.hh:64
Wavefront::wfDynId
uint64_t wfDynId
Definition: wavefront.hh:218
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171
ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:241

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17