gem5  v21.1.0.2
scoreboard_check_stage.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include "debug/GPUExec.hh"
37 #include "debug/GPUSched.hh"
38 #include "debug/GPUSync.hh"
42 #include "gpu-compute/shader.hh"
44 #include "gpu-compute/wavefront.hh"
45 #include "params/ComputeUnit.hh"
46 
47 namespace gem5
48 {
49 
51  ComputeUnit &cu,
53  &to_schedule)
54  : computeUnit(cu), toSchedule(to_schedule),
55  _name(cu.name() + ".ScoreboardCheckStage"), stats(&cu)
56 {
57 }
58 
60 {
61 }
62 
63 void
65 {
66  panic_if(rdyStatus == NRDY_ILLEGAL || rdyStatus >= NRDY_CONDITIONS,
67  "Instruction ready status %d is illegal!!!", rdyStatus);
68  stats.stallCycles[rdyStatus]++;
69 }
70 
71 // Return true if this wavefront is ready
72 // to execute an instruction of the specified type.
73 // It also returns the reason (in rdyStatus) if the instruction is not
74 // ready. Finally, it sets the execution resource type (in exeResType)
75 // of the instruction, but only if it is ready.
76 bool
78  int *exeResType, int wfSlot)
79 {
89  // waitCnt instruction has been dispatched or executed: next
90  // instruction should be blocked until waitCnts are satisfied.
91  if (w->getStatus() == Wavefront::S_WAITCNT) {
92  if (!w->waitCntsSatisfied()) {
93  *rdyStatus = NRDY_WAIT_CNT;
94  return false;
95  }
96  }
97 
98  // sleep instruction has been dispatched or executed: next
99  // instruction should be blocked until the sleep period expires.
100  if (w->getStatus() == Wavefront::S_STALLED_SLEEP) {
101  if (!w->sleepDone()) {
102  *rdyStatus = NRDY_SLEEP;
103  return false;
104  }
105  }
106 
107  // Is the wave waiting at a barrier? Check this condition BEFORE checking
108  // for instruction buffer occupancy to avoid a deadlock when the barrier is
109  // the last instruction in the instruction buffer.
110  if (w->getStatus() == Wavefront::S_BARRIER) {
111  assert(w->hasBarrier());
112  int bar_id = w->barrierId();
113  if (!computeUnit.allAtBarrier(bar_id)) {
114  DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalled at "
115  "barrier Id%d. %d waves remain.\n", w->computeUnit->cu_id,
116  w->simdId, w->wfSlotId, w->wfDynId, bar_id,
117  w->computeUnit->numYetToReachBarrier(bar_id));
118  // Are all threads at barrier?
119  *rdyStatus = NRDY_BARRIER_WAIT;
120  return false;
121  }
122  DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves at barrier "
123  "Id%d. Resetting barrier resources.\n", w->computeUnit->cu_id,
124  w->simdId, w->wfSlotId, w->wfDynId, bar_id);
125  computeUnit.resetBarrier(bar_id);
127  }
128 
129  // Check WF status: it has to be running
130  if (w->getStatus() == Wavefront::S_STOPPED ||
131  w->getStatus() == Wavefront::S_RETURNING ||
132  w->getStatus() == Wavefront::S_STALLED) {
133  *rdyStatus = NRDY_WF_STOP;
134  return false;
135  }
136 
137  // is the Instruction buffer empty
138  if ( w->instructionBuffer.empty()) {
139  *rdyStatus = NRDY_IB_EMPTY;
140  return false;
141  }
142 
143  // Check next instruction from instruction buffer
144  GPUDynInstPtr ii = w->nextInstr();
145  // The only instruction in the instruction buffer has already been
146  // dispatched, so there is no need to check it again for readiness.
147  if (!ii) {
148  *rdyStatus = NRDY_IB_EMPTY;
149  return false;
150  }
151 
152  // The following code is very error prone and the entire process for
153  // checking readiness will be fixed eventually. In the meantime, let's
154  // make sure that we do not silently let an instruction type slip
155  // through this logic and always return not ready.
156  if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
157  ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
158  ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat() ||
159  ii->isSleep())) {
160  panic("next instruction: %s is of unknown type\n", ii->disassemble());
161  }
162 
163  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Ready for Inst : %s\n",
164  computeUnit.cu_id, w->simdId, w->wfSlotId, ii->disassemble());
165 
166  // Non-scalar (i.e., vector) instructions may use VGPRs
167  if (!ii->isScalar()) {
168  if (!computeUnit.vrf[w->simdId]->operandsReady(w, ii)) {
169  *rdyStatus = NRDY_VGPR_NRDY;
170  return false;
171  }
172  }
173  // Scalar and non-scalar instructions may use SGPR
174  if (!computeUnit.srf[w->simdId]->operandsReady(w, ii)) {
175  *rdyStatus = NRDY_SGPR_NRDY;
176  return false;
177  }
178 
179  // The hardware implicitly executes S_WAITCNT 0 before executing
180  // the S_ENDPGM instruction. Implementing this implicit S_WAITCNT.
181  // isEndOfKernel() is used to identify the S_ENDPGM instruction.
182  // On identifying it, we do the following:
183  // 1. Wait for all older instructions to execute.
184  // 2. Once all the older instructions are executed, we add a wait
185  // count for the executed instruction(s) to complete.
186  if (ii->isEndOfKernel()) {
187  // Waiting for older instructions to execute
188  if (w->instructionBuffer.front()->seqNum() != ii->seqNum()) {
189  *rdyStatus = NRDY_WAIT_CNT;
190  return false;
191  }
192  // Older instructions have executed, adding implicit wait count
193  w->setStatus(Wavefront::S_WAITCNT);
194  w->setWaitCnts(0, 0, 0);
195  if (!w->waitCntsSatisfied()) {
196  *rdyStatus = NRDY_WAIT_CNT;
197  return false;
198  }
199  }
200  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit.cu_id,
201  w->simdId, w->wfSlotId, ii->disassemble());
202  *exeResType = mapWaveToExeUnit(w);
203  *rdyStatus = INST_RDY;
204  return true;
205 }
206 
207 int
209 {
210  GPUDynInstPtr ii = w->nextInstr();
211  assert(ii);
212  if (ii->isFlat()) {
222  return w->globalMem;
223  } else if (ii->isLocalMem()) {
224  return w->localMem;
225  } else if (ii->isGlobalMem()) {
226  if (!ii->isScalar()) {
227  return w->globalMem;
228  } else {
229  return w->scalarMem;
230  }
231  } else if (ii->isBranch() ||
232  ii->isALU() ||
233  (ii->isKernArgSeg() && ii->isLoad()) ||
234  ii->isArgSeg() ||
235  ii->isReturn() ||
236  ii->isEndOfKernel() ||
237  ii->isNop() ||
238  ii->isBarrier()) {
239  if (!ii->isScalar()) {
240  return w->simdId;
241  } else {
242  return w->scalarAluGlobalIdx;
243  }
244  }
245  panic("%s: unmapped to an execution resource", ii->disassemble());
246  return computeUnit.numExeUnits();
247 }
248 
249 void
251 {
256  toSchedule.reset();
257 
258  // Iterate over all WF slots across all SIMDs.
259  for (int simdId = 0; simdId < computeUnit.numVectorALUs; ++simdId) {
260  for (int wfSlot = 0; wfSlot < computeUnit.shader->n_wf; ++wfSlot) {
261  // reset the ready status of each wavefront
262  Wavefront *curWave = computeUnit.wfList[simdId][wfSlot];
263  nonrdytype_e rdyStatus = NRDY_ILLEGAL;
264  int exeResType = -1;
265  // check WF readiness: If the WF's oldest
266  // instruction is ready to issue then add the WF to the ready list
267  if (ready(curWave, &rdyStatus, &exeResType, wfSlot)) {
268  assert(curWave->simdId == simdId);
269  DPRINTF(GPUSched,
270  "Adding to readyList[%d]: SIMD[%d] WV[%d]: %d: %s\n",
271  exeResType,
272  curWave->simdId, curWave->wfDynId,
273  curWave->nextInstr()->seqNum(),
274  curWave->nextInstr()->disassemble());
275  toSchedule.markWFReady(curWave, exeResType);
276  }
277  collectStatistics(rdyStatus);
278  }
279  }
280 }
281 
284  : statistics::Group(parent, "ScoreboardCheckStage"),
285  ADD_STAT(stallCycles, "number of cycles wave stalled in SCB")
286 {
288 
295  stallCycles.subname(INST_RDY, csprintf("InstrReady"));
296 }
297 
298 } // namespace gem5
gem5::Wavefront::S_STALLED
@ S_STALLED
Definition: wavefront.hh:74
gem5::ComputeUnit::wfList
std::vector< std::vector< Wavefront * > > wfList
Definition: compute_unit.hh:293
gem5::ScoreboardCheckStage::NRDY_VGPR_NRDY
@ NRDY_VGPR_NRDY
Definition: scoreboard_check_stage.hh:74
gem5::ScoreboardCheckToSchedule::markWFReady
void markWFReady(Wavefront *wf, int func_unit_id)
Mark the WF as ready for execution on a particular functional unit.
Definition: comm.cc:69
gem5::MipsISA::w
Bitfield< 0 > w
Definition: pra_constants.hh:281
shader.hh
gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:299
gem5::ScoreboardCheckStage::NRDY_WF_STOP
@ NRDY_WF_STOP
Definition: scoreboard_check_stage.hh:69
gem5::Wavefront
Definition: wavefront.hh:62
gem5::ScoreboardCheckStage::ScoreboardCheckStageStats::ScoreboardCheckStageStats
ScoreboardCheckStageStats(statistics::Group *parent)
Definition: scoreboard_check_stage.cc:283
compute_unit.hh
gem5::Wavefront::S_STALLED_SLEEP
@ S_STALLED_SLEEP
Definition: wavefront.hh:76
gem5::ScoreboardCheckStage::NRDY_BARRIER_WAIT
@ NRDY_BARRIER_WAIT
Definition: scoreboard_check_stage.hh:73
gpu_static_inst.hh
gem5::ScoreboardCheckStage::NRDY_WAIT_CNT
@ NRDY_WAIT_CNT
Definition: scoreboard_check_stage.hh:71
gem5::statistics::DataWrapVec::subname
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
Definition: statistics.hh:399
scoreboard_check_stage.hh
gem5::ScoreboardCheckStage::~ScoreboardCheckStage
~ScoreboardCheckStage()
Definition: scoreboard_check_stage.cc:59
gem5::csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
gem5::Wavefront::S_STOPPED
@ S_STOPPED
Definition: wavefront.hh:68
gem5::ScoreboardCheckStage::computeUnit
ComputeUnit & computeUnit
Definition: scoreboard_check_stage.hh:93
gem5::ComputeUnit::resetBarrier
void resetBarrier(int bar_id)
Definition: compute_unit.cc:684
gem5::ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:355
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:294
gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:297
wavefront.hh
gem5::ScoreboardCheckStage::nonrdytype_e
nonrdytype_e
Definition: scoreboard_check_stage.hh:66
gem5::ScoreboardCheckToSchedule::reset
void reset() override
Reset the pipe stage interface.
Definition: comm.cc:61
gem5::ComputeUnit
Definition: compute_unit.hh:203
gem5::ScoreboardCheckStage::collectStatistics
void collectStatistics(nonrdytype_e rdyStatus)
Definition: scoreboard_check_stage.cc:64
gem5::ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:246
vector_register_file.hh
gem5::ScoreboardCheckStage::NRDY_SGPR_NRDY
@ NRDY_SGPR_NRDY
Definition: scoreboard_check_stage.hh:75
gem5::ComputeUnit::releaseWFsFromBarrier
void releaseWFsFromBarrier(int bar_id)
Definition: compute_unit.cc:706
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Wavefront::S_BARRIER
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:94
scalar_register_file.hh
gem5::ScoreboardCheckStage::ScoreboardCheckStage
ScoreboardCheckStage(const ComputeUnitParams &p, ComputeUnit &cu, ScoreboardCheckToSchedule &to_schedule)
Definition: scoreboard_check_stage.cc:50
gem5::ScoreboardCheckStage::mapWaveToExeUnit
int mapWaveToExeUnit(Wavefront *w)
Definition: scoreboard_check_stage.cc:208
gem5::Wavefront::nextInstr
GPUDynInstPtr nextInstr()
Definition: wavefront.cc:1169
gem5::ScoreboardCheckStage::NRDY_ILLEGAL
@ NRDY_ILLEGAL
Definition: scoreboard_check_stage.hh:68
gem5::Wavefront::wfDynId
uint64_t wfDynId
Definition: wavefront.hh:228
name
const std::string & name()
Definition: trace.cc:49
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:51
gem5::ScoreboardCheckStage::toSchedule
ScoreboardCheckToSchedule & toSchedule
Interface between scoreboard check and schedule stages.
Definition: scoreboard_check_stage.hh:100
gem5::ScoreboardCheckStage::ready
bool ready(Wavefront *w, nonrdytype_e *rdyStatus, int *exeResType, int wfSlot)
Definition: scoreboard_check_stage.cc:77
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:203
gem5::Wavefront::S_RETURNING
@ S_RETURNING
Definition: wavefront.hh:70
gem5::ScoreboardCheckToSchedule
Communication interface between ScoreboardCheck and Schedule stages.
Definition: comm.hh:64
gem5::Wavefront::S_WAITCNT
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:90
gem5::ScoreboardCheckStage::ScoreboardCheckStageStats::stallCycles
statistics::Vector stallCycles
Definition: scoreboard_check_stage.hh:109
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:205
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::ScoreboardCheckStage::INST_RDY
@ INST_RDY
Definition: scoreboard_check_stage.hh:76
gem5::ScoreboardCheckStage::NRDY_IB_EMPTY
@ NRDY_IB_EMPTY
Definition: scoreboard_check_stage.hh:70
gem5::ScoreboardCheckStage::NRDY_CONDITIONS
@ NRDY_CONDITIONS
Definition: scoreboard_check_stage.hh:77
gem5::ScoreboardCheckStage::NRDY_SLEEP
@ NRDY_SLEEP
Definition: scoreboard_check_stage.hh:72
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::ScoreboardCheckStage::exec
void exec()
Definition: scoreboard_check_stage.cc:250
gem5::statistics::VectorBase::init
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1037
gem5::ScoreboardCheckStage::stats
gem5::ScoreboardCheckStage::ScoreboardCheckStageStats stats
gem5::ComputeUnit::allAtBarrier
bool allAtBarrier(int bar_id)
Definition: compute_unit.cc:656
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:177
gem5::ComputeUnit::numExeUnits
int numExeUnits() const
Definition: compute_unit.cc:233
gem5::Wavefront::simdId
const int simdId
Definition: wavefront.hh:101

Generated on Tue Sep 21 2021 12:25:25 for gem5 by doxygen 1.8.17