release/current/scoreboard__check__stage_8cc_source.html

 /*

  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.

  * All rights reserved.

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions are met:

  *

  * 1. Redistributions of source code must retain the above copyright notice,

  * this list of conditions and the following disclaimer.

  *

  * 2. Redistributions in binary form must reproduce the above copyright notice,

  * this list of conditions and the following disclaimer in the documentation

  * and/or other materials provided with the distribution.

  *

  * 3. Neither the name of the copyright holder nor the names of its

  * contributors may be used to endorse or promote products derived from this

  * software without specific prior written permission.

  *

  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

  * POSSIBILITY OF SUCH DAMAGE.

  */


 #include "gpu-compute/scoreboard_check_stage.hh"


 #include "debug/GPUExec.hh"

 #include "debug/GPUSched.hh"

 #include "debug/GPUSync.hh"

 #include "gpu-compute/compute_unit.hh"

 #include "gpu-compute/gpu_static_inst.hh"

 #include "gpu-compute/scalar_register_file.hh"

 #include "gpu-compute/shader.hh"

 #include "gpu-compute/vector_register_file.hh"

 #include "gpu-compute/wavefront.hh"

 #include "params/ComputeUnit.hh"


 namespace gem5

 {


 /**
  * Construct the scoreboard check stage of a compute unit.
  *
  * @param p           compute unit parameters (not read by this constructor)
  * @param cu          compute unit this stage belongs to; also used as the
  *                    parent of the stage's statistics group and as the
  *                    prefix of the stage's name
  * @param to_schedule interface through which ready wavefronts are handed
  *                    to the schedule stage
  */
 ScoreboardCheckStage::ScoreboardCheckStage(
     const ComputeUnitParams &p,
     ComputeUnit &cu,
     ScoreboardCheckToSchedule &to_schedule)
     : computeUnit(cu),
       toSchedule(to_schedule),
       _name(cu.name() + ".ScoreboardCheckStage"),
       stats(&cu)
 {}


 /** Nothing to release explicitly: the destructor body is empty. */
 ScoreboardCheckStage::~ScoreboardCheckStage() = default;


 /**
  * Bump the stall-cycle counter that corresponds to a readiness status.
  *
  * @param rdyStatus status produced by ready(); must be neither NRDY_ILLEGAL
  *                  nor at/after NRDY_CONDITIONS, otherwise the simulator
  *                  panics.
  */
 void
 ScoreboardCheckStage::collectStatistics(nonrdytype_e rdyStatus)
 {
     // A status is countable only if it was actually set (not the
     // NRDY_ILLEGAL sentinel) and lies inside the stats vector.
     const bool badStatus =
         rdyStatus == NRDY_ILLEGAL || rdyStatus >= NRDY_CONDITIONS;
     panic_if(badStatus,
              "Instruction ready status %d is illegal!!!", rdyStatus);

     stats.stallCycles[rdyStatus]++;
 }


 /**
  * Decide whether a wavefront can issue its next instruction.
  *
  * @param w           wavefront to examine
  * @param rdyStatus   out: INST_RDY when the function returns true,
  *                    otherwise the reason the wavefront is not ready
  * @param exeResType  out: execution resource chosen by mapWaveToExeUnit();
  *                    written only when the function returns true
  * @param wfSlot      slot of the wavefront; not used by this function
  * @return true if the next instruction is ready to issue
  */
 bool
 ScoreboardCheckStage::ready(Wavefront *w, nonrdytype_e *rdyStatus,
                             int *exeResType, int wfSlot)
 {
     // Record the stall reason and report "not ready" in one step.
     auto stall = [rdyStatus](nonrdytype_e reason) {
         *rdyStatus = reason;
         return false;
     };

     // NOTE(review): the wavefront status is deliberately re-read before
     // every check below instead of being cached; the wave/CU calls in
     // between may change it - confirm before caching.

     // A waitCnt instruction was dispatched or executed: hold the next
     // instruction back until the wait counts are satisfied.
     if (w->getStatus() == Wavefront::S_WAITCNT && !w->waitCntsSatisfied()) {
         return stall(NRDY_WAIT_CNT);
     }

     // A sleep instruction was dispatched or executed: hold the next
     // instruction back until the sleep period is over.
     if (w->getStatus() == Wavefront::S_STALLED_SLEEP && !w->sleepDone()) {
         return stall(NRDY_SLEEP);
     }

     // Barrier handling comes BEFORE the instruction buffer occupancy
     // check; otherwise a barrier that is the last instruction in the
     // buffer would deadlock.
     if (w->getStatus() == Wavefront::S_BARRIER) {
         assert(w->hasBarrier());
         const int bar_id = w->barrierId();

         if (computeUnit.allAtBarrier(bar_id)) {
             DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves at barrier "
                     "Id%d. Resetting barrier resources.\n", w->computeUnit->cu_id,
                     w->simdId, w->wfSlotId, w->wfDynId, bar_id);
             computeUnit.resetBarrier(bar_id);
             computeUnit.releaseWFsFromBarrier(bar_id);
         } else {
             // Some waves have not arrived yet: keep waiting.
             DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalled at "
                     "barrier Id%d. %d waves remain.\n", w->computeUnit->cu_id,
                     w->simdId, w->wfSlotId, w->wfDynId, bar_id,
                     w->computeUnit->numYetToReachBarrier(bar_id));
             return stall(NRDY_BARRIER_WAIT);
         }
     }

     // Stopped, returning and stalled wavefronts never issue.
     switch (w->getStatus()) {
       case Wavefront::S_STOPPED:
       case Wavefront::S_RETURNING:
       case Wavefront::S_STALLED:
         return stall(NRDY_WF_STOP);
       default:
         break;
     }

     // Nothing has been fetched for this wavefront.
     if (w->instructionBuffer.empty()) {
         return stall(NRDY_IB_EMPTY);
     }

     // A null next instruction means the only buffered instruction has
     // already been dispatched, so there is nothing left to check.
     GPUDynInstPtr inst = w->nextInstr();
     if (!inst) {
         return stall(NRDY_IB_EMPTY);
     }

     // The readiness logic below is error prone. Until it is reworked,
     // refuse to let an instruction of a type it does not know about slip
     // through silently.
     const bool knownType =
         inst->isBarrier() || inst->isNop() || inst->isReturn() ||
         inst->isBranch() || inst->isALU() || inst->isLoad() ||
         inst->isStore() || inst->isAtomic() || inst->isEndOfKernel() ||
         inst->isMemSync() || inst->isFlat() || inst->isFlatGlobal() ||
         inst->isSleep() || inst->isLocalMem();
     panic_if(!knownType,
              "next instruction: %s is of unknown type\n",
              inst->disassemble());

     DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Checking Ready for Inst : %s\n",
             computeUnit.cu_id, w->simdId, w->wfSlotId, inst->disassemble());

     // Only vector (non-scalar) instructions are checked against the VRF.
     if (!inst->isScalar() &&
         !computeUnit.vrf[w->simdId]->operandsReady(w, inst)) {
         return stall(NRDY_VGPR_NRDY);
     }

     // Scalar and vector instructions alike are checked against the SRF.
     if (!computeUnit.srf[w->simdId]->operandsReady(w, inst)) {
         return stall(NRDY_SGPR_NRDY);
     }

     // Hardware implicitly performs S_WAITCNT 0 before S_ENDPGM, and
     // isEndOfKernel() identifies S_ENDPGM. Model the implicit wait:
     //  1. wait until every older instruction has executed, then
     //  2. install zero wait counts so the executed instructions must
     //     complete before the kernel end is issued.
     if (inst->isEndOfKernel()) {
         // Older instructions are still ahead of us in the buffer.
         if (w->instructionBuffer.front()->seqNum() != inst->seqNum()) {
             return stall(NRDY_WAIT_CNT);
         }

         w->setStatus(Wavefront::S_WAITCNT);
         w->setWaitCnts(0, 0, 0);
         if (!w->waitCntsSatisfied()) {
             return stall(NRDY_WAIT_CNT);
         }
     }

     DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit.cu_id,
             w->simdId, w->wfSlotId, inst->disassemble());

     *exeResType = mapWaveToExeUnit(w);
     *rdyStatus = INST_RDY;
     return true;
 }


 /**
  * Pick the execution resource for a wavefront's next instruction.
  *
  * The checks are ordered: flat, then local memory, then global memory,
  * then everything that runs on an ALU. The wavefront must have a next
  * instruction (asserted).
  *
  * @param w wavefront whose next instruction is being mapped
  * @return the wavefront's globalMem, localMem, scalarMem, simdId or
  *         scalarAluGlobalIdx value, depending on the instruction; panics
  *         if the instruction matches none of the known categories
  */
 int
 ScoreboardCheckStage::mapWaveToExeUnit(Wavefront *w)
 {
     GPUDynInstPtr inst = w->nextInstr();
     assert(inst);

     // Flat accesses are mapped to the global memory resource.
     if (inst->isFlat()) {
         return w->globalMem;
     }

     if (inst->isLocalMem()) {
         return w->localMem;
     }

     // Global memory: scalar accesses use the scalar memory resource.
     if (inst->isGlobalMem()) {
         if (inst->isScalar()) {
             return w->scalarMem;
         }
         return w->globalMem;
     }

     // Everything below is mapped to an ALU resource: the wave's SIMD
     // index for vector instructions, the scalar ALU index otherwise.
     const bool runsOnAlu =
         inst->isBranch() ||
         inst->isALU() ||
         (inst->isKernArgSeg() && inst->isLoad()) ||
         inst->isArgSeg() ||
         inst->isReturn() ||
         inst->isEndOfKernel() ||
         inst->isNop() ||
         inst->isBarrier();
     if (runsOnAlu) {
         if (inst->isScalar()) {
             return w->scalarAluGlobalIdx;
         }
         return w->simdId;
     }

     panic("%s: unmapped to an execution resource", inst->disassemble());
     return computeUnit.numExeUnits();
 }


 void

 ScoreboardCheckStage::exec()

 {

     toSchedule.reset();


     // Iterate over all WF slots across all SIMDs.

     for (int simdId = 0; simdId < computeUnit.numVectorALUs; ++simdId) {

         for (int wfSlot = 0; wfSlot < computeUnit.shader->n_wf; ++wfSlot) {

             // reset the ready status of each wavefront

             Wavefront *curWave = computeUnit.wfList[simdId][wfSlot];

             nonrdytype_e rdyStatus = NRDY_ILLEGAL;

             int exeResType = -1;

             // check WF readiness: If the WF's oldest

             // instruction is ready to issue then add the WF to the ready list

             if (ready(curWave, &rdyStatus, &exeResType, wfSlot)) {

                 assert(curWave->simdId == simdId);

                 DPRINTF(GPUSched,

                         "Adding to readyList[%d]: SIMD[%d] WV[%d]: %d: %s\n",

                         exeResType,

                         curWave->simdId, curWave->wfDynId,

                         curWave->nextInstr()->seqNum(),

                         curWave->nextInstr()->disassemble());

                 toSchedule.markWFReady(curWave, exeResType);

             }

             collectStatistics(rdyStatus);

         }

     }

 }


 /**
  * Register the scoreboard check statistics under the given parent group.
  *
  * stallCycles is a vector with one bucket per readiness status and is
  * sized by NRDY_CONDITIONS. Every status that collectStatistics() can
  * count gets a readable sub-name, including NRDY_SLEEP, which ready()
  * reports for sleeping wavefronts. NRDY_ILLEGAL is left unnamed on
  * purpose: collectStatistics() panics on it, so it is never counted.
  *
  * @param parent statistics group that owns this group
  */
 ScoreboardCheckStage::
 ScoreboardCheckStageStats::ScoreboardCheckStageStats(statistics::Group *parent)
     : statistics::Group(parent, "ScoreboardCheckStage"),
       ADD_STAT(stallCycles, "number of cycles wave stalled in SCB")
 {
     stallCycles.init(NRDY_CONDITIONS);

     stallCycles.subname(NRDY_WF_STOP, csprintf("WFStop"));
     stallCycles.subname(NRDY_IB_EMPTY, csprintf("IBEmpty"));
     stallCycles.subname(NRDY_WAIT_CNT, csprintf("WaitCnt"));
     // Previously missing: sleep stalls were counted but had no sub-name.
     stallCycles.subname(NRDY_SLEEP, csprintf("Sleep"));
     stallCycles.subname(NRDY_BARRIER_WAIT, csprintf("BarrierWait"));
     stallCycles.subname(NRDY_VGPR_NRDY, csprintf("VgprBusy"));
     stallCycles.subname(NRDY_SGPR_NRDY, csprintf("SgprBusy"));
     stallCycles.subname(INST_RDY, csprintf("InstrReady"));
 }


 } // namespace gem5

DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186

gem5::ComputeUnit
Definition: compute_unit.hh:202

gem5::ComputeUnit::releaseWFsFromBarrier
void releaseWFsFromBarrier(int bar_id)
Definition: compute_unit.cc:711

gem5::ComputeUnit::resetBarrier
void resetBarrier(int bar_id)
Definition: compute_unit.cc:689

gem5::ComputeUnit::numExeUnits
int numExeUnits() const
Definition: compute_unit.cc:238

gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:297

gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:292

gem5::ComputeUnit::wfList
std::vector< std::vector< Wavefront * > > wfList
Definition: compute_unit.hh:291

gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:295

gem5::ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:244

gem5::ComputeUnit::allAtBarrier
bool allAtBarrier(int bar_id)
Definition: compute_unit.cc:661

gem5::ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:353

gem5::ScoreboardCheckStage::ready
bool ready(Wavefront *w, nonrdytype_e *rdyStatus, int *exeResType, int wfSlot)
Definition: scoreboard_check_stage.cc:75

gem5::ScoreboardCheckStage::ScoreboardCheckStage
ScoreboardCheckStage(const ComputeUnitParams &p, ComputeUnit &cu, ScoreboardCheckToSchedule &to_schedule)
Definition: scoreboard_check_stage.cc:48

gem5::ScoreboardCheckStage::stats
gem5::ScoreboardCheckStage::ScoreboardCheckStageStats stats

gem5::ScoreboardCheckStage::~ScoreboardCheckStage
~ScoreboardCheckStage()
Definition: scoreboard_check_stage.cc:57

gem5::ScoreboardCheckStage::collectStatistics
void collectStatistics(nonrdytype_e rdyStatus)
Definition: scoreboard_check_stage.cc:62

gem5::ScoreboardCheckStage::exec
void exec()
Definition: scoreboard_check_stage.cc:248

gem5::ScoreboardCheckStage::nonrdytype_e
nonrdytype_e
Definition: scoreboard_check_stage.hh:65

gem5::ScoreboardCheckStage::NRDY_SGPR_NRDY
@ NRDY_SGPR_NRDY
Definition: scoreboard_check_stage.hh:73

gem5::ScoreboardCheckStage::NRDY_ILLEGAL
@ NRDY_ILLEGAL
Definition: scoreboard_check_stage.hh:66

gem5::ScoreboardCheckStage::NRDY_SLEEP
@ NRDY_SLEEP
Definition: scoreboard_check_stage.hh:70

gem5::ScoreboardCheckStage::INST_RDY
@ INST_RDY
Definition: scoreboard_check_stage.hh:74

gem5::ScoreboardCheckStage::NRDY_CONDITIONS
@ NRDY_CONDITIONS
Definition: scoreboard_check_stage.hh:75

gem5::ScoreboardCheckStage::NRDY_WAIT_CNT
@ NRDY_WAIT_CNT
Definition: scoreboard_check_stage.hh:69

gem5::ScoreboardCheckStage::NRDY_IB_EMPTY
@ NRDY_IB_EMPTY
Definition: scoreboard_check_stage.hh:68

gem5::ScoreboardCheckStage::NRDY_BARRIER_WAIT
@ NRDY_BARRIER_WAIT
Definition: scoreboard_check_stage.hh:71

gem5::ScoreboardCheckStage::NRDY_VGPR_NRDY
@ NRDY_VGPR_NRDY
Definition: scoreboard_check_stage.hh:72

gem5::ScoreboardCheckStage::NRDY_WF_STOP
@ NRDY_WF_STOP
Definition: scoreboard_check_stage.hh:67

gem5::ScoreboardCheckStage::computeUnit
ComputeUnit & computeUnit
Definition: scoreboard_check_stage.hh:91

gem5::ScoreboardCheckStage::toSchedule
ScoreboardCheckToSchedule & toSchedule
Interface between scoreboard check and schedule stages.
Definition: scoreboard_check_stage.hh:98

gem5::ScoreboardCheckStage::mapWaveToExeUnit
int mapWaveToExeUnit(Wavefront *w)
Definition: scoreboard_check_stage.cc:206

gem5::ScoreboardCheckToSchedule
Communication interface between ScoreboardCheck and Schedule stages.
Definition: comm.hh:63

gem5::ScoreboardCheckToSchedule::reset
void reset() override
Reset the pipe stage interface.
Definition: comm.cc:59

gem5::ScoreboardCheckToSchedule::markWFReady
void markWFReady(Wavefront *wf, int func_unit_id)
Mark the WF as ready for execution on a particular functional unit.
Definition: comm.cc:67

gem5::Shader::n_wf
int n_wf
Definition: shader.hh:235

gem5::Wavefront
Definition: wavefront.hh:61

gem5::Wavefront::simdId
const int simdId
Definition: wavefront.hh:99

gem5::Wavefront::nextInstr
GPUDynInstPtr nextInstr()
Definition: wavefront.cc:1191

gem5::Wavefront::S_BARRIER
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:92

gem5::Wavefront::S_STALLED_SLEEP
@ S_STALLED_SLEEP
Definition: wavefront.hh:74

gem5::Wavefront::S_WAITCNT
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:88

gem5::Wavefront::S_RETURNING
@ S_RETURNING
Definition: wavefront.hh:68

gem5::Wavefront::S_STALLED
@ S_STALLED
Definition: wavefront.hh:72

gem5::Wavefront::S_STOPPED
@ S_STOPPED
Definition: wavefront.hh:66

gem5::Wavefront::wfDynId
uint64_t wfDynId
Definition: wavefront.hh:226

gem5::statistics::DataWrapVec::subname
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
Definition: statistics.hh:402

gem5::statistics::Group
Statistics container.
Definition: group.hh:94

gem5::statistics::VectorBase::init
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1040

compute_unit.hh

gpu_static_inst.hh

ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75

panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178

panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204

gem5::VegaISA::w
Bitfield< 6 > w
Definition: pagetable.hh:59

gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70

gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:38

gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49

gem5::csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161

scalar_register_file.hh

scoreboard_check_stage.hh

shader.hh

gem5::ScoreboardCheckStage::ScoreboardCheckStageStats::stallCycles
statistics::Vector stallCycles
Definition: scoreboard_check_stage.hh:107

gem5::ScoreboardCheckStage::ScoreboardCheckStageStats::ScoreboardCheckStageStats
ScoreboardCheckStageStats(statistics::Group *parent)
Definition: scoreboard_check_stage.cc:281

name
const std::string & name()
Definition: trace.cc:49

vector_register_file.hh

wavefront.hh