Go to the documentation of this file.
   36 #include <unordered_set> 
   38 #include "debug/GPUSched.hh" 
   39 #include "debug/GPUVRF.hh" 
   49     : computeUnit(cu), fromScoreboardCheck(from_scoreboard_check),
 
   50       toExecute(to_execute),
 
   51       _name(cu.
name() + 
".ScheduleStage"),
 
   52       vectorAluRdy(false), scalarAluRdy(false), scalarMemBusRdy(false),
 
   53       scalarMemIssueRdy(false), glbMemBusRdy(false), glbMemIssueRdy(false),
 
   54       locMemBusRdy(false), locMemIssueRdy(false), stats(&cu, cu.numExeUnits())
 
   78              "Scheduler should have same number of entries as CU's readyList");
 
  119     for (
int j = firstMemUnit; 
j <= lastMemUnit; 
j++) {
 
  123         if (!readyListSize) {
 
  132         assert(gpu_dyn_inst);
 
  139             if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
 
  143                 if (gpu_dyn_inst->isFlat()) {
 
  147             if (gpu_dyn_inst->isStore() && gpu_dyn_inst->isGlobalSeg()) {
 
  156         if (
j >= firstMemUnit && 
j <= lastMemUnit) {
 
  162         if (!readyListSize) {
 
  171         assert(gpu_dyn_inst);
 
  226     assert(gpu_dyn_inst);
 
  227     Wavefront *wf = gpu_dyn_inst->wavefront();
 
  228     bool accessVrfWr = 
true;
 
  229     if (!gpu_dyn_inst->isScalar()) {
 
  231             ->canScheduleWriteOperands(wf, gpu_dyn_inst);
 
  234         ->canScheduleWriteOperands(wf, gpu_dyn_inst);
 
  235     bool accessRf = accessVrfWr && accessSrfWr;
 
  237         if (!gpu_dyn_inst->isScalar()) {
 
  271         assert(gpu_dyn_inst);
 
  272         Wavefront *wf = gpu_dyn_inst->wavefront();
 
  296     assert(gpu_dyn_inst);
 
  297     Wavefront *wf = gpu_dyn_inst->wavefront();
 
  298     bool accessVrf = 
true;
 
  299     if (!gpu_dyn_inst->isScalar()) {
 
  301             ->canScheduleReadOperands(wf, gpu_dyn_inst);
 
  304         ->canScheduleReadOperands(wf, gpu_dyn_inst);
 
  308     bool accessRf = accessVrf && accessSrf;
 
  310         DPRINTF(GPUSched, 
"schList[%d]: Adding: SIMD[%d] WV[%d]: %d: %s\n",
 
  312                 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
 
  316         schList.at(exeType).push_back(std::make_pair(gpu_dyn_inst, 
RFBUSY));
 
  326         if (!gpu_dyn_inst->isScalar()) {
 
  328                 ->scheduleReadOperands(wf, gpu_dyn_inst);
 
  332         DPRINTF(GPUSched, 
"schList[%d]: Added: SIMD[%d] WV[%d]: %d: %s\n",
 
  334                 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
 
  351         DPRINTF(GPUSched, 
"schList[%d]: Could not add: " 
  352                 "SIMD[%d] WV[%d]: %d: %s\n",
 
  354                 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
 
  366     assert(gpu_dyn_inst);
 
  367     auto schIter = 
schList.at(exeType).begin();
 
  368     while (schIter != 
schList.at(exeType).end()
 
  369            && schIter->first->wfDynId < gpu_dyn_inst->wfDynId) {
 
  372     schList.at(exeType).insert(schIter, std::make_pair(gpu_dyn_inst, 
RFREADY));
 
  416     assert(gpu_dyn_inst);
 
  417     Wavefront *wf = gpu_dyn_inst->wavefront();
 
  428     if (gpu_dyn_inst->isNop()) {
 
  435         } 
else if (!gpu_dyn_inst->isScalar() && !
vectorAluRdy) {
 
  439     } 
else if (gpu_dyn_inst->isEndOfKernel()) {
 
  445     } 
else if (gpu_dyn_inst->isBarrier() || gpu_dyn_inst->isBranch()
 
  446                || gpu_dyn_inst->isALU()) {
 
  451         } 
else if (!gpu_dyn_inst->isScalar() && !
vectorAluRdy) {
 
  455     } 
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
 
  477     } 
else if (gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
 
  498     } 
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isLocalMem()) {
 
  517     } 
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isFlat()) {
 
  545         panic(
"%s: unknown instr checked for readiness",
 
  546               gpu_dyn_inst->disassemble());
 
  563         auto schIter = 
schList.at(
j).begin();
 
  564         bool dispatched = 
false;
 
  565         while (schIter != 
schList.at(
j).end()) {
 
  567             if (schIter->second == 
RFREADY) {
 
  570                 if (!dispatched && dispRdy) {
 
  579                     if (!
mp->isMemSync() && !
mp->isScalar() &&
 
  580                         (
mp->isGlobalMem() || 
mp->isFlat())) {
 
  585                     DPRINTF(GPUSched, 
"dispatchList[%d]: fillDispatchList: " 
  586                             "EMPTY->EXREADY\n", 
j);
 
  587                     schIter->first = 
nullptr;
 
  588                     schIter = 
schList.at(
j).erase(schIter);
 
  593                     schIter->first->wavefront()->stats.schStalls++;
 
  596                         schIter->first->wavefront()->stats.schResourceStalls++;
 
  634             == 
EXREADY && gpu_dyn_inst->isFlat()) {
 
  635             Wavefront *wf = gpu_dyn_inst->wavefront();
 
  646                     ->wavefront()->stats.schLdsArbStalls++;
 
  652             DPRINTF(GPUSched, 
"dispatchList[%d]: arbVrfLds: " 
  667             assert(gpu_dyn_inst);
 
  668             Wavefront *wf = gpu_dyn_inst->wavefront();
 
  675             if (!gpu_dyn_inst->isScalar()) {
 
  677                     ->operandReadComplete(wf, gpu_dyn_inst);
 
  680                 ->operandReadComplete(wf, gpu_dyn_inst);
 
  681             bool operandsReady = vrfRdy && srfRdy;
 
  683                 DPRINTF(GPUSched, 
"schList[%d]: WV[%d] operands ready for: " 
  684                         "%d: %s\n", 
j, wf->
wfDynId, gpu_dyn_inst->seqNum(),
 
  685                         gpu_dyn_inst->disassemble());
 
  686                 DPRINTF(GPUSched, 
"schList[%d]: WV[%d] RFBUSY->RFREADY\n",
 
  690                 DPRINTF(GPUSched, 
"schList[%d]: WV[%d] operands not ready " 
  692                         gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
 
  724             Wavefront *wf = gpu_dyn_inst->wavefront();
 
  731                 if (!gpu_dyn_inst->isScalar()) {
 
  733                         ->dispatchInstruction(gpu_dyn_inst);
 
  737                 std::stringstream 
ss;
 
  738                 for (
auto id : execUnitIds) {
 
  741                 DPRINTF(GPUSched, 
"dispatchList[%d]: SIMD[%d] WV[%d]: %d: %s" 
  742                         "    Reserving ExeRes[ %s]\n",
 
  744                         gpu_dyn_inst->disassemble(), 
ss.str());
 
  746                 for (
auto execUnitId : execUnitIds) {
 
  747                     panic_if(exeUnitReservations.at(execUnitId),
 
  748                              "Execution unit %d is reserved!!!\n" 
  749                              "SIMD[%d] WV[%d]: %d: %s",
 
  751                              gpu_dyn_inst->seqNum(),
 
  752                              gpu_dyn_inst->disassemble());
 
  753                     exeUnitReservations.at(execUnitId) = 
true;
 
  760                 if (execUnitIds.size() > 1) {
 
  761                     M5_VAR_USED 
int lm_exec_unit = wf->
localMem;
 
  765             } 
else if (
s == 
SKIP) {
 
  770                 M5_VAR_USED 
int gm_exec_unit = wf->
globalMem;
 
  788     : 
Stats::Group(parent, 
"ScheduleStage"),
 
  789       ADD_STAT(rdyListEmpty ,
"number of cycles no wave on ready list per " 
  790                "execution resource"),
 
  791       ADD_STAT(rdyListNotEmpty, 
"number of cycles one or more wave on ready " 
  792                "list per execution resource"),
 
  793       ADD_STAT(addToSchListStalls, 
"number of cycles a wave is not added to " 
  794                "schList per execution resource when ready list is not empty"),
 
  795       ADD_STAT(schListToDispList, 
"number of cycles a wave is added to " 
  796                "dispatchList per execution resource"),
 
  797       ADD_STAT(schListToDispListStalls, 
"number of cycles no wave is added to" 
  798                " dispatchList per execution resource"),
 
  799       ADD_STAT(rfAccessStalls, 
"number of stalls due to RF access denied"),
 
  800       ADD_STAT(ldsBusArbStalls, 
"number of stalls due to VRF->LDS bus " 
  802       ADD_STAT(opdNrdyStalls, 
"number of stalls in SCH due to operands not " 
  804       ADD_STAT(dispNrdyStalls, 
"number of stalls in SCH due to resource not " 
  
ScheduleStageStats(Stats::Group *parent, int num_exec_units)
std::vector< WaitClass > vectorALUs
Stats::Vector rfAccessStalls
WaitClass vectorSharedMemUnit
void acqCoalescerToken(GPUDynInstPtr mp)
WaitClass vrfToGlobalMemPipeBus
@ SCH_VECTOR_MEM_COALESCER_NRDY
LocalMemPipeline localMemoryPipe
void reset() override
Reset the pipe stage interface.
@ SCH_VECTOR_MEM_ISSUE_NRDY
GPUDynInstPtr & readyInst(int func_unit_id)
DISPATCH_STATUS dispatchStatus(int func_unit_id) const
WaitClass vectorGlobalMemUnit
@ SCH_FLAT_MEM_COALESCER_NRDY
void deleteFromSch(Wavefront *w)
@ S_BARRIER
WF is stalled at a barrier.
void insertInPipeMap(Wavefront *w)
Wavefront::WavefrontStats stats
std::vector< Scheduler > scheduler
void updateReadyList(int func_unit_id)
Delete all wavefronts that have been marked as ready at scoreboard stage but are found to have empty instruction buffers at schedule stage.
int numVectorGlobalMemUnits
std::vector< WaitClass > scalarALUs
@ SCH_RF_ACCESS_NRDY_CONDITIONS
bool rdy(Cycles cycles=Cycles(0)) const
@ SCH_RF_OPD_NRDY_CONDITIONS
std::vector< std::deque< std::pair< GPUDynInstPtr, SCH_STATUS > > > schList
void setStatus(status_e newStatus)
bool outstandingReqsCheck(GPUDynInstPtr mp) const
@ SCH_SCALAR_MEM_ISSUE_NRDY
void reinsertToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
@ SCH_LOCAL_MEM_FIFO_NRDY
std::vector< ScalarRegisterFile * > srf
void dispatchTransition(const GPUDynInstPtr &gpu_dyn_inst, int func_unit_id, DISPATCH_STATUS disp_status)
Once the scheduler has chosen a winning WF for execution, and after the WF's oldest instruction's ope...
Stats::Scalar ldsBusArbStalls
@ SCH_VECTOR_MEM_BUS_BUSY_NRDY
@ SCH_SCALAR_MEM_BUS_BUSY_NRDY
int numVectorSharedMemUnits
Communication interface between Schedule and Execute stages.
Stats::Scalar schRfAccessStalls
bool isOldestInstWaitcnt()
bool isGMReqFIFOWrRdy(uint32_t pendReqs=0) const
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
std::vector< VectorRegisterFile * > vrf
@ SCH_LOCAL_MEM_BUS_BUSY_NRDY
@ S_WAITCNT
wavefront has unsatisfied wait counts
ScheduleStage(const ComputeUnitParams &p, ComputeUnit &cu, ScoreboardCheckToSchedule &from_scoreboard_check, ScheduleToExecute &to_execute)
@ SCH_VECTOR_MEM_REQS_NRDY
Stats::Scalar schOpdNrdyStalls
@ SCH_FLAT_MEM_ISSUE_NRDY
@ SCH_FLAT_MEM_BUS_BUSY_NRDY
ComputeUnit & computeUnit
WaitClass vrfToLocalMemPipeBus
void scheduleRfDestOperands()
ScheduleToExecute & toExecute
ScalarMemPipeline scalarMemoryPipe
Stats::Vector rdyListNotEmpty
WaitClass srfToScalarMemPipeBus
ScoreboardCheckToSchedule & fromScoreboardCheck
const std::string & name()
Derived & init(size_type size)
Set this vector to have the given size.
GlobalMemPipeline globalMemoryPipe
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true and allows the programmer to specify diagnostic printout.
@ SCH_SCALAR_MEM_FIFO_NRDY
bool isOldestInstBarrier()
Communication interface between ScoreboardCheck and Schedule stages.
bool dispatchReady(const GPUDynInstPtr &gpu_dyn_inst)
Stats::Vector opdNrdyStalls
Stats::Vector schListToDispList
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
std::vector< Wavefront * > & readyWFs(int func_unit_id)
TODO: These methods expose this class' implementation too much by returning references to its internal data structures directly.
bool addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
Stats::Vector schListToDispListStalls
std::deque< GPUDynInstPtr > instructionBuffer
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
int numReadyLists() const
Returns the number of ready lists (i.e., the number of functional units).
@ SCH_LOCAL_MEM_ISSUE_NRDY
std::unordered_set< uint64_t > wavesInSch
void incVMemInstsIssued()
Stats::Vector addToSchListStalls
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true and allows the programmer to specify diagnostic printout.
void checkRfOperandReadComplete()
Stats::Vector rdyListEmpty
bool coalescerReady(GPUDynInstPtr mp) const
bool schedRfWrites(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
ScheduleStage::ScheduleStageStats stats
std::string csprintf(const char *format, const Args &...args)
void arbitrateVrfToLdsBus()
void doDispatchListTransition(int unitId, DISPATCH_STATUS s, const GPUDynInstPtr &gpu_dyn_inst)
std::vector< int > reserveResources()
Stats::Vector dispNrdyStalls
#define panic(...)
This implements a cprintf based panic() function.
void incLGKMInstsIssued()
Generated on Tue Jun 22 2021 15:28:28 for gem5 by  doxygen 1.8.17