Go to the documentation of this file.
36 #include <unordered_set>
38 #include "debug/GPUSched.hh"
39 #include "debug/GPUVRF.hh"
49 : computeUnit(cu), fromScoreboardCheck(from_scoreboard_check),
50 toExecute(to_execute),
51 _name(cu.
name() +
".ScheduleStage"),
52 vectorAluRdy(false), scalarAluRdy(false), scalarMemBusRdy(false),
53 scalarMemIssueRdy(false), glbMemBusRdy(false), glbMemIssueRdy(false),
54 locMemBusRdy(false), locMemIssueRdy(false), stats(&cu, cu.numExeUnits())
78 "Scheduler should have same number of entries as CU's readyList");
119 for (
int j = firstMemUnit;
j <= lastMemUnit;
j++) {
123 if (!readyListSize) {
132 assert(gpu_dyn_inst);
139 if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
143 if (gpu_dyn_inst->isFlat()) {
147 if (gpu_dyn_inst->isStore() && gpu_dyn_inst->isGlobalSeg()) {
156 if (
j >= firstMemUnit &&
j <= lastMemUnit) {
162 if (!readyListSize) {
171 assert(gpu_dyn_inst);
226 assert(gpu_dyn_inst);
227 Wavefront *wf = gpu_dyn_inst->wavefront();
228 bool accessVrfWr =
true;
229 if (!gpu_dyn_inst->isScalar()) {
231 ->canScheduleWriteOperands(wf, gpu_dyn_inst);
234 ->canScheduleWriteOperands(wf, gpu_dyn_inst);
235 bool accessRf = accessVrfWr && accessSrfWr;
237 if (!gpu_dyn_inst->isScalar()) {
271 assert(gpu_dyn_inst);
272 Wavefront *wf = gpu_dyn_inst->wavefront();
296 assert(gpu_dyn_inst);
297 Wavefront *wf = gpu_dyn_inst->wavefront();
298 bool accessVrf =
true;
299 if (!gpu_dyn_inst->isScalar()) {
301 ->canScheduleReadOperands(wf, gpu_dyn_inst);
304 ->canScheduleReadOperands(wf, gpu_dyn_inst);
308 bool accessRf = accessVrf && accessSrf;
310 DPRINTF(GPUSched,
"schList[%d]: Adding: SIMD[%d] WV[%d]: %d: %s\n",
312 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
316 schList.at(exeType).push_back(std::make_pair(gpu_dyn_inst,
RFBUSY));
326 if (!gpu_dyn_inst->isScalar()) {
328 ->scheduleReadOperands(wf, gpu_dyn_inst);
332 DPRINTF(GPUSched,
"schList[%d]: Added: SIMD[%d] WV[%d]: %d: %s\n",
334 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
351 DPRINTF(GPUSched,
"schList[%d]: Could not add: "
352 "SIMD[%d] WV[%d]: %d: %s\n",
354 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
366 assert(gpu_dyn_inst);
367 auto schIter =
schList.at(exeType).begin();
368 while (schIter !=
schList.at(exeType).end()
369 && schIter->first->wfDynId < gpu_dyn_inst->wfDynId) {
372 schList.at(exeType).insert(schIter, std::make_pair(gpu_dyn_inst,
RFREADY));
416 assert(gpu_dyn_inst);
417 Wavefront *wf = gpu_dyn_inst->wavefront();
428 if (gpu_dyn_inst->isNop()) {
435 }
else if (!gpu_dyn_inst->isScalar() && !
vectorAluRdy) {
439 }
else if (gpu_dyn_inst->isEndOfKernel()) {
445 }
else if (gpu_dyn_inst->isBarrier() || gpu_dyn_inst->isBranch()
446 || gpu_dyn_inst->isALU()) {
451 }
else if (!gpu_dyn_inst->isScalar() && !
vectorAluRdy) {
455 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
477 }
else if (gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
498 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isLocalMem()) {
517 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isFlat()) {
545 panic(
"%s: unknown instr checked for readiness",
546 gpu_dyn_inst->disassemble());
563 auto schIter =
schList.at(
j).begin();
564 bool dispatched =
false;
565 while (schIter !=
schList.at(
j).end()) {
567 if (schIter->second ==
RFREADY) {
570 if (!dispatched && dispRdy) {
579 if (!
mp->isMemSync() && !
mp->isScalar() &&
580 (
mp->isGlobalMem() ||
mp->isFlat())) {
585 DPRINTF(GPUSched,
"dispatchList[%d]: fillDispatchList: "
586 "EMPTY->EXREADY\n",
j);
587 schIter->first =
nullptr;
588 schIter =
schList.at(
j).erase(schIter);
593 schIter->first->wavefront()->stats.schStalls++;
596 schIter->first->wavefront()->stats.schResourceStalls++;
634 ==
EXREADY && gpu_dyn_inst->isFlat()) {
635 Wavefront *wf = gpu_dyn_inst->wavefront();
646 ->wavefront()->stats.schLdsArbStalls++;
652 DPRINTF(GPUSched,
"dispatchList[%d]: arbVrfLds: "
667 assert(gpu_dyn_inst);
668 Wavefront *wf = gpu_dyn_inst->wavefront();
675 if (!gpu_dyn_inst->isScalar()) {
677 ->operandReadComplete(wf, gpu_dyn_inst);
680 ->operandReadComplete(wf, gpu_dyn_inst);
681 bool operandsReady = vrfRdy && srfRdy;
683 DPRINTF(GPUSched,
"schList[%d]: WV[%d] operands ready for: "
684 "%d: %s\n",
j, wf->
wfDynId, gpu_dyn_inst->seqNum(),
685 gpu_dyn_inst->disassemble());
686 DPRINTF(GPUSched,
"schList[%d]: WV[%d] RFBUSY->RFREADY\n",
690 DPRINTF(GPUSched,
"schList[%d]: WV[%d] operands not ready "
692 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
724 Wavefront *wf = gpu_dyn_inst->wavefront();
731 if (!gpu_dyn_inst->isScalar()) {
733 ->dispatchInstruction(gpu_dyn_inst);
737 std::stringstream
ss;
738 for (
auto id : execUnitIds) {
741 DPRINTF(GPUSched,
"dispatchList[%d]: SIMD[%d] WV[%d]: %d: %s"
742 " Reserving ExeRes[ %s]\n",
744 gpu_dyn_inst->disassemble(),
ss.str());
746 for (
auto execUnitId : execUnitIds) {
747 panic_if(exeUnitReservations.at(execUnitId),
748 "Execution unit %d is reserved!!!\n"
749 "SIMD[%d] WV[%d]: %d: %s",
751 gpu_dyn_inst->seqNum(),
752 gpu_dyn_inst->disassemble());
753 exeUnitReservations.at(execUnitId) =
true;
760 if (execUnitIds.size() > 1) {
761 M5_VAR_USED
int lm_exec_unit = wf->
localMem;
765 }
else if (
s ==
SKIP) {
770 M5_VAR_USED
int gm_exec_unit = wf->
globalMem;
788 :
Stats::Group(parent,
"ScheduleStage"),
789 ADD_STAT(rdyListEmpty ,
"number of cycles no wave on ready list per "
790 "execution resource"),
791 ADD_STAT(rdyListNotEmpty,
"number of cycles one or more wave on ready "
792 "list per execution resource"),
793 ADD_STAT(addToSchListStalls,
"number of cycles a wave is not added to "
794 "schList per execution resource when ready list is not empty"),
795 ADD_STAT(schListToDispList,
"number of cycles a wave is added to "
796 "dispatchList per execution resource"),
797 ADD_STAT(schListToDispListStalls,
"number of cycles no wave is added to"
798 " dispatchList per execution resource"),
799 ADD_STAT(rfAccessStalls,
"number of stalls due to RF access denied"),
800 ADD_STAT(ldsBusArbStalls,
"number of stalls due to VRF->LDS bus "
802 ADD_STAT(opdNrdyStalls,
"number of stalls in SCH due to operands not "
804 ADD_STAT(dispNrdyStalls,
"number of stalls in SCH due to resource not "
ScheduleStageStats(Stats::Group *parent, int num_exec_units)
std::vector< WaitClass > vectorALUs
Stats::Vector rfAccessStalls
WaitClass vectorSharedMemUnit
void acqCoalescerToken(GPUDynInstPtr mp)
WaitClass vrfToGlobalMemPipeBus
@ SCH_VECTOR_MEM_COALESCER_NRDY
LocalMemPipeline localMemoryPipe
void reset() override
Reset the pipe stage interface.
@ SCH_VECTOR_MEM_ISSUE_NRDY
GPUDynInstPtr & readyInst(int func_unit_id)
DISPATCH_STATUS dispatchStatus(int func_unit_id) const
WaitClass vectorGlobalMemUnit
@ SCH_FLAT_MEM_COALESCER_NRDY
void deleteFromSch(Wavefront *w)
@ S_BARRIER
WF is stalled at a barrier.
void insertInPipeMap(Wavefront *w)
Wavefront::WavefrontStats stats
std::vector< Scheduler > scheduler
void updateReadyList(int func_unit_id)
Delete all wavefronts that have been marked as ready at scoreboard stage but are found to have empty instruction buffers at schedule stage.
int numVectorGlobalMemUnits
std::vector< WaitClass > scalarALUs
@ SCH_RF_ACCESS_NRDY_CONDITIONS
bool rdy(Cycles cycles=Cycles(0)) const
@ SCH_RF_OPD_NRDY_CONDITIONS
std::vector< std::deque< std::pair< GPUDynInstPtr, SCH_STATUS > > > schList
void setStatus(status_e newStatus)
bool outstandingReqsCheck(GPUDynInstPtr mp) const
@ SCH_SCALAR_MEM_ISSUE_NRDY
void reinsertToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
@ SCH_LOCAL_MEM_FIFO_NRDY
std::vector< ScalarRegisterFile * > srf
void dispatchTransition(const GPUDynInstPtr &gpu_dyn_inst, int func_unit_id, DISPATCH_STATUS disp_status)
Once the scheduler has chosen a winning WF for execution, and after the WF's oldest instruction's ope...
Stats::Scalar ldsBusArbStalls
@ SCH_VECTOR_MEM_BUS_BUSY_NRDY
@ SCH_SCALAR_MEM_BUS_BUSY_NRDY
int numVectorSharedMemUnits
Communication interface between Schedule and Execute stages.
Stats::Scalar schRfAccessStalls
bool isOldestInstWaitcnt()
bool isGMReqFIFOWrRdy(uint32_t pendReqs=0) const
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
std::vector< VectorRegisterFile * > vrf
@ SCH_LOCAL_MEM_BUS_BUSY_NRDY
@ S_WAITCNT
wavefront has unsatisfied wait counts
ScheduleStage(const ComputeUnitParams &p, ComputeUnit &cu, ScoreboardCheckToSchedule &from_scoreboard_check, ScheduleToExecute &to_execute)
@ SCH_VECTOR_MEM_REQS_NRDY
Stats::Scalar schOpdNrdyStalls
@ SCH_FLAT_MEM_ISSUE_NRDY
@ SCH_FLAT_MEM_BUS_BUSY_NRDY
ComputeUnit & computeUnit
WaitClass vrfToLocalMemPipeBus
void scheduleRfDestOperands()
ScheduleToExecute & toExecute
ScalarMemPipeline scalarMemoryPipe
Stats::Vector rdyListNotEmpty
WaitClass srfToScalarMemPipeBus
ScoreboardCheckToSchedule & fromScoreboardCheck
const std::string & name()
Derived & init(size_type size)
Set this vector to have the given size.
GlobalMemPipeline globalMemoryPipe
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true and allows the programmer to specify diagnostic printout.
@ SCH_SCALAR_MEM_FIFO_NRDY
bool isOldestInstBarrier()
Communication interface between ScoreboardCheck and Schedule stages.
bool dispatchReady(const GPUDynInstPtr &gpu_dyn_inst)
Stats::Vector opdNrdyStalls
Stats::Vector schListToDispList
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
std::vector< Wavefront * > & readyWFs(int func_unit_id)
TODO: These methods expose this class' implementation too much by returning references to its interna...
bool addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
Stats::Vector schListToDispListStalls
std::deque< GPUDynInstPtr > instructionBuffer
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
int numReadyLists() const
Returns the number of ready lists (i.e., the number of functional units).
@ SCH_LOCAL_MEM_ISSUE_NRDY
std::unordered_set< uint64_t > wavesInSch
void incVMemInstsIssued()
Stats::Vector addToSchListStalls
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true and allows the programmer to specify diagnostic printout.
void checkRfOperandReadComplete()
Stats::Vector rdyListEmpty
bool coalescerReady(GPUDynInstPtr mp) const
bool schedRfWrites(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
ScheduleStage::ScheduleStageStats stats
std::string csprintf(const char *format, const Args &...args)
void arbitrateVrfToLdsBus()
void doDispatchListTransition(int unitId, DISPATCH_STATUS s, const GPUDynInstPtr &gpu_dyn_inst)
std::vector< int > reserveResources()
Stats::Vector dispNrdyStalls
#define panic(...)
This implements a cprintf based panic() function.
void incLGKMInstsIssued()
Generated on Tue Jun 22 2021 15:28:28 for gem5 by doxygen 1.8.17