Go to the documentation of this file.
36 #include <unordered_set>
38 #include "debug/GPUSched.hh"
39 #include "debug/GPUVRF.hh"
49 : computeUnit(cu), fromScoreboardCheck(from_scoreboard_check),
50 toExecute(to_execute),
51 _name(cu.
name() +
".ScheduleStage"),
52 vectorAluRdy(false), scalarAluRdy(false), scalarMemBusRdy(false),
53 scalarMemIssueRdy(false), glbMemBusRdy(false), glbMemIssueRdy(false),
54 locMemBusRdy(false), locMemIssueRdy(false)
78 "Scheduler should have same number of entries as CU's readyList");
119 for (
int j = firstMemUnit;
j <= lastMemUnit;
j++) {
123 if (!readyListSize) {
132 assert(gpu_dyn_inst);
139 if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
143 if (gpu_dyn_inst->isFlat()) {
153 if (
j >= firstMemUnit &&
j <= lastMemUnit) {
159 if (!readyListSize) {
168 assert(gpu_dyn_inst);
223 assert(gpu_dyn_inst);
224 Wavefront *wf = gpu_dyn_inst->wavefront();
225 bool accessVrfWr =
true;
226 if (!gpu_dyn_inst->isScalar()) {
228 ->canScheduleWriteOperands(wf, gpu_dyn_inst);
231 ->canScheduleWriteOperands(wf, gpu_dyn_inst);
232 bool accessRf = accessVrfWr && accessSrfWr;
234 if (!gpu_dyn_inst->isScalar()) {
268 assert(gpu_dyn_inst);
269 Wavefront *wf = gpu_dyn_inst->wavefront();
293 assert(gpu_dyn_inst);
294 Wavefront *wf = gpu_dyn_inst->wavefront();
295 bool accessVrf =
true;
296 if (!gpu_dyn_inst->isScalar()) {
298 ->canScheduleReadOperands(wf, gpu_dyn_inst);
301 ->canScheduleReadOperands(wf, gpu_dyn_inst);
305 bool accessRf = accessVrf && accessSrf;
307 DPRINTF(GPUSched,
"schList[%d]: Adding: SIMD[%d] WV[%d]: %d: %s\n",
309 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
313 schList.at(exeType).push_back(std::make_pair(gpu_dyn_inst,
RFBUSY));
317 if (!gpu_dyn_inst->isScalar()) {
319 ->scheduleReadOperands(wf, gpu_dyn_inst);
323 DPRINTF(GPUSched,
"schList[%d]: Added: SIMD[%d] WV[%d]: %d: %s\n",
325 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
342 DPRINTF(GPUSched,
"schList[%d]: Could not add: "
343 "SIMD[%d] WV[%d]: %d: %s\n",
345 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
357 assert(gpu_dyn_inst);
358 auto schIter =
schList.at(exeType).begin();
359 while (schIter !=
schList.at(exeType).end()
360 && schIter->first->wfDynId < gpu_dyn_inst->wfDynId) {
363 schList.at(exeType).insert(schIter, std::make_pair(gpu_dyn_inst,
RFREADY));
407 assert(gpu_dyn_inst);
408 Wavefront *wf = gpu_dyn_inst->wavefront();
419 if (gpu_dyn_inst->isNop()) {
426 }
else if (!gpu_dyn_inst->isScalar() && !
vectorAluRdy) {
430 }
else if (gpu_dyn_inst->isEndOfKernel()) {
436 }
else if (gpu_dyn_inst->isBarrier() || gpu_dyn_inst->isBranch()
437 || gpu_dyn_inst->isALU()) {
442 }
else if (!gpu_dyn_inst->isScalar() && !
vectorAluRdy) {
446 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
468 }
else if (gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
489 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isLocalMem()) {
508 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isFlat()) {
536 panic(
"%s: unknown instr checked for readiness",
537 gpu_dyn_inst->disassemble());
554 auto schIter =
schList.at(
j).begin();
555 bool dispatched =
false;
556 while (schIter !=
schList.at(
j).end()) {
558 if (schIter->second ==
RFREADY) {
561 if (!dispatched && dispRdy) {
570 if (!
mp->isMemSync() && !
mp->isScalar() &&
571 (
mp->isGlobalMem() ||
mp->isFlat())) {
576 DPRINTF(GPUSched,
"dispatchList[%d]: fillDispatchList: "
577 "EMPTY->EXREADY\n",
j);
578 schIter->first =
nullptr;
579 schIter =
schList.at(
j).erase(schIter);
584 schIter->first->wavefront()->schStalls++;
587 schIter->first->wavefront()->schResourceStalls++;
625 ==
EXREADY && gpu_dyn_inst->isFlat()) {
626 Wavefront *wf = gpu_dyn_inst->wavefront();
637 ->wavefront()->schLdsArbStalls++;
643 DPRINTF(GPUSched,
"dispatchList[%d]: arbVrfLds: "
658 assert(gpu_dyn_inst);
659 Wavefront *wf = gpu_dyn_inst->wavefront();
666 if (!gpu_dyn_inst->isScalar()) {
668 ->operandReadComplete(wf, gpu_dyn_inst);
671 ->operandReadComplete(wf, gpu_dyn_inst);
672 bool operandsReady = vrfRdy && srfRdy;
674 DPRINTF(GPUSched,
"schList[%d]: WV[%d] operands ready for: "
675 "%d: %s\n",
j, wf->
wfDynId, gpu_dyn_inst->seqNum(),
676 gpu_dyn_inst->disassemble());
677 DPRINTF(GPUSched,
"schList[%d]: WV[%d] RFBUSY->RFREADY\n",
681 DPRINTF(GPUSched,
"schList[%d]: WV[%d] operands not ready "
683 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
715 Wavefront *wf = gpu_dyn_inst->wavefront();
722 if (!gpu_dyn_inst->isScalar()) {
724 ->dispatchInstruction(gpu_dyn_inst);
728 std::stringstream
ss;
729 for (
auto id : execUnitIds) {
732 DPRINTF(GPUSched,
"dispatchList[%d]: SIMD[%d] WV[%d]: %d: %s"
733 " Reserving ExeRes[ %s]\n",
735 gpu_dyn_inst->disassemble(),
ss.str());
737 for (
auto execUnitId : execUnitIds) {
738 panic_if(exeUnitReservations.at(execUnitId),
739 "Execution unit %d is reserved!!!\n"
740 "SIMD[%d] WV[%d]: %d: %s",
742 gpu_dyn_inst->seqNum(),
743 gpu_dyn_inst->disassemble());
744 exeUnitReservations.at(execUnitId) =
true;
751 if (execUnitIds.size() > 1) {
752 int lm_exec_unit M5_VAR_USED = wf->
localMem;
756 }
else if (
s ==
SKIP) {
761 int gm_exec_unit M5_VAR_USED = wf->
globalMem;
782 .
name(
name() +
".rdy_list_not_empty")
783 .
desc(
"number of cycles one or more wave on ready list per "
784 "execution resource")
790 .
desc(
"number of cycles no wave on ready list per "
791 "execution resource")
796 .
name(
name() +
".sch_list_add_stalls")
797 .
desc(
"number of cycles a wave is not added to schList per "
798 "execution resource when ready list is not empty")
803 .
name(
name() +
".sch_list_to_disp_list")
804 .
desc(
"number of cycles a wave is added to dispatchList per "
805 "execution resource")
810 .
name(
name() +
".sch_list_to_disp_list_stalls")
811 .
desc(
"number of cycles no wave is added to dispatchList per "
812 "execution resource")
819 .
desc(
"number of stalls in SCH due to operands not ready")
829 .
desc(
"number of stalls in SCH due to resource not ready")
866 .
desc(
"number of stalls due to RF access denied")
876 .
name(
name() +
".lds_bus_arb_stalls")
877 .
desc(
"number of stalls due to VRF->LDS bus conflicts")
std::vector< WaitClass > vectorALUs
WaitClass vectorSharedMemUnit
void acqCoalescerToken(GPUDynInstPtr mp)
WaitClass vrfToGlobalMemPipeBus
@ SCH_VECTOR_MEM_COALESCER_NRDY
LocalMemPipeline localMemoryPipe
void reset() override
Reset the pipe stage interface.
@ SCH_VECTOR_MEM_ISSUE_NRDY
Stats::Scalar ldsBusArbStalls
Stats::Scalar schOpdNrdyStalls
GPUDynInstPtr & readyInst(int func_unit_id)
ScheduleStage(const ComputeUnitParams *p, ComputeUnit &cu, ScoreboardCheckToSchedule &from_scoreboard_check, ScheduleToExecute &to_execute)
DISPATCH_STATUS dispatchStatus(int func_unit_id) const
WaitClass vectorGlobalMemUnit
@ SCH_FLAT_MEM_COALESCER_NRDY
void deleteFromSch(Wavefront *w)
void insertInPipeMap(Wavefront *w)
std::vector< Scheduler > scheduler
void updateReadyList(int func_unit_id)
Delete all wavefronts that have been marked as ready at scoreboard stage but are found to have empty ...
int numVectorGlobalMemUnits
std::vector< WaitClass > scalarALUs
@ SCH_RF_ACCESS_NRDY_CONDITIONS
bool rdy(Cycles cycles=Cycles(0)) const
@ SCH_RF_OPD_NRDY_CONDITIONS
std::vector< std::deque< std::pair< GPUDynInstPtr, SCH_STATUS > > > schList
void setStatus(status_e newStatus)
bool outstandingReqsCheck(GPUDynInstPtr mp) const
@ SCH_SCALAR_MEM_ISSUE_NRDY
void reinsertToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
@ SCH_LOCAL_MEM_FIFO_NRDY
std::vector< ScalarRegisterFile * > srf
void dispatchTransition(const GPUDynInstPtr &gpu_dyn_inst, int func_unit_id, DISPATCH_STATUS disp_status)
Once the scheduler has chosen a winning WF for execution, and after the WF's oldest instruction's ope...
@ SCH_VECTOR_MEM_BUS_BUSY_NRDY
@ SCH_SCALAR_MEM_BUS_BUSY_NRDY
int numVectorSharedMemUnits
Communication interface between Schedule and Execute stages.
Stats::Vector rfAccessStalls
bool isOldestInstWaitcnt()
bool isGMReqFIFOWrRdy(uint32_t pendReqs=0) const
std::vector< VectorRegisterFile * > vrf
@ SCH_LOCAL_MEM_BUS_BUSY_NRDY
@ S_WAITCNT
wavefront has unsatisfied wait counts
Stats::Vector rdyListNotEmpty
@ SCH_VECTOR_MEM_REQS_NRDY
Stats::Vector rdyListEmpty
@ SCH_FLAT_MEM_ISSUE_NRDY
@ SCH_FLAT_MEM_BUS_BUSY_NRDY
ComputeUnit & computeUnit
WaitClass vrfToLocalMemPipeBus
const std::string & name() const
void scheduleRfDestOperands()
ScheduleToExecute & toExecute
ScalarMemPipeline scalarMemoryPipe
WaitClass srfToScalarMemPipeBus
ScoreboardCheckToSchedule & fromScoreboardCheck
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
const std::string & name()
Derived & init(size_type size)
Set this vector to have the given size.
GlobalMemPipeline globalMemoryPipe
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Stats::Vector addToSchListStalls
@ SCH_SCALAR_MEM_FIFO_NRDY
Communication interface between ScoreboardCheck and Schedule stages.
bool dispatchReady(const GPUDynInstPtr &gpu_dyn_inst)
Stats::Vector dispNrdyStalls
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Cycles is a wrapper class for representing cycle counts, i.e.
std::vector< Wavefront * > & readyWFs(int func_unit_id)
TODO: These methods expose this class' implementation too much by returning references to its interna...
bool addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
Stats::Scalar schRfAccessStalls
std::deque< GPUDynInstPtr > instructionBuffer
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
int numReadyLists() const
Returns the number of ready lists (i.e., the number of functional units).
@ SCH_LOCAL_MEM_ISSUE_NRDY
std::unordered_set< uint64_t > wavesInSch
void incVMemInstsIssued()
Stats::Vector schListToDispList
Stats::Vector opdNrdyStalls
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
void checkRfOperandReadComplete()
Stats::Vector schListToDispListStalls
bool coalescerReady(GPUDynInstPtr mp) const
bool schedRfWrites(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
std::string csprintf(const char *format, const Args &...args)
void arbitrateVrfToLdsBus()
void doDispatchListTransition(int unitId, DISPATCH_STATUS s, const GPUDynInstPtr &gpu_dyn_inst)
std::vector< int > reserveResources()
#define panic(...)
This implements a cprintf based panic() function.
void incLGKMInstsIssued()
Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17