Go to the documentation of this file.
36 #include <unordered_set>
39 #include "debug/GPUSched.hh"
40 #include "debug/GPUVRF.hh"
53 : computeUnit(cu), fromScoreboardCheck(from_scoreboard_check),
54 toExecute(to_execute),
55 _name(cu.
name() +
".ScheduleStage"),
56 vectorAluRdy(false), scalarAluRdy(false), scalarMemBusRdy(false),
57 scalarMemIssueRdy(false), glbMemBusRdy(false), glbMemIssueRdy(false),
58 locMemBusRdy(false), locMemIssueRdy(false), stats(&cu, cu.numExeUnits())
82 "Scheduler should have same number of entries as CU's readyList");
123 for (
int j = firstMemUnit;
j <= lastMemUnit;
j++) {
127 if (!readyListSize) {
136 assert(gpu_dyn_inst);
143 if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
147 if (gpu_dyn_inst->isFlat()) {
151 if (gpu_dyn_inst->isStore() && gpu_dyn_inst->isGlobalSeg()) {
160 if (
j >= firstMemUnit &&
j <= lastMemUnit) {
166 if (!readyListSize) {
175 assert(gpu_dyn_inst);
230 assert(gpu_dyn_inst);
231 Wavefront *wf = gpu_dyn_inst->wavefront();
232 bool accessVrfWr =
true;
233 if (!gpu_dyn_inst->isScalar()) {
235 ->canScheduleWriteOperands(wf, gpu_dyn_inst);
238 ->canScheduleWriteOperands(wf, gpu_dyn_inst);
239 bool accessRf = accessVrfWr && accessSrfWr;
241 if (!gpu_dyn_inst->isScalar()) {
275 assert(gpu_dyn_inst);
276 Wavefront *wf = gpu_dyn_inst->wavefront();
300 assert(gpu_dyn_inst);
301 Wavefront *wf = gpu_dyn_inst->wavefront();
302 bool accessVrf =
true;
303 if (!gpu_dyn_inst->isScalar()) {
305 ->canScheduleReadOperands(wf, gpu_dyn_inst);
308 ->canScheduleReadOperands(wf, gpu_dyn_inst);
312 bool accessRf = accessVrf && accessSrf;
314 DPRINTF(GPUSched,
"schList[%d]: Adding: SIMD[%d] WV[%d]: %d: %s\n",
316 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
320 schList.at(exeType).push_back(std::make_pair(gpu_dyn_inst,
RFBUSY));
330 if (!gpu_dyn_inst->isScalar()) {
332 ->scheduleReadOperands(wf, gpu_dyn_inst);
336 DPRINTF(GPUSched,
"schList[%d]: Added: SIMD[%d] WV[%d]: %d: %s\n",
338 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
355 DPRINTF(GPUSched,
"schList[%d]: Could not add: "
356 "SIMD[%d] WV[%d]: %d: %s\n",
358 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
370 assert(gpu_dyn_inst);
371 auto schIter =
schList.at(exeType).begin();
372 while (schIter !=
schList.at(exeType).end()
373 && schIter->first->wfDynId < gpu_dyn_inst->wfDynId) {
376 schList.at(exeType).insert(schIter, std::make_pair(gpu_dyn_inst,
RFREADY));
420 assert(gpu_dyn_inst);
421 Wavefront *wf = gpu_dyn_inst->wavefront();
432 if (gpu_dyn_inst->isNop()) {
439 }
else if (!gpu_dyn_inst->isScalar() && !
vectorAluRdy) {
443 }
else if (gpu_dyn_inst->isEndOfKernel()) {
449 }
else if (gpu_dyn_inst->isBarrier() || gpu_dyn_inst->isBranch()
450 || gpu_dyn_inst->isALU()) {
455 }
else if (!gpu_dyn_inst->isScalar() && !
vectorAluRdy) {
459 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
481 }
else if (gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
502 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isLocalMem()) {
521 }
else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isFlat()) {
549 panic(
"%s: unknown instr checked for readiness",
550 gpu_dyn_inst->disassemble());
567 auto schIter =
schList.at(
j).begin();
568 bool dispatched =
false;
569 while (schIter !=
schList.at(
j).end()) {
571 if (schIter->second ==
RFREADY) {
574 if (!dispatched && dispRdy) {
583 if (!
mp->isMemSync() && !
mp->isScalar() &&
584 (
mp->isGlobalMem() ||
mp->isFlat())) {
589 if (
mp->isMemRef()) {
590 mp->exec_mask =
mp->wavefront()->execMask();
594 DPRINTF(GPUSched,
"dispatchList[%d]: fillDispatchList: "
595 "EMPTY->EXREADY\n",
j);
596 schIter->first =
nullptr;
597 schIter =
schList.at(
j).erase(schIter);
602 schIter->first->wavefront()->stats.schStalls++;
605 schIter->first->wavefront()->stats.schResourceStalls++;
643 ==
EXREADY && gpu_dyn_inst->isFlat()) {
644 Wavefront *wf = gpu_dyn_inst->wavefront();
655 ->wavefront()->stats.schLdsArbStalls++;
661 DPRINTF(GPUSched,
"dispatchList[%d]: arbVrfLds: "
676 assert(gpu_dyn_inst);
677 Wavefront *wf = gpu_dyn_inst->wavefront();
684 if (!gpu_dyn_inst->isScalar()) {
686 ->operandReadComplete(wf, gpu_dyn_inst);
689 ->operandReadComplete(wf, gpu_dyn_inst);
690 bool operandsReady = vrfRdy && srfRdy;
692 DPRINTF(GPUSched,
"schList[%d]: WV[%d] operands ready for: "
693 "%d: %s\n",
j, wf->
wfDynId, gpu_dyn_inst->seqNum(),
694 gpu_dyn_inst->disassemble());
695 DPRINTF(GPUSched,
"schList[%d]: WV[%d] RFBUSY->RFREADY\n",
699 DPRINTF(GPUSched,
"schList[%d]: WV[%d] operands not ready "
701 gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
733 Wavefront *wf = gpu_dyn_inst->wavefront();
740 if (!gpu_dyn_inst->isScalar()) {
742 ->dispatchInstruction(gpu_dyn_inst);
746 std::stringstream
ss;
747 for (
auto id : execUnitIds) {
750 DPRINTF(GPUSched,
"dispatchList[%d]: SIMD[%d] WV[%d]: %d: %s"
751 " Reserving ExeRes[ %s]\n",
753 gpu_dyn_inst->disassemble(),
ss.str());
755 for (
auto execUnitId : execUnitIds) {
756 panic_if(exeUnitReservations.at(execUnitId),
757 "Execution unit %d is reserved!!!\n"
758 "SIMD[%d] WV[%d]: %d: %s",
760 gpu_dyn_inst->seqNum(),
761 gpu_dyn_inst->disassemble());
762 exeUnitReservations.at(execUnitId) =
true;
769 if (execUnitIds.size() > 1) {
770 GEM5_VAR_USED
int lm_exec_unit = wf->
localMem;
774 }
else if (
s ==
SKIP) {
779 GEM5_VAR_USED
int gm_exec_unit = wf->
globalMem;
797 : statistics::
Group(parent,
"ScheduleStage"),
798 ADD_STAT(rdyListEmpty ,
"number of cycles no wave on ready list per "
799 "execution resource"),
800 ADD_STAT(rdyListNotEmpty,
"number of cycles one or more wave on ready "
801 "list per execution resource"),
802 ADD_STAT(addToSchListStalls,
"number of cycles a wave is not added to "
803 "schList per execution resource when ready list is not empty"),
804 ADD_STAT(schListToDispList,
"number of cycles a wave is added to "
805 "dispatchList per execution resource"),
806 ADD_STAT(schListToDispListStalls,
"number of cycles no wave is added to"
807 " dispatchList per execution resource"),
808 ADD_STAT(rfAccessStalls,
"number of stalls due to RF access denied"),
809 ADD_STAT(ldsBusArbStalls,
"number of stalls due to VRF->LDS bus "
811 ADD_STAT(opdNrdyStalls,
"number of stalls in SCH due to operands not "
813 ADD_STAT(dispNrdyStalls,
"number of stalls in SCH due to resource not "
std::unordered_set< uint64_t > wavesInSch
ScoreboardCheckToSchedule & fromScoreboardCheck
void scheduleRfDestOperands()
void reinsertToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
statistics::Vector addToSchListStalls
statistics::Scalar ldsBusArbStalls
LocalMemPipeline localMemoryPipe
void updateReadyList(int func_unit_id)
Delete all wavefronts that have been marked as ready at scoreboard stage but are found to have empty ...
statistics::Scalar schStalls
bool isOldestInstBarrier()
std::vector< ScalarRegisterFile * > srf
Communication interface between Schedule and Execute stages.
std::vector< Scheduler > scheduler
std::vector< std::deque< std::pair< GPUDynInstPtr, SCH_STATUS > > > schList
statistics::Scalar schOpdNrdyStalls
bool isGMReqFIFOWrRdy(uint32_t pendReqs=0) const
statistics::Vector rfAccessStalls
Derived & subname(off_type index, const std::string &name)
Set the subfield name for the given index, and marks this stat to print at the end of simulation.
WaitClass srfToScalarMemPipeBus
DISPATCH_STATUS dispatchStatus(int func_unit_id) const
std::string csprintf(const char *format, const Args &...args)
bool dispatchReady(const GPUDynInstPtr &gpu_dyn_inst)
@ SCH_FLAT_MEM_ISSUE_NRDY
statistics::Scalar schCycles
WaitClass vrfToGlobalMemPipeBus
int numVectorSharedMemUnits
std::vector< VectorRegisterFile * > vrf
@ SCH_LOCAL_MEM_FIFO_NRDY
statistics::Vector rdyListEmpty
ScheduleToExecute & toExecute
Cycles is a wrapper class for representing cycle counts, i.e.
void setStatus(status_e newStatus)
statistics::Scalar schRfAccessStalls
GPUDynInstPtr & readyInst(int func_unit_id)
@ SCH_RF_OPD_NRDY_CONDITIONS
statistics::Vector schListToDispList
WaitClass vectorSharedMemUnit
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
void incLGKMInstsIssued()
bool addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
@ SCH_VECTOR_MEM_ISSUE_NRDY
ScheduleStage(const ComputeUnitParams &p, ComputeUnit &cu, ScoreboardCheckToSchedule &from_scoreboard_check, ScheduleToExecute &to_execute)
GlobalMemPipeline globalMemoryPipe
@ S_BARRIER
WF is stalled at a barrier.
void reset() override
Reset the pipe stage interface.
void arbitrateVrfToLdsBus()
void deleteFromSch(Wavefront *w)
void checkRfOperandReadComplete()
bool isOldestInstWaitcnt()
@ SCH_FLAT_MEM_BUS_BUSY_NRDY
ScheduleStageStats(statistics::Group *parent, int num_exec_units)
statistics::Vector schListToDispListStalls
statistics::Vector rdyListNotEmpty
statistics::Vector opdNrdyStalls
std::vector< WaitClass > scalarALUs
void acqCoalescerToken(GPUDynInstPtr mp)
const std::string & name()
@ SCH_VECTOR_MEM_REQS_NRDY
bool outstandingReqsCheck(GPUDynInstPtr mp) const
std::shared_ptr< GPUDynInst > GPUDynInstPtr
@ SCH_LOCAL_MEM_BUS_BUSY_NRDY
@ SCH_SCALAR_MEM_ISSUE_NRDY
@ SCH_LOCAL_MEM_ISSUE_NRDY
std::vector< Wavefront * > & readyWFs(int func_unit_id)
TODO: These methods expose this class' implementation too much by returning references to its interna...
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
int numVectorGlobalMemUnits
bool coalescerReady(GPUDynInstPtr mp) const
@ SCH_SCALAR_MEM_FIFO_NRDY
std::vector< int > reserveResources()
Communication interface between ScoreboardCheck and Schedule stages.
gem5::ScheduleStage::ScheduleStageStats stats
ComputeUnit & computeUnit
void insertInPipeMap(Wavefront *w)
statistics::Vector dispNrdyStalls
@ S_WAITCNT
wavefront has unsatisfied wait counts
void dispatchTransition(const GPUDynInstPtr &gpu_dyn_inst, int func_unit_id, DISPATCH_STATUS disp_status)
Once the scheduler has chosen a winning WF for execution, and after the WF's oldest instruction's ope...
WaitClass vrfToLocalMemPipeBus
std::vector< WaitClass > vectorALUs
std::deque< GPUDynInstPtr > instructionBuffer
gem5::Wavefront::WavefrontStats stats
@ SCH_VECTOR_MEM_BUS_BUSY_NRDY
@ SCH_VECTOR_MEM_COALESCER_NRDY
@ SCH_RF_ACCESS_NRDY_CONDITIONS
void incVMemInstsIssued()
@ SCH_SCALAR_MEM_BUS_BUSY_NRDY
ScalarMemPipeline scalarMemoryPipe
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
bool schedRfWrites(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
WaitClass vectorGlobalMemUnit
Derived & init(size_type size)
Set this vector to have the given size.
void doDispatchListTransition(int unitId, DISPATCH_STATUS s, const GPUDynInstPtr &gpu_dyn_inst)
bool rdy(Cycles cycles=Cycles(0)) const
@ SCH_FLAT_MEM_COALESCER_NRDY
int numReadyLists() const
Returns the number of ready lists (i.e., the number of functional units).
#define panic(...)
This implements a cprintf based panic() function.
Generated on Tue Sep 21 2021 12:25:25 for gem5 by doxygen 1.8.17