#include "gpu-compute/schedule_stage.hh"

#include <unordered_set>

#include "debug/GPUSched.hh"
#include "debug/GPUVRF.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{
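// The ScheduleStage is the middle stage of the compute unit's pipeline
// (ScoreboardCheck -> Schedule -> Execute). Each cycle it picks at most one
// ready wavefront per execution resource, stages that wave's register-file
// operand reads on the schList (RFBUSY until the reads complete, then
// RFREADY), and promotes ready instructions onto the dispatch list that is
// handed to the Execute stage.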
ScheduleStage::ScheduleStage(const ComputeUnitParams &p, ComputeUnit &cu,
                             ScoreboardCheckToSchedule &from_scoreboard_check,
                             ScheduleToExecute &to_execute)
    : computeUnit(cu), fromScoreboardCheck(from_scoreboard_check),
      toExecute(to_execute),
      _name(cu.name() + ".ScheduleStage"),
      vectorAluRdy(false), scalarAluRdy(false), scalarMemBusRdy(false),
      scalarMemIssueRdy(false), glbMemBusRdy(false), glbMemIssueRdy(false),
      locMemBusRdy(false), locMemIssueRdy(false), stats(&cu, cu.numExeUnits())
{
    // ...
}
80 "Scheduler should have same number of entries as CU's readyList");
void
ScheduleStage::exec()
{
    toExecute.reset();
    // ... (sample the memory resource readiness flags for this cycle)
    int firstMemUnit = computeUnit.firstMemUnit();
    int lastMemUnit = computeUnit.lastMemUnit();

    // Pick at most one ready wave per memory pipeline resource and try to
    // stage it on the schList
    for (int j = firstMemUnit; j <= lastMemUnit; j++) {
        int readyListSize = fromScoreboardCheck.readyWFs(j).size();
        // Skip this resource if no wave on its ready list is schedulable
        if (!readyListSize) {
            continue;
        }
        // ...
        Wavefront *wf = scheduler[j].chooseWave();
        GPUDynInstPtr &gpu_dyn_inst = wf->instructionBuffer.front();
        assert(gpu_dyn_inst);
        if (addToSchList(j, gpu_dyn_inst)) {
            // Count the issued memory instruction against the wait
            // counters it will later decrement
            if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
                wf->incLGKMInstsIssued();
            } else {
                wf->incVMemInstsIssued();
                if (gpu_dyn_inst->isFlat()) {
                    wf->incLGKMInstsIssued();
                }
            }
            if (gpu_dyn_inst->isStore() && gpu_dyn_inst->isGlobalSeg()) {
                // ...
            }
        }
    }
    // Then schedule the non-memory execution resources, skipping the
    // memory units handled above
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        if (j >= firstMemUnit && j <= lastMemUnit) {
            continue;
        }
        int readyListSize = fromScoreboardCheck.readyWFs(j).size();
        if (!readyListSize) {
            continue;
        }
        // ...
        Wavefront *wf = scheduler[j].chooseWave();
        GPUDynInstPtr &gpu_dyn_inst = wf->instructionBuffer.front();
        assert(gpu_dyn_inst);
        // ...
    }
    // ...
}
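// The remainder of exec() drives the helpers defined below: filling the
// dispatch list (fillDispatchList), arbitrating the VRF->LDS bus for FLAT
// accesses (arbitrateVrfToLdsBus), scheduling destination-operand writes
// (scheduleRfDestOperands), and polling operand reads
// (checkRfOperandReadComplete).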
bool
ScheduleStage::schedRfWrites(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
{
    assert(gpu_dyn_inst);
    Wavefront *wf = gpu_dyn_inst->wavefront();
    bool accessVrfWr = true;
    if (!gpu_dyn_inst->isScalar()) {
        accessVrfWr = computeUnit.vrf[wf->simdId]
            ->canScheduleWriteOperands(wf, gpu_dyn_inst);
    }
    bool accessSrfWr = computeUnit.srf[wf->simdId]
        ->canScheduleWriteOperands(wf, gpu_dyn_inst);
    bool accessRf = accessVrfWr && accessSrfWr;
    if (accessRf) {
        if (!gpu_dyn_inst->isScalar()) {
            computeUnit.vrf[wf->simdId]
                ->scheduleWriteOperands(wf, gpu_dyn_inst);
        }
        computeUnit.srf[wf->simdId]->scheduleWriteOperands(wf, gpu_dyn_inst);
        return true;
    }
    // ... (count the RF-access-denied stall)
    return false;
}
void
ScheduleStage::scheduleRfDestOperands()
{
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        // ...
        // Get the wave on the dispatch list and try to allocate write
        // resources for it in the register files
        const GPUDynInstPtr &gpu_dyn_inst = toExecute.readyInst(j);
        assert(gpu_dyn_inst);
        Wavefront *wf = gpu_dyn_inst->wavefront();
        if (!schedRfWrites(j, gpu_dyn_inst)) {
            // ...
        }
    }
}
bool
ScheduleStage::addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst)
{
    // Add the wave to the schList if the register files can support its
    // next instruction's operand reads this cycle
    assert(gpu_dyn_inst);
    Wavefront *wf = gpu_dyn_inst->wavefront();
    bool accessVrf = true;
    if (!gpu_dyn_inst->isScalar()) {
        accessVrf = computeUnit.vrf[wf->simdId]
            ->canScheduleReadOperands(wf, gpu_dyn_inst);
    }
    bool accessSrf = computeUnit.srf[wf->simdId]
        ->canScheduleReadOperands(wf, gpu_dyn_inst);
    bool accessRf = accessVrf && accessSrf;
    if (accessRf) {
        DPRINTF(GPUSched, "schList[%d]: Adding: SIMD[%d] WV[%d]: %d: %s\n",
                exeType, wf->simdId, wf->wfDynId,
                gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());

        // Track the wave in the pipe map and the set of waves in SCH, and
        // enter it on the schList in the RFBUSY state
        computeUnit.insertInPipeMap(wf);
        wavesInSch.emplace(wf->wfDynId);
        schList.at(exeType).push_back(std::make_pair(gpu_dyn_inst, RFBUSY));
        // ...

        // Schedule the operand reads with the register files
        if (!gpu_dyn_inst->isScalar()) {
            computeUnit.vrf[wf->simdId]
                ->scheduleReadOperands(wf, gpu_dyn_inst);
        }
        computeUnit.srf[wf->simdId]->scheduleReadOperands(wf, gpu_dyn_inst);

        DPRINTF(GPUSched, "schList[%d]: Added: SIMD[%d] WV[%d]: %d: %s\n",
                exeType, wf->simdId, wf->wfDynId,
                gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
        return true;
    }
    // The register files cannot service the operand reads this cycle
    // ...
    DPRINTF(GPUSched, "schList[%d]: Could not add: "
            "SIMD[%d] WV[%d]: %d: %s\n",
            exeType, wf->simdId, wf->wfDynId,
            gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
    return false;
}
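// A schList entry remains RFBUSY while its register-file reads are in
// flight; checkRfOperandReadComplete() promotes it to RFREADY, and
// fillDispatchList() then moves RFREADY entries onto the dispatch list.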
void
ScheduleStage::reinsertToSchList(int exeType,
                                 const GPUDynInstPtr &gpu_dyn_inst)
{
    // Reinsert the wave in age order: the oldest wave (smallest wfDynId)
    // sits at the front of the schList
    assert(gpu_dyn_inst);
    auto schIter = schList.at(exeType).begin();
    while (schIter != schList.at(exeType).end()
           && schIter->first->wfDynId < gpu_dyn_inst->wfDynId) {
        schIter++;
    }
    schList.at(exeType).insert(schIter,
                               std::make_pair(gpu_dyn_inst, RFREADY));
}
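// Reinsertion preserves age order so the oldest wave is considered first
// on later dispatch attempts. A wave comes back through here when it
// cannot proceed after being dispatched, e.g. a FLAT access that loses
// the VRF->LDS bus arbitration below.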
bool
ScheduleStage::dispatchReady(const GPUDynInstPtr &gpu_dyn_inst)
{
    assert(gpu_dyn_inst);
    Wavefront *wf = gpu_dyn_inst->wavefront();
    // ... (sample vectorAluRdy/scalarAluRdy for this wave's units)
    if (gpu_dyn_inst->isNop()) {
        // A NOP only needs its ALU to be free
        if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
            return false;
        } else if (!gpu_dyn_inst->isScalar() && !vectorAluRdy) {
            return false;
        }
    } else if (gpu_dyn_inst->isEndOfKernel()) {
        // EndPgm needs the scalar ALU
        if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
            return false;
        }
    } else if (gpu_dyn_inst->isBarrier() || gpu_dyn_inst->isBranch()
               || gpu_dyn_inst->isALU()) {
        if (gpu_dyn_inst->isScalar() && !scalarAluRdy) {
            return false;
        } else if (!gpu_dyn_inst->isScalar() && !vectorAluRdy) {
            return false;
        }
    } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
        // Vector global memory: needs the GM issue unit, the VRF->GM bus,
        // a coalescer slot, and room in the GM pipeline's request queues
        // ...
    } else if (gpu_dyn_inst->isScalar() && gpu_dyn_inst->isGlobalMem()) {
        // Scalar memory: needs the scalar memory unit, the SRF->scalar-mem
        // bus, and room in the scalar GM request FIFO
        // ...
    } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isLocalMem()) {
        // Vector local memory (LDS): needs the LDS issue unit and the
        // VRF->LDS bus
        // ...
    } else if (!gpu_dyn_inst->isScalar() && gpu_dyn_inst->isFlat()) {
        // FLAT access: must satisfy both the global and local memory checks
        // ...
    } else {
        panic("%s: unknown instr checked for readiness",
              gpu_dyn_inst->disassemble());
    }
    // ...
    return true;
}
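// The memory-path readiness flags consulted above (scalarMemBusRdy,
// glbMemBusRdy, locMemIssueRdy, etc.) are sampled once per cycle from the
// corresponding WaitClass resources (srfToScalarMemPipeBus,
// vrfToGlobalMemPipeBus, vectorSharedMemUnit, ...) via rdy().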
void
ScheduleStage::fillDispatchList()
{
    // Walk each execution resource's schList and move at most one RFREADY
    // wave per resource onto the dispatch list
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        // ...
        auto schIter = schList.at(j).begin();
        bool dispatched = false;
        while (schIter != schList.at(j).end()) {
            // Only attempt to dispatch entries whose operand reads have
            // completed
            if (schIter->second == RFREADY) {
                bool dispRdy = dispatchReady(schIter->first);
                if (!dispatched && dispRdy) {
                    // No wave has claimed this resource yet and this one
                    // is ready, so move it to the dispatch list
                    GPUDynInstPtr mp = schIter->first;
                    // Acquire a coalescer token for vector global/FLAT
                    // memory operations
                    if (!mp->isMemSync() && !mp->isScalar() &&
                        (mp->isGlobalMem() || mp->isFlat())) {
                        computeUnit.globalMemoryPipe.acqCoalescerToken(mp);
                    }
                    // Latch the execution mask for memory references
                    if (mp->isMemRef()) {
                        mp->exec_mask = mp->wavefront()->execMask();
                    }
                    doDispatchListTransition(j, EXREADY, schIter->first);
                    DPRINTF(GPUSched, "dispatchList[%d]: fillDispatchList: "
                            "EMPTY->EXREADY\n", j);
                    schIter->first = nullptr;
                    schIter = schList.at(j).erase(schIter);
                    dispatched = true;
                } else {
                    // Stalled: either the resource was already claimed
                    // this cycle or the instruction is not ready
                    schIter->first->wavefront()->stats.schStalls++;
                    if (!dispRdy) {
                        schIter->first->wavefront()
                            ->stats.schResourceStalls++;
                    }
                    schIter++;
                }
            } else {
                // ...
                schIter++;
            }
        }
        // ...
    }
}
void
ScheduleStage::arbitrateVrfToLdsBus()
{
    // A FLAT access needs the VRF->LDS bus as well as the VRF->GM bus, so
    // a GM pipe holding a ready FLAT instruction must also reserve the
    // wave's local memory pipe
    for (int i = 0; i < computeUnit.numVectorGlobalMemUnits; i++) {
        int gm_exe_unit = computeUnit.firstMemUnit() + i;
        GPUDynInstPtr &gpu_dyn_inst = toExecute.readyInst(gm_exe_unit);
        if (gpu_dyn_inst && toExecute.dispatchStatus(gm_exe_unit)
            == EXREADY && gpu_dyn_inst->isFlat()) {
            Wavefront *wf = gpu_dyn_inst->wavefront();
            // ... (if the wave's LM pipe is busy, the wave stalls and is
            // reinserted on the schList)
            gpu_dyn_inst->wavefront()->stats.schLdsArbStalls++;
            // ... (otherwise the LM pipe is reserved for this access)
            DPRINTF(GPUSched, "dispatchList[%d]: arbVrfLds: "
                    "EMPTY->SKIP\n", wf->localMem);
        }
    }
}
void
ScheduleStage::checkRfOperandReadComplete()
{
    // Poll the register files for each schList entry whose operand reads
    // are in flight; entries whose reads have completed become RFREADY
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        for (auto &p : schList.at(j)) {
            const GPUDynInstPtr &gpu_dyn_inst = p.first;
            assert(gpu_dyn_inst);
            Wavefront *wf = gpu_dyn_inst->wavefront();
            // ...
            bool vrfRdy = true;
            if (!gpu_dyn_inst->isScalar()) {
                vrfRdy = computeUnit.vrf[wf->simdId]
                    ->operandReadComplete(wf, gpu_dyn_inst);
            }
            bool srfRdy = computeUnit.srf[wf->simdId]
                ->operandReadComplete(wf, gpu_dyn_inst);
            bool operandsReady = vrfRdy && srfRdy;
            if (operandsReady) {
                DPRINTF(GPUSched,
                        "schList[%d]: WV[%d] operands ready for: %d: %s\n",
                        j, wf->wfDynId, gpu_dyn_inst->seqNum(),
                        gpu_dyn_inst->disassemble());
                DPRINTF(GPUSched, "schList[%d]: WV[%d] RFBUSY->RFREADY\n",
                        j, wf->wfDynId);
                p.second = RFREADY;
            } else {
                DPRINTF(GPUSched,
                        "schList[%d]: WV[%d] operands not ready "
                        "for: %d: %s\n", j, wf->wfDynId,
                        gpu_dyn_inst->seqNum(), gpu_dyn_inst->disassemble());
            }
        }
    }
}
void
ScheduleStage::reserveResources()
{
    // ... (clear exeUnitReservations for this cycle)
    // Commit execution-resource reservations for the waves that will
    // dispatch to Execute this cycle
    for (int j = 0; j < computeUnit.numExeUnits(); ++j) {
        GPUDynInstPtr &gpu_dyn_inst = toExecute.readyInst(j);
        DISPATCH_STATUS s = toExecute.dispatchStatus(j);
        if (!gpu_dyn_inst || s == EMPTY) {
            continue;
        }
        Wavefront *wf = gpu_dyn_inst->wavefront();
        if (s == EXREADY) {
            // Reserve the execution resources this wave needs; a FLAT
            // instruction reserves both a global and a local memory unit
            std::vector<int> execUnitIds = wf->reserveResources();

            if (!gpu_dyn_inst->isScalar()) {
                computeUnit.vrf[wf->simdId]
                    ->dispatchInstruction(gpu_dyn_inst);
            }
            computeUnit.srf[wf->simdId]->dispatchInstruction(gpu_dyn_inst);

            std::stringstream ss;
            for (auto id : execUnitIds) {
                ss << id << " ";
            }
            DPRINTF(GPUSched, "dispatchList[%d]: SIMD[%d] WV[%d]: %d: %s"
                    " Reserving ExeRes[ %s]\n",
                    j, wf->simdId, wf->wfDynId, gpu_dyn_inst->seqNum(),
                    gpu_dyn_inst->disassemble(), ss.str());

            // Mark the reserved units busy, checking for conflicts
            for (auto execUnitId : execUnitIds) {
                panic_if(exeUnitReservations.at(execUnitId),
                         "Execution unit %d is reserved!!!\n"
                         "SIMD[%d] WV[%d]: %d: %s",
                         execUnitId, wf->simdId, wf->wfDynId,
                         gpu_dyn_inst->seqNum(),
                         gpu_dyn_inst->disassemble());
                exeUnitReservations.at(execUnitId) = true;
            }

            // Multiple reserved units mean a FLAT instruction; its local
            // memory entry must be in the SKIP state
            if (execUnitIds.size() > 1) {
                [[maybe_unused]] int lm_exec_unit = wf->localMem;
                // ... (assert that entry is SKIP)
            }
        } else if (s == SKIP) {
            // This LM pipe entry was reserved on behalf of a FLAT
            // instruction owned by the wave's GM pipe
            [[maybe_unused]] int gm_exec_unit = wf->globalMem;
            // ... (assert the GM pipe holds this wave and is EXREADY)
        }
    }
}
ScheduleStage::ScheduleStageStats::ScheduleStageStats(
    statistics::Group *parent, int num_exec_units)
    : statistics::Group(parent, "ScheduleStage"),
      ADD_STAT(rdyListEmpty, "number of cycles no wave on ready list per "
               "execution resource"),
      ADD_STAT(rdyListNotEmpty, "number of cycles one or more wave on ready "
               "list per execution resource"),
      ADD_STAT(addToSchListStalls, "number of cycles a wave is not added to "
               "schList per execution resource when ready list is not empty"),
      ADD_STAT(schListToDispList, "number of cycles a wave is added to "
               "dispatchList per execution resource"),
      ADD_STAT(schListToDispListStalls, "number of cycles no wave is added to"
               " dispatchList per execution resource"),
      ADD_STAT(rfAccessStalls, "number of stalls due to RF access denied"),
      ADD_STAT(ldsBusArbStalls, "number of stalls due to VRF->LDS bus "
               "conflicts"),
      ADD_STAT(opdNrdyStalls, "number of stalls in SCH due to operands not "
               "ready"),
      ADD_STAT(dispNrdyStalls, "number of stalls in SCH due to resource not "
               "ready")
{
    // ... (size the per-resource vectors to num_exec_units and give the
    // stall vectors one subname per NRDY condition)
}

} // namespace gem5