33#include "debug/GPUSync.hh"
113 DPRINTF(GPUSync,
"CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
114 "program and decrementing max barrier count for "
115 "barrier Id%d. New max count: %d.\n", cu->
cu_id,
120 DPRINTF(GPUExec,
"CU%d: decrease ref ctr WG[%d] to [%d]\n",
130 DPRINTF(GPUExec,
"Doing return for CU%d: WF[%d][%d][%d]\n",
150 DPRINTF(GPUSync,
"CU[%d] WF[%d][%d] Wave[%d] - All waves are "
151 "now complete. Releasing barrier Id%d.\n", cu->
cu_id,
174 if (!kernelEnd || !relNeeded) {
194 gpuDynInst->simdId = wf->
simdId;
195 gpuDynInst->wfSlotId = wf->
wfSlotId;
196 gpuDynInst->wfDynId = wf->
wfDynId;
198 DPRINTF(GPUExec,
"inject global memory fence for CU%d: "
348 Addr pc = gpuDynInst->pc();
384 Addr pc = gpuDynInst->pc();
475 DPRINTF(GPUSync,
"CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
476 "barrier Id%d. %d waves now at barrier, %d waves "
530 gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
575 gpuDynInst->wavefront()->setSleepTime(64 * simm16);
600 warn_once(
"S_SETPRIO ignored -- Requested priority %d\n", userPrio);
673 setFlag(GPUStaticInst::MemSync);
691 if (gpuDynInst->exec_mask.none()) {
697 gpuDynInst->latency.init(gpuDynInst->computeUnit());
698 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
700 gpuDynInst->resetEntireStatusVector();
701 gpuDynInst->setStatusVector(0, 1);
702 RequestPtr req = std::make_shared<Request>(0, 0, 0,
703 gpuDynInst->computeUnit()->
705 gpuDynInst->wfDynId);
706 gpuDynInst->setRequestFlags(req);
707 gpuDynInst->computeUnit()->scalarMemoryPipe.
708 injectScalarMemFence(gpuDynInst,
false, req);
811 :
Inst_SOPP(iFmt,
"s_cbranch_cdbgsys_or_user")
834 :
Inst_SOPP(iFmt,
"s_cbranch_cdbgsys_and_user")
void releaseBarrier(int bar_id)
int numYetToReachBarrier(int bar_id)
LdsState & getLds() const
int maxBarrierCnt(int bar_id)
int numAtBarrier(int bar_id)
void incNumAtBarrier(int bar_id)
RegisterManager * registerManager
void decMaxBarrierCnt(int bar_id)
gem5::ComputeUnit::ComputeUnitStats stats
FetchUnit & fetchUnit(int simdId)
void flushBuf(int wfSlotId)
bool isReachingKernelEnd(Wavefront *wf)
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
decrease the reference count after making sure it is in the list; give back this chunk if the ref coun...
void freeRegisters(Wavefront *w)
GPUDispatcher & dispatcher()
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Base class for branch operations.
void execute(GPUDynInstPtr) override
Inst_SOPP__S_BARRIER(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_BRANCH(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
~Inst_SOPP__S_CBRANCH_CDBGSYS()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_CDBGSYS(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_CDBGUSER(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_CDBGUSER()
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_CBRANCH_EXECNZ()
Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *)
Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_EXECZ()
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_CBRANCH_SCC0()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_SCC1()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_VCCNZ()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_VCCZ()
Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *)
~Inst_SOPP__S_DECPERFLEVEL()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *)
~Inst_SOPP__S_ENDPGM_SAVED()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_ENDPGM(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *)
~Inst_SOPP__S_ICACHE_INV()
Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *)
~Inst_SOPP__S_INCPERFLEVEL()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_NOP(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_SENDMSGHALT()
Inst_SOPP__S_SENDMSG(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SETHALT(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SETKILL(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SETPRIO(InFmt_SOPP *)
~Inst_SOPP__S_SET_GPR_IDX_MODE()
Inst_SOPP__S_SET_GPR_IDX_MODE(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_SET_GPR_IDX_OFF()
Inst_SOPP__S_SET_GPR_IDX_OFF(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SLEEP(InFmt_SOPP *)
Inst_SOPP__S_TRAP(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_TTRACEDATA()
Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *)
Inst_SOPP__S_WAITCNT(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_SOPP__S_WAKEUP(InFmt_SOPP *)
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
void panicUnimplemented() const
static const int InvalidID
void setStatus(status_e newStatus)
ComputeUnit * computeUnit
std::vector< int > vecReads
std::deque< GPUDynInstPtr > instructionBuffer
std::unordered_map< int, uint64_t > rawDist
void decLGKMInstsIssued()
void barrierId(int bar_id)
@ S_BARRIER
WF is stalled at a barrier.
@ S_WAITCNT
wavefront has unsatisfied wait counts
gem5::Wavefront::WavefrontStats stats
void sample(const U &v, int n=1)
Add a value to the distribution n times.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
statistics::Scalar completedWGs
statistics::Scalar completedWfs
statistics::Distribution readsPerWrite