33#include "debug/GPUSync.hh" 
  113            DPRINTF(GPUSync, 
"CU[%d] WF[%d][%d] Wave[%d] - Exiting the " 
  114                    "program and decrementing max barrier count for " 
  115                    "barrier Id%d. New max count: %d.\n", cu->
cu_id,
 
  120        DPRINTF(GPUExec, 
"CU%d: decrease ref ctr WG[%d] to [%d]\n",
 
  130        DPRINTF(GPUExec, 
"Doing return for CU%d: WF[%d][%d][%d]\n",
 
  150                DPRINTF(GPUSync, 
"CU[%d] WF[%d][%d] Wave[%d] - All waves are " 
  151                        "now complete. Releasing barrier Id%d.\n", cu->
cu_id,
 
  174            if (!kernelEnd || !relNeeded) {
 
  194            gpuDynInst->simdId = wf->
simdId;
 
  195            gpuDynInst->wfSlotId = wf->
wfSlotId;
 
  196            gpuDynInst->wfDynId = wf->
wfDynId;
 
  198            DPRINTF(GPUExec, 
"inject global memory fence for CU%d: " 
 
  230        Addr pc = gpuDynInst->pc();
 
 
  284        Addr pc = gpuDynInst->pc();
 
 
  315        Addr pc = gpuDynInst->pc();
 
 
  348        Addr pc = gpuDynInst->pc();
 
 
  384            Addr pc = gpuDynInst->pc();
 
 
  412            Addr pc = gpuDynInst->pc();
 
 
  440            Addr pc = gpuDynInst->pc();
 
 
  475            DPRINTF(GPUSync, 
"CU[%d] WF[%d][%d] Wave[%d] - Stalling at " 
  476                    "barrier Id%d. %d waves now at barrier, %d waves " 
 
  530        gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
 
 
  575        gpuDynInst->wavefront()->setSleepTime(64 * simm16);
 
 
  600        warn_once(
"S_SETPRIO ignored -- Requested priority %d\n", userPrio);
 
 
  673        setFlag(GPUStaticInst::MemSync);
 
 
  691        if (gpuDynInst->exec_mask.none()) {
 
  697        gpuDynInst->latency.init(gpuDynInst->computeUnit());
 
  698        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
  700        gpuDynInst->resetEntireStatusVector();
 
  701        gpuDynInst->setStatusVector(0, 1);
 
  702        RequestPtr req = std::make_shared<Request>(0, 0, 0,
 
  703                                   gpuDynInst->computeUnit()->
 
  705                                   gpuDynInst->wfDynId);
 
  706        gpuDynInst->setRequestFlags(req);
 
  707        gpuDynInst->computeUnit()->scalarMemoryPipe.
 
  708            injectScalarMemFence(gpuDynInst, 
false, req);
 
 
  811        : 
Inst_SOPP(iFmt, 
"s_cbranch_cdbgsys_or_user")
 
 
  834            : 
Inst_SOPP(iFmt, 
"s_cbranch_cdbgsys_and_user")
 
 
void releaseBarrier(int bar_id)
int numYetToReachBarrier(int bar_id)
LdsState & getLds() const
int maxBarrierCnt(int bar_id)
int numAtBarrier(int bar_id)
void incNumAtBarrier(int bar_id)
RegisterManager * registerManager
void decMaxBarrierCnt(int bar_id)
gem5::ComputeUnit::ComputeUnitStats stats
FetchUnit & fetchUnit(int simdId)
void flushBuf(int wfSlotId)
bool isReachingKernelEnd(Wavefront *wf)
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
decrease the reference count after making sure it is in the list give back this chunk if the ref coun...
void freeRegisters(Wavefront *w)
GPUDispatcher & dispatcher()
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Base class for branch operations.
void execute(GPUDynInstPtr) override
Inst_SOPP__S_BARRIER(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_BRANCH(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
~Inst_SOPP__S_CBRANCH_CDBGSYS()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_CDBGSYS(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_CDBGUSER(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_CDBGUSER()
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_CBRANCH_EXECNZ()
Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *)
Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_EXECZ()
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_CBRANCH_SCC0()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_SCC1()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_VCCNZ()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *)
~Inst_SOPP__S_CBRANCH_VCCZ()
Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *)
~Inst_SOPP__S_DECPERFLEVEL()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *)
~Inst_SOPP__S_ENDPGM_SAVED()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_ENDPGM(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *)
~Inst_SOPP__S_ICACHE_INV()
Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *)
~Inst_SOPP__S_INCPERFLEVEL()
void execute(GPUDynInstPtr) override
Inst_SOPP__S_NOP(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_SENDMSGHALT()
Inst_SOPP__S_SENDMSG(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SETHALT(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SETKILL(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SETPRIO(InFmt_SOPP *)
~Inst_SOPP__S_SET_GPR_IDX_MODE()
Inst_SOPP__S_SET_GPR_IDX_MODE(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_SET_GPR_IDX_OFF()
Inst_SOPP__S_SET_GPR_IDX_OFF(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_SOPP__S_SLEEP(InFmt_SOPP *)
Inst_SOPP__S_TRAP(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_SOPP__S_TTRACEDATA()
Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *)
Inst_SOPP__S_WAITCNT(InFmt_SOPP *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_SOPP__S_WAKEUP(InFmt_SOPP *)
Inst_SOPP(InFmt_SOPP *, const std::string &opcode)
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
void panicUnimplemented() const
static const int InvalidID
void setStatus(status_e newStatus)
ComputeUnit * computeUnit
std::vector< int > vecReads
std::deque< GPUDynInstPtr > instructionBuffer
std::unordered_map< int, uint64_t > rawDist
void decLGKMInstsIssued()
void barrierId(int bar_id)
@ S_BARRIER
WF is stalled at a barrier.
@ S_WAITCNT
wavefront has unsatisfied wait counts
gem5::Wavefront::WavefrontStats stats
void sample(const U &v, int n=1)
Add a value to the distribution n times.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Classes that represent vector/scalar operands in VEGA ISA.
ScalarOperand< ScalarRegU32, true > ConstScalarOperandU32
ScalarOperand< ScalarRegU64, true > ConstScalarOperandU64
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
statistics::Scalar completedWGs
statistics::Scalar completedWfs
statistics::Distribution readsPerWrite