38 #include "debug/GPUVRF.hh"
43 #include "params/VectorRegisterFile.hh"
61 for (
const auto& srcVecOp : ii->srcVecRegOperands()) {
62 for (
const auto& physIdx : srcVecOp.physIndices()) {
64 DPRINTF(GPUVRF,
"RAW stall: WV[%d]: %s: physReg[%d]\n",
65 w->wfDynId, ii->disassemble(), physIdx);
66 w->stats.numTimesBlockedDueRAWDependencies++;
72 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
73 for (
const auto& physIdx : dstVecOp.physIndices()) {
75 DPRINTF(GPUVRF,
"WAX stall: WV[%d]: %s: physReg[%d]\n",
76 w->wfDynId, ii->disassemble(), physIdx);
77 w->stats.numTimesBlockedDueWAXDependencies++;
89 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
90 for (
const auto& physIdx : dstVecOp.physIndices()) {
93 if (!(ii->isAtomic() && !ii->isAtomicRet())) {
102 if (ii->exec_mask.any()) {
114 int DWords = ii->numSrcVecDWords();
117 uint64_t
mask =
w->execMask().to_ullong();
118 int srams =
w->execMask().size() / 4;
119 for (
int i = 0;
i < srams;
i++) {
127 && !(ii->isAtomic() || ii->isMemSync())) {
130 int opSize = ii->maxOperandSize();
131 Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
135 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
136 for (
const auto& physIdx : dstVecOp.physIndices()) {
141 DWords = ii->numDstVecDWords();
144 mask =
w->execMask().to_ullong();
145 srams =
w->execMask().size() / 4;
146 for (
int i = 0;
i < srams;
i++) {
159 assert(ii->isLoad() || ii->isAtomicRet());
160 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
161 for (
const auto& physIdx : dstVecOp.physIndices()) {
166 int DWords = ii->numDstVecDWords();
169 uint64_t
mask = ii->exec_mask.to_ullong();
170 int srams = ii->exec_mask.size() / 4;
171 for (
int i = 0;
i < srams;
i++) {
Tick cyclesToTicks(Cycles c) const
int dpBypassLength() const
Cycles is a wrapper class for representing cycle counts, i.e.
gem5::RegisterFile::RegisterFileStats stats
virtual bool regBusy(int idx) const
virtual void markReg(int regIdx, bool value)
virtual void enqRegFreeEvent(uint32_t regIdx, uint64_t delay)
ComputeUnit * computeUnit
virtual void scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) override
VectorRegisterFile(const VectorRegisterFileParams &p)
virtual bool operandsReady(Wavefront *w, GPUDynInstPtr ii) const override
std::vector< VecRegContainer > regFile
virtual void waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) override
virtual void scheduleWriteOperandsFromLoad(Wavefront *w, GPUDynInstPtr ii) override
constexpr uint64_t mask(unsigned nbits)
Generate a 64-bit mask of 'nbits' 1s, right justified.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Tick
Tick count type.
statistics::Scalar sramReads
statistics::Scalar sramWrites
statistics::Scalar registerReads
statistics::Scalar registerWrites