38 #include "debug/GPUVRF.hh"
43 #include "params/VectorRegisterFile.hh"
61 for (
const auto& srcVecOp : ii->srcVecRegOperands()) {
62 for (
const auto& physIdx : srcVecOp.physIndices()) {
64 DPRINTF(GPUVRF,
"RAW stall: WV[%d]: %s: physReg[%d]\n",
65 w->wfDynId, ii->disassemble(), physIdx);
66 w->stats.numTimesBlockedDueRAWDependencies++;
72 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
73 for (
const auto& physIdx : dstVecOp.physIndices()) {
75 DPRINTF(GPUVRF,
"WAX stall: WV[%d]: %s: physReg[%d]\n",
76 w->wfDynId, ii->disassemble(), physIdx);
77 w->stats.numTimesBlockedDueWAXDependencies++;
89 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
90 for (
const auto& physIdx : dstVecOp.physIndices()) {
93 if (!(ii->isAtomic() && !ii->isAtomicRet())) {
102 if (ii->exec_mask.any()) {
114 int DWords = ii->numSrcVecDWords();
117 uint64_t
mask =
w->execMask().to_ullong();
118 int srams =
w->execMask().size() / 4;
119 for (
int i = 0;
i < srams;
i++) {
127 && !(ii->isAtomic() || ii->isMemSync())) {
130 int opSize = ii->maxOperandSize();
131 Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
135 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
136 for (
const auto& physIdx : dstVecOp.physIndices()) {
141 DWords = ii->numDstVecDWords();
144 mask =
w->execMask().to_ullong();
145 srams =
w->execMask().size() / 4;
146 for (
int i = 0;
i < srams;
i++) {
159 assert(ii->isLoad() || ii->isAtomicRet());
160 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
161 for (
const auto& physIdx : dstVecOp.physIndices()) {
166 int DWords = ii->numDstVecDWords();
169 uint64_t
mask = ii->exec_mask.to_ullong();
170 int srams = ii->exec_mask.size() / 4;
171 for (
int i = 0;
i < srams;
i++) {