40 #include "debug/GPUVRF.hh"
45 #include "params/VectorRegisterFile.hh"
63 for (
const auto& srcVecOp : ii->srcVecRegOperands()) {
64 for (
const auto& physIdx : srcVecOp.physIndices()) {
66 DPRINTF(GPUVRF,
"RAW stall: WV[%d]: %s: physReg[%d]\n",
67 w->wfDynId, ii->disassemble(), physIdx);
68 w->stats.numTimesBlockedDueRAWDependencies++;
74 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
75 for (
const auto& physIdx : dstVecOp.physIndices()) {
77 DPRINTF(GPUVRF,
"WAX stall: WV[%d]: %s: physReg[%d]\n",
78 w->wfDynId, ii->disassemble(), physIdx);
79 w->stats.numTimesBlockedDueWAXDependencies++;
91 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
92 for (
const auto& physIdx : dstVecOp.physIndices()) {
95 if (!(ii->isAtomic() && !ii->isAtomicRet())) {
104 if (ii->exec_mask.any()) {
116 int DWords = ii->numSrcVecDWords();
119 uint64_t
mask =
w->execMask().to_ullong();
120 int srams =
w->execMask().size() / 4;
121 for (
int i = 0;
i < srams;
i++) {
129 && !(ii->isAtomic() || ii->isMemSync())) {
132 int opSize = ii->maxOperandSize();
133 Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
137 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
138 for (
const auto& physIdx : dstVecOp.physIndices()) {
143 DWords = ii->numDstVecDWords();
146 mask =
w->execMask().to_ullong();
147 srams =
w->execMask().size() / 4;
148 for (
int i = 0;
i < srams;
i++) {
161 assert(ii->isLoad() || ii->isAtomicRet());
162 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
163 for (
const auto& physIdx : dstVecOp.physIndices()) {
168 int DWords = ii->numDstVecDWords();
171 uint64_t
mask = ii->exec_mask.to_ullong();
172 int srams = ii->exec_mask.size() / 4;
173 for (
int i = 0;
i < srams;
i++) {