62 bool src_ready =
true, dst_ready=
true;
63 for (
const auto& srcVecOp : ii->srcVecRegOperands()) {
64 for (
const auto& physIdx : srcVecOp.physIndices()) {
67 DPRINTF(GPUVRF,
"RAW stall: WV[%d]: %s: physReg[%d]\n",
68 w->wfDynId, ii->disassemble(), physIdx);
69 w->stats.numTimesBlockedDueRAWDependencies++;
79 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
80 for (
const auto& physIdx : dstVecOp.physIndices()) {
83 DPRINTF(GPUVRF,
"WAX stall: WV[%d]: %s: physReg[%d]\n",
84 w->wfDynId, ii->disassemble(), physIdx);
85 w->stats.numTimesBlockedDueWAXDependencies++;
95 return src_ready && dst_ready;
126 int DWords = ii->numSrcVecDWords();
127 stats.registerReads += (DWords *
w->execMask().
count());
129 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
130 for (
const auto& physIdx : dstVecOp.physIndices()) {
132 stats.rfc_cache_write_hits +=
w->execMask().count();
137 for (
const auto& srcVecOp : ii->srcVecRegOperands()) {
138 for (
const auto& physIdx : srcVecOp.physIndices()) {
140 stats.rfc_cache_read_hits +=
w->execMask().count();
145 uint64_t
mask =
w->execMask().to_ullong();
146 int srams =
w->execMask().size() / 4;
147 for (
int i = 0;
i < srams;
i++) {
149 stats.sramReads += DWords;
155 && !(ii->isAtomic() || ii->isMemSync())) {
158 int opSize = ii->maxOperandSize();
159 Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
163 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
164 for (
const auto& physIdx : dstVecOp.physIndices()) {
169 DWords = ii->numDstVecDWords();
170 stats.registerWrites += (DWords *
w->execMask().
count());
172 mask =
w->execMask().to_ullong();
173 srams =
w->execMask().size() / 4;
174 for (
int i = 0;
i < srams;
i++) {
176 stats.sramWrites += DWords;
187 assert(ii->isLoad() || ii->isAtomicRet());
188 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
189 for (
const auto& physIdx : dstVecOp.physIndices()) {
194 int DWords = ii->numDstVecDWords();
195 stats.registerWrites += (DWords * ii->exec_mask.count());
197 uint64_t
mask = ii->exec_mask.to_ullong();
198 int srams = ii->exec_mask.size() / 4;
199 for (
int i = 0;
i < srams;
i++) {
201 stats.sramWrites += DWords;