40 #include "debug/GPUVRF.hh"
45 #include "params/VectorRegisterFile.hh"
60 for (
int i = 0;
i < ii->getNumOperands(); ++
i) {
61 if (ii->isVectorRegister(
i) && ii->isSrcOperand(
i)) {
62 int vgprIdx = ii->getRegisterIndex(
i, ii);
66 ii->getOperandSize(
i) <= 4 ? 1 : ii->getOperandSize(
i) / 4;
67 for (
int j = 0;
j < nRegs;
j++) {
71 if (ii->isDstOperand(
i)) {
72 w->numTimesBlockedDueWAXDependencies++;
73 }
else if (ii->isSrcOperand(
i)) {
74 DPRINTF(GPUVRF,
"RAW stall: WV[%d]: %s: physReg[%d]\n",
75 w->wfDynId, ii->disassemble(), pVgpr);
76 w->numTimesBlockedDueRAWDependencies++;
90 for (
int i = 0;
i < ii->getNumOperands(); ++
i) {
91 if (ii->isVectorRegister(
i) && ii->isDstOperand(
i)) {
92 int vgprIdx = ii->getRegisterIndex(
i, ii);
93 int nRegs = ii->getOperandSize(
i) <= 4 ? 1 :
94 ii->getOperandSize(
i) / 4;
96 for (
int j = 0;
j < nRegs; ++
j) {
103 if (!(ii->isAtomic() && !ii->isAtomicRet())) {
113 if (!ii->isLoad() || (ii->isLoad()
114 && ii->exec_mask.any())) {
127 int DWORDs = ii->numSrcVecDWORDs();
130 uint64_t
mask =
w->execMask().to_ullong();
131 int srams =
w->execMask().size() / 4;
132 for (
int i = 0;
i < srams;
i++) {
140 && !(ii->isAtomic() || ii->isMemSync())) {
142 for (
int i = 0;
i < ii->getNumOperands();
i++) {
143 if (ii->getOperandSize(
i) > opSize) {
144 opSize = ii->getOperandSize(
i);
147 Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
151 for (
int i = 0;
i < ii->getNumOperands();
i++) {
152 if (ii->isVectorRegister(
i) && ii->isDstOperand(
i)) {
153 int vgprIdx = ii->getRegisterIndex(
i, ii);
154 int nRegs = ii->getOperandSize(
i) <= 4 ? 1
155 : ii->getOperandSize(
i) / 4;
156 for (
int j = 0;
j < nRegs;
j++) {
165 DWORDs = ii->numDstVecDWORDs();
168 mask =
w->execMask().to_ullong();
169 srams =
w->execMask().size() / 4;
170 for (
int i = 0;
i < srams;
i++) {
183 assert(ii->isLoad() || ii->isAtomicRet());
184 for (
int i = 0;
i < ii->getNumOperands(); ++
i) {
185 if (ii->isVectorRegister(
i) && ii->isDstOperand(
i)) {
186 int vgprIdx = ii->getRegisterIndex(
i, ii);
187 int nRegs = ii->getOperandSize(
i) <= 4 ? 1 :
188 ii->getOperandSize(
i) / 4;
190 for (
int j = 0;
j < nRegs; ++
j) {
198 int DWORDs = ii->numDstVecDWORDs();
201 uint64_t
mask = ii->exec_mask.to_ullong();
202 int srams = ii->exec_mask.size() / 4;
203 for (
int i = 0;
i < srams;
i++) {
212 VectorRegisterFileParams::create()