38#include "debug/GPUVRF.hh"
44#include "params/VectorRegisterFile.hh"
62 bool src_ready =
true, dst_ready=
true;
63 for (
const auto& srcVecOp : ii->srcVecRegOperands()) {
64 for (
const auto& physIdx : srcVecOp.physIndices()) {
67 DPRINTF(GPUVRF,
"RAW stall: WV[%d]: %s: physReg[%d]\n",
68 w->wfDynId, ii->disassemble(), physIdx);
69 w->stats.numTimesBlockedDueRAWDependencies++;
79 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
80 for (
const auto& physIdx : dstVecOp.physIndices()) {
83 DPRINTF(GPUVRF,
"WAX stall: WV[%d]: %s: physReg[%d]\n",
84 w->wfDynId, ii->disassemble(), physIdx);
85 w->stats.numTimesBlockedDueWAXDependencies++;
95 return src_ready && dst_ready;
101 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
102 for (
const auto& physIdx : dstVecOp.physIndices()) {
105 if (!(ii->isAtomic() && !ii->isAtomicRet())) {
114 if (ii->exec_mask.any()) {
126 int DWords = ii->numSrcVecDWords();
129 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
130 for (
const auto& physIdx : dstVecOp.physIndices()) {
137 for (
const auto& srcVecOp : ii->srcVecRegOperands()) {
138 for (
const auto& physIdx : srcVecOp.physIndices()) {
145 uint64_t
mask =
w->execMask().to_ullong();
146 int srams =
w->execMask().
size() / 4;
147 for (
int i = 0;
i < srams;
i++) {
155 && !(ii->isAtomic() || ii->isMemSync())) {
158 int opSize = ii->maxOperandSize();
159 Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
163 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
164 for (
const auto& physIdx : dstVecOp.physIndices()) {
169 DWords = ii->numDstVecDWords();
172 mask =
w->execMask().to_ullong();
173 srams =
w->execMask().
size() / 4;
174 for (
int i = 0;
i < srams;
i++) {
187 assert(ii->isLoad() || ii->isAtomicRet());
188 for (
const auto& dstVecOp : ii->dstVecRegOperands()) {
189 for (
const auto& physIdx : dstVecOp.physIndices()) {
194 int DWords = ii->numDstVecDWords();
197 uint64_t
mask = ii->exec_mask.to_ullong();
198 int srams = ii->exec_mask.
size() / 4;
199 for (
int i = 0;
i < srams;
i++) {
Tick cyclesToTicks(Cycles c) const
std::vector< RegisterFileCache * > rfc
int dpBypassLength() const
Cycles is a wrapper class for representing cycle counts, i.e.
gem5::RegisterFile::RegisterFileStats stats
virtual bool regBusy(int idx) const
virtual void markReg(int regIdx, bool value)
virtual void enqRegFreeEvent(uint32_t regIdx, uint64_t delay)
ComputeUnit * computeUnit
virtual void scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) override
VectorRegisterFile(const VectorRegisterFileParams &p)
virtual bool operandsReady(Wavefront *w, GPUDynInstPtr ii) const override
std::vector< VecRegContainer > regFile
virtual void waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) override
virtual void scheduleWriteOperandsFromLoad(Wavefront *w, GPUDynInstPtr ii) override
size_type size() const
Return the number of elements, always 1 for a scalar.
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Tick
Tick count type.
statistics::Scalar sramReads
statistics::Scalar rfc_cache_write_hits
statistics::Scalar sramWrites
statistics::Scalar rfc_cache_read_hits
statistics::Scalar registerReads
statistics::Scalar registerWrites