gem5 v24.0.0.0
Loading...
Searching...
No Matches
vector_register_file.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34#include <string>
35
36#include "base/logging.hh"
37#include "base/trace.hh"
38#include "debug/GPUVRF.hh"
44#include "params/VectorRegisterFile.hh"
45
46namespace gem5
47{
48
49VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams &p)
51{
52 regFile.resize(numRegs());
53
54 for (auto &reg : regFile) {
55 reg.zero();
56 }
57}
58
59bool
61{
62 bool src_ready = true, dst_ready=true;
63 for (const auto& srcVecOp : ii->srcVecRegOperands()) {
64 for (const auto& physIdx : srcVecOp.physIndices()) {
65 if (regBusy(physIdx) &&
66 !computeUnit->rfc[simdId]->inRFC(physIdx)) {
67 DPRINTF(GPUVRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
68 w->wfDynId, ii->disassemble(), physIdx);
69 w->stats.numTimesBlockedDueRAWDependencies++;
70 src_ready = false;
71 break;
72 }
73 }
74 if (!src_ready) {
75 break;
76 }
77 }
78
79 for (const auto& dstVecOp : ii->dstVecRegOperands()) {
80 for (const auto& physIdx : dstVecOp.physIndices()) {
81 if (regBusy(physIdx) &&
82 !computeUnit->rfc[simdId]->inRFC(physIdx)) {
83 DPRINTF(GPUVRF, "WAX stall: WV[%d]: %s: physReg[%d]\n",
84 w->wfDynId, ii->disassemble(), physIdx);
85 w->stats.numTimesBlockedDueWAXDependencies++;
86 dst_ready = false;
87 break;
88 }
89 }
90 if (!dst_ready) {
91 break;
92 }
93 }
94
95 return src_ready && dst_ready;
96}
97
98void
100{
101 for (const auto& dstVecOp : ii->dstVecRegOperands()) {
102 for (const auto& physIdx : dstVecOp.physIndices()) {
103 // If the instruction is atomic instruciton and the atomics do
104 // not return value, then do not mark this reg as busy.
105 if (!(ii->isAtomic() && !ii->isAtomicRet())) {
114 if (ii->exec_mask.any()) {
115 markReg(physIdx, true);
116 }
117 }
118 }
119 }
120}
121
122void
124{
125 // increment count of number of DWords read from VRF
126 int DWords = ii->numSrcVecDWords();
127 stats.registerReads += (DWords * w->execMask().count());
128
129 for (const auto& dstVecOp : ii->dstVecRegOperands()) {
130 for (const auto& physIdx : dstVecOp.physIndices()) {
131 if (computeUnit->rfc[simdId]->inRFC(physIdx)) {
132 stats.rfc_cache_write_hits += w->execMask().count();
133 }
134 }
135 }
136
137 for (const auto& srcVecOp : ii->srcVecRegOperands()) {
138 for (const auto& physIdx : srcVecOp.physIndices()) {
139 if (computeUnit->rfc[simdId]->inRFC(physIdx)) {
140 stats.rfc_cache_read_hits += w->execMask().count();
141 }
142 }
143 }
144
145 uint64_t mask = w->execMask().to_ullong();
146 int srams = w->execMask().size() / 4;
147 for (int i = 0; i < srams; i++) {
148 if (mask & 0xF) {
149 stats.sramReads += DWords;
150 }
151 mask = mask >> 4;
152 }
153
154 if (!ii->isLoad()
155 && !(ii->isAtomic() || ii->isMemSync())) {
156 // TODO: compute proper delay
157 // For now, it is based on largest operand size
158 int opSize = ii->maxOperandSize();
159 Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
161 Tick tickDelay = computeUnit->cyclesToTicks(delay);
162
163 for (const auto& dstVecOp : ii->dstVecRegOperands()) {
164 for (const auto& physIdx : dstVecOp.physIndices()) {
165 enqRegFreeEvent(physIdx, tickDelay);
166 }
167 }
168 // increment count of number of DWords written to VRF
169 DWords = ii->numDstVecDWords();
170 stats.registerWrites += (DWords * w->execMask().count());
171
172 mask = w->execMask().to_ullong();
173 srams = w->execMask().size() / 4;
174 for (int i = 0; i < srams; i++) {
175 if (mask & 0xF) {
176 stats.sramWrites += DWords;
177 }
178 mask = mask >> 4;
179 }
180 }
181}
182
183void
186{
187 assert(ii->isLoad() || ii->isAtomicRet());
188 for (const auto& dstVecOp : ii->dstVecRegOperands()) {
189 for (const auto& physIdx : dstVecOp.physIndices()) {
191 }
192 }
193 // increment count of number of DWords written to VRF
194 int DWords = ii->numDstVecDWords();
195 stats.registerWrites += (DWords * ii->exec_mask.count());
196
197 uint64_t mask = ii->exec_mask.to_ullong();
198 int srams = ii->exec_mask.size() / 4;
199 for (int i = 0; i < srams; i++) {
200 if (mask & 0xF) {
201 stats.sramWrites += DWords;
202 }
203 mask = mask >> 4;
204 }
205}
206
207} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
Tick cyclesToTicks(Cycles c) const
Tick clockPeriod() const
std::vector< RegisterFileCache * > rfc
int dpBypassLength() const
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
gem5::RegisterFile::RegisterFileStats stats
virtual bool regBusy(int idx) const
virtual void markReg(int regIdx, bool value)
virtual void enqRegFreeEvent(uint32_t regIdx, uint64_t delay)
ComputeUnit * computeUnit
virtual void scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) override
VectorRegisterFile(const VectorRegisterFileParams &p)
virtual bool operandsReady(Wavefront *w, GPUDynInstPtr ii) const override
std::vector< VecRegContainer > regFile
virtual void waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) override
virtual void scheduleWriteOperandsFromLoad(Wavefront *w, GPUDynInstPtr ii) override
size_type size() const
Return the number of elements, always 1 for a scalar.
Bitfield< 3, 0 > mask
Definition pcstate.hh:63
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 0 > p
Bitfield< 0 > w
Bitfield< 5, 3 > reg
Definition types.hh:92
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Tick
Tick count type.
Definition types.hh:58

Generated on Tue Jun 18 2024 16:24:04 for gem5 by doxygen 1.11.0