gem5  v20.1.0.0
vector_register_file.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
36 #include <string>
37 
38 #include "base/logging.hh"
39 #include "base/trace.hh"
40 #include "debug/GPUVRF.hh"
44 #include "gpu-compute/wavefront.hh"
45 #include "params/VectorRegisterFile.hh"
46 
47 VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams *p)
48  : RegisterFile(p)
49 {
50  regFile.resize(numRegs(), VecRegContainer());
51 
52  for (auto &reg : regFile) {
53  reg.zero();
54  }
55 }
56 
57 bool
59 {
60  for (int i = 0; i < ii->getNumOperands(); ++i) {
61  if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) {
62  int vgprIdx = ii->getRegisterIndex(i, ii);
63 
64  // determine number of registers
65  int nRegs =
66  ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4;
67  for (int j = 0; j < nRegs; j++) {
68  int pVgpr = computeUnit->registerManager
69  ->mapVgpr(w, vgprIdx + j);
70  if (regBusy(pVgpr)) {
71  if (ii->isDstOperand(i)) {
72  w->numTimesBlockedDueWAXDependencies++;
73  } else if (ii->isSrcOperand(i)) {
74  DPRINTF(GPUVRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
75  w->wfDynId, ii->disassemble(), pVgpr);
76  w->numTimesBlockedDueRAWDependencies++;
77  }
78  return false;
79  }
80  }
81  }
82  }
83  return true;
84 }
85 
86 void
88 {
89  // iterate over all register destination operands
90  for (int i = 0; i < ii->getNumOperands(); ++i) {
91  if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
92  int vgprIdx = ii->getRegisterIndex(i, ii);
93  int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
94  ii->getOperandSize(i) / 4;
95 
96  for (int j = 0; j < nRegs; ++j) {
97  int physReg = computeUnit->registerManager
98  ->mapVgpr(w, vgprIdx + j);
99 
100  // If instruction is atomic instruction and
101  // the atomics do not return value, then
102  // do not mark this reg as busy.
103  if (!(ii->isAtomic() && !ii->isAtomicRet())) {
113  if (!ii->isLoad() || (ii->isLoad()
114  && ii->exec_mask.any())) {
115  markReg(physReg, true);
116  }
117  }
118  }
119  }
120  }
121 }
122 
123 void
125 {
126  // increment count of number of DWORDs read from VRF
127  int DWORDs = ii->numSrcVecDWORDs();
128  registerReads += (DWORDs * w->execMask().count());
129 
130  uint64_t mask = w->execMask().to_ullong();
131  int srams = w->execMask().size() / 4;
132  for (int i = 0; i < srams; i++) {
133  if (mask & 0xF) {
134  sramReads += DWORDs;
135  }
136  mask = mask >> 4;
137  }
138 
139  if (!ii->isLoad()
140  && !(ii->isAtomic() || ii->isMemSync())) {
141  int opSize = 4;
142  for (int i = 0; i < ii->getNumOperands(); i++) {
143  if (ii->getOperandSize(i) > opSize) {
144  opSize = ii->getOperandSize(i);
145  }
146  }
147  Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
149  Tick tickDelay = computeUnit->cyclesToTicks(delay);
150 
151  for (int i = 0; i < ii->getNumOperands(); i++) {
152  if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
153  int vgprIdx = ii->getRegisterIndex(i, ii);
154  int nRegs = ii->getOperandSize(i) <= 4 ? 1
155  : ii->getOperandSize(i) / 4;
156  for (int j = 0; j < nRegs; j++) {
157  int physReg = computeUnit->registerManager
158  ->mapVgpr(w, vgprIdx + j);
159  enqRegFreeEvent(physReg, tickDelay);
160  }
161  }
162  }
163 
164  // increment count of number of DWORDs written to VRF
165  DWORDs = ii->numDstVecDWORDs();
166  registerWrites += (DWORDs * w->execMask().count());
167 
168  mask = w->execMask().to_ullong();
169  srams = w->execMask().size() / 4;
170  for (int i = 0; i < srams; i++) {
171  if (mask & 0xF) {
172  sramWrites += DWORDs;
173  }
174  mask = mask >> 4;
175  }
176  }
177 }
178 
179 void
182 {
183  assert(ii->isLoad() || ii->isAtomicRet());
184  for (int i = 0; i < ii->getNumOperands(); ++i) {
185  if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
186  int vgprIdx = ii->getRegisterIndex(i, ii);
187  int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
188  ii->getOperandSize(i) / 4;
189 
190  for (int j = 0; j < nRegs; ++j) {
191  int physReg = computeUnit->registerManager
192  ->mapVgpr(w, vgprIdx + j);
194  }
195  }
196  }
197  // increment count of number of DWORDs written to VRF
198  int DWORDs = ii->numDstVecDWORDs();
199  registerWrites += (DWORDs * ii->exec_mask.count());
200 
201  uint64_t mask = ii->exec_mask.to_ullong();
202  int srams = ii->exec_mask.size() / 4;
203  for (int i = 0; i < srams; i++) {
204  if (mask & 0xF) {
205  sramWrites += DWORDs;
206  }
207  mask = mask >> 4;
208  }
209 }
210 
212 VectorRegisterFileParams::create()
213 {
214  return new VectorRegisterFile(this);
215 }
VectorRegisterFile::operandsReady
virtual bool operandsReady(Wavefront *w, GPUDynInstPtr ii) const override
Definition: vector_register_file.cc:58
RegisterFile::registerReads
Stats::Scalar registerReads
Definition: register_file.hh:159
simple_pool_manager.hh
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
VectorRegisterFile::VecRegContainer
TheGpuISA::VecRegContainerU32 VecRegContainer
Definition: vector_register_file.hh:49
compute_unit.hh
RegisterFile::sramWrites
Stats::Scalar sramWrites
Definition: register_file.hh:168
VectorRegisterFile::scheduleWriteOperands
virtual void scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) override
Definition: vector_register_file.cc:87
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
VectorRegisterFile::waveExecuteInst
virtual void waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) override
Definition: vector_register_file.cc:124
ComputeUnit::registerManager
RegisterManager * registerManager
Definition: compute_unit.hh:275
RegisterFile::numRegs
int numRegs() const
Definition: register_file.hh:64
X86ISA::reg
Bitfield< 5, 3 > reg
Definition: types.hh:87
wavefront.hh
VectorRegisterFile::VectorRegisterFile
VectorRegisterFile(const VectorRegisterFileParams *p)
Definition: vector_register_file.cc:47
ArmISA::j
Bitfield< 24 > j
Definition: miscregs_types.hh:54
vector_register_file.hh
Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:224
MipsISA::w
Bitfield< 0 > w
Definition: pra_constants.hh:278
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
RegisterFile::regBusy
virtual bool regBusy(int idx) const
Definition: register_file.cc:92
gpu_dyn_inst.hh
Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:214
RegisterFile::markReg
virtual void markReg(int regIdx, bool value)
Definition: register_file.cc:98
Wavefront
Definition: wavefront.hh:57
RegisterFile::enqRegFreeEvent
virtual void enqRegFreeEvent(uint32_t regIdx, uint64_t delay)
Definition: register_file.cc:106
VectorRegisterFile
Definition: vector_register_file.hh:46
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
RegisterFile::registerWrites
Stats::Scalar registerWrites
Definition: register_file.hh:161
logging.hh
Cycles
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
Definition: types.hh:83
RegisterManager::mapVgpr
int mapVgpr(Wavefront *w, int vgprIndex)
Definition: register_manager.cc:94
ComputeUnit::dpBypassLength
int dpBypassLength() const
Definition: compute_unit.hh:393
RegisterFile::sramReads
Stats::Scalar sramReads
Definition: register_file.hh:166
RegisterFile
Definition: register_file.hh:58
trace.hh
RegisterFile::computeUnit
ComputeUnit * computeUnit
Definition: register_file.hh:149
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
VectorRegisterFile::scheduleWriteOperandsFromLoad
virtual void scheduleWriteOperandsFromLoad(Wavefront *w, GPUDynInstPtr ii) override
Definition: vector_register_file.cc:180
VectorRegisterFile::regFile
std::vector< VecRegContainer > regFile
Definition: vector_register_file.hh:106
ArmISA::mask
Bitfield< 28, 24 > mask
Definition: miscregs_types.hh:711

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17