gem5  v21.1.0.2
vector_register_file.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
35 
#include "gpu-compute/vector_register_file.hh"

#include <string>

#include "base/logging.hh"
#include "base/trace.hh"
#include "debug/GPUVRF.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/simple_pool_manager.hh"
#include "gpu-compute/wavefront.hh"
#include "params/VectorRegisterFile.hh"
46 
47 namespace gem5
48 {
49 
50 VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams &p)
51  : RegisterFile(p)
52 {
53  regFile.resize(numRegs(), VecRegContainer());
54 
55  for (auto &reg : regFile) {
56  reg.zero();
57  }
58 }
59 
60 bool
62 {
63  for (const auto& srcVecOp : ii->srcVecRegOperands()) {
64  for (const auto& physIdx : srcVecOp.physIndices()) {
65  if (regBusy(physIdx)) {
66  DPRINTF(GPUVRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
67  w->wfDynId, ii->disassemble(), physIdx);
68  w->stats.numTimesBlockedDueRAWDependencies++;
69  return false;
70  }
71  }
72  }
73 
74  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
75  for (const auto& physIdx : dstVecOp.physIndices()) {
76  if (regBusy(physIdx)) {
77  DPRINTF(GPUVRF, "WAX stall: WV[%d]: %s: physReg[%d]\n",
78  w->wfDynId, ii->disassemble(), physIdx);
79  w->stats.numTimesBlockedDueWAXDependencies++;
80  return false;
81  }
82  }
83  }
84 
85  return true;
86 }
87 
88 void
90 {
91  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
92  for (const auto& physIdx : dstVecOp.physIndices()) {
93  // If the instruction is atomic instruciton and the atomics do
94  // not return value, then do not mark this reg as busy.
95  if (!(ii->isAtomic() && !ii->isAtomicRet())) {
104  if (ii->exec_mask.any()) {
105  markReg(physIdx, true);
106  }
107  }
108  }
109  }
110 }
111 
112 void
114 {
115  // increment count of number of DWords read from VRF
116  int DWords = ii->numSrcVecDWords();
117  stats.registerReads += (DWords * w->execMask().count());
118 
119  uint64_t mask = w->execMask().to_ullong();
120  int srams = w->execMask().size() / 4;
121  for (int i = 0; i < srams; i++) {
122  if (mask & 0xF) {
123  stats.sramReads += DWords;
124  }
125  mask = mask >> 4;
126  }
127 
128  if (!ii->isLoad()
129  && !(ii->isAtomic() || ii->isMemSync())) {
130  // TODO: compute proper delay
131  // For now, it is based on largest operand size
132  int opSize = ii->maxOperandSize();
133  Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
135  Tick tickDelay = computeUnit->cyclesToTicks(delay);
136 
137  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
138  for (const auto& physIdx : dstVecOp.physIndices()) {
139  enqRegFreeEvent(physIdx, tickDelay);
140  }
141  }
142  // increment count of number of DWords written to VRF
143  DWords = ii->numDstVecDWords();
144  stats.registerWrites += (DWords * w->execMask().count());
145 
146  mask = w->execMask().to_ullong();
147  srams = w->execMask().size() / 4;
148  for (int i = 0; i < srams; i++) {
149  if (mask & 0xF) {
150  stats.sramWrites += DWords;
151  }
152  mask = mask >> 4;
153  }
154  }
155 }
156 
157 void
160 {
161  assert(ii->isLoad() || ii->isAtomicRet());
162  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
163  for (const auto& physIdx : dstVecOp.physIndices()) {
165  }
166  }
167  // increment count of number of DWords written to VRF
168  int DWords = ii->numDstVecDWords();
169  stats.registerWrites += (DWords * ii->exec_mask.count());
170 
171  uint64_t mask = ii->exec_mask.to_ullong();
172  int srams = ii->exec_mask.size() / 4;
173  for (int i = 0; i < srams; i++) {
174  if (mask & 0xF) {
175  stats.sramWrites += DWords;
176  }
177  mask = mask >> 4;
178  }
179 }
180 
181 } // namespace gem5
gem5::RegisterFile
Definition: register_file.hh:58
gem5::VectorRegisterFile::VecRegContainer
TheGpuISA::VecRegContainerU32 VecRegContainer
Definition: vector_register_file.hh:52
simple_pool_manager.hh
gem5::MipsISA::w
Bitfield< 0 > w
Definition: pra_constants.hh:281
gem5::RegisterFile::enqRegFreeEvent
virtual void enqRegFreeEvent(uint32_t regIdx, uint64_t delay)
Definition: register_file.cc:106
gem5::VectorRegisterFile::scheduleWriteOperands
virtual void scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) override
Definition: vector_register_file.cc:89
gem5::VectorRegisterFile::regFile
std::vector< VecRegContainer > regFile
Definition: vector_register_file.hh:109
gem5::Wavefront
Definition: wavefront.hh:62
compute_unit.hh
gem5::RegisterFile::stats
gem5::RegisterFile::RegisterFileStats stats
gem5::RegisterFile::markReg
virtual void markReg(int regIdx, bool value)
Definition: register_file.cc:98
gem5::RegisterFile::RegisterFileStats::registerWrites
statistics::Scalar registerWrites
Definition: register_file.hh:164
gem5::VectorRegisterFile::scheduleWriteOperandsFromLoad
virtual void scheduleWriteOperandsFromLoad(Wavefront *w, GPUDynInstPtr ii) override
Definition: vector_register_file.cc:158
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
wavefront.hh
gem5::mask
constexpr uint64_t mask(unsigned nbits)
Generate a 64-bit mask of 'nbits' 1s, right justified.
Definition: bitfield.hh:63
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
vector_register_file.hh
gem5::VectorRegisterFile::waveExecuteInst
virtual void waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) override
Definition: vector_register_file.cc:113
gem5::RegisterFile::regBusy
virtual bool regBusy(int idx) const
Definition: register_file.cc:92
gem5::RegisterFile::RegisterFileStats::registerReads
statistics::Scalar registerReads
Definition: register_file.hh:162
gem5::Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:227
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gpu_dyn_inst.hh
gem5::ComputeUnit::dpBypassLength
int dpBypassLength() const
Definition: compute_unit.hh:392
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:51
gem5::X86ISA::reg
Bitfield< 5, 3 > reg
Definition: types.hh:92
gem5::VectorRegisterFile::VectorRegisterFile
VectorRegisterFile(const VectorRegisterFileParams &p)
Definition: vector_register_file.cc:50
gem5::RegisterFile::numRegs
int numRegs() const
Definition: register_file.hh:64
gem5::VectorRegisterFile::operandsReady
virtual bool operandsReady(Wavefront *w, GPUDynInstPtr ii) const override
Definition: vector_register_file.cc:61
logging.hh
gem5::RegisterFile::computeUnit
ComputeUnit * computeUnit
Definition: register_file.hh:148
trace.hh
gem5::RegisterFile::RegisterFileStats::sramWrites
statistics::Scalar sramWrites
Definition: register_file.hh:171
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::RegisterFile::RegisterFileStats::sramReads
statistics::Scalar sramReads
Definition: register_file.hh:169
gem5::Clocked::clockPeriod
Tick clockPeriod() const
Definition: clocked_object.hh:217

Generated on Tue Sep 21 2021 12:25:25 for gem5 by doxygen 1.8.17