gem5  v22.1.0.0
vector_register_file.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include <string>
35 
36 #include "base/logging.hh"
37 #include "base/trace.hh"
38 #include "debug/GPUVRF.hh"
42 #include "gpu-compute/wavefront.hh"
43 #include "params/VectorRegisterFile.hh"
44 
45 namespace gem5
46 {
47 
48 VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams &p)
49  : RegisterFile(p)
50 {
51  regFile.resize(numRegs());
52 
53  for (auto &reg : regFile) {
54  reg.zero();
55  }
56 }
57 
58 bool
60 {
61  for (const auto& srcVecOp : ii->srcVecRegOperands()) {
62  for (const auto& physIdx : srcVecOp.physIndices()) {
63  if (regBusy(physIdx)) {
64  DPRINTF(GPUVRF, "RAW stall: WV[%d]: %s: physReg[%d]\n",
65  w->wfDynId, ii->disassemble(), physIdx);
66  w->stats.numTimesBlockedDueRAWDependencies++;
67  return false;
68  }
69  }
70  }
71 
72  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
73  for (const auto& physIdx : dstVecOp.physIndices()) {
74  if (regBusy(physIdx)) {
75  DPRINTF(GPUVRF, "WAX stall: WV[%d]: %s: physReg[%d]\n",
76  w->wfDynId, ii->disassemble(), physIdx);
77  w->stats.numTimesBlockedDueWAXDependencies++;
78  return false;
79  }
80  }
81  }
82 
83  return true;
84 }
85 
86 void
88 {
89  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
90  for (const auto& physIdx : dstVecOp.physIndices()) {
91  // If the instruction is atomic instruciton and the atomics do
92  // not return value, then do not mark this reg as busy.
93  if (!(ii->isAtomic() && !ii->isAtomicRet())) {
102  if (ii->exec_mask.any()) {
103  markReg(physIdx, true);
104  }
105  }
106  }
107  }
108 }
109 
110 void
112 {
113  // increment count of number of DWords read from VRF
114  int DWords = ii->numSrcVecDWords();
115  stats.registerReads += (DWords * w->execMask().count());
116 
117  uint64_t mask = w->execMask().to_ullong();
118  int srams = w->execMask().size() / 4;
119  for (int i = 0; i < srams; i++) {
120  if (mask & 0xF) {
121  stats.sramReads += DWords;
122  }
123  mask = mask >> 4;
124  }
125 
126  if (!ii->isLoad()
127  && !(ii->isAtomic() || ii->isMemSync())) {
128  // TODO: compute proper delay
129  // For now, it is based on largest operand size
130  int opSize = ii->maxOperandSize();
131  Cycles delay(opSize <= 4 ? computeUnit->spBypassLength()
133  Tick tickDelay = computeUnit->cyclesToTicks(delay);
134 
135  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
136  for (const auto& physIdx : dstVecOp.physIndices()) {
137  enqRegFreeEvent(physIdx, tickDelay);
138  }
139  }
140  // increment count of number of DWords written to VRF
141  DWords = ii->numDstVecDWords();
142  stats.registerWrites += (DWords * w->execMask().count());
143 
144  mask = w->execMask().to_ullong();
145  srams = w->execMask().size() / 4;
146  for (int i = 0; i < srams; i++) {
147  if (mask & 0xF) {
148  stats.sramWrites += DWords;
149  }
150  mask = mask >> 4;
151  }
152  }
153 }
154 
155 void
158 {
159  assert(ii->isLoad() || ii->isAtomicRet());
160  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
161  for (const auto& physIdx : dstVecOp.physIndices()) {
163  }
164  }
165  // increment count of number of DWords written to VRF
166  int DWords = ii->numDstVecDWords();
167  stats.registerWrites += (DWords * ii->exec_mask.count());
168 
169  uint64_t mask = ii->exec_mask.to_ullong();
170  int srams = ii->exec_mask.size() / 4;
171  for (int i = 0; i < srams; i++) {
172  if (mask & 0xF) {
173  stats.sramWrites += DWords;
174  }
175  mask = mask >> 4;
176  }
177 }
178 
179 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
Tick cyclesToTicks(Cycles c) const
Tick clockPeriod() const
int dpBypassLength() const
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:79
gem5::RegisterFile::RegisterFileStats stats
virtual bool regBusy(int idx) const
virtual void markReg(int regIdx, bool value)
int numRegs() const
virtual void enqRegFreeEvent(uint32_t regIdx, uint64_t delay)
ComputeUnit * computeUnit
virtual void scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) override
VectorRegisterFile(const VectorRegisterFileParams &p)
virtual bool operandsReady(Wavefront *w, GPUDynInstPtr ii) const override
std::vector< VecRegContainer > regFile
virtual void waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) override
virtual void scheduleWriteOperandsFromLoad(Wavefront *w, GPUDynInstPtr ii) override
constexpr uint64_t mask(unsigned nbits)
Generate a 64-bit mask of 'nbits' 1s, right justified.
Definition: bitfield.hh:63
Bitfield< 7 > i
Definition: misc_types.hh:67
Bitfield< 6 > w
Definition: pagetable.hh:59
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 5, 3 > reg
Definition: types.hh:92
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
uint64_t Tick
Tick count type.
Definition: types.hh:58

Generated on Wed Dec 21 2022 10:22:35 for gem5 by doxygen 1.9.1