gem5 v19.0.0.0
gpu_dyn_inst.hh
/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Anthony Gutierrez
 */

#ifndef __GPU_DYN_INST_HH__
#define __GPU_DYN_INST_HH__

#include <cstdint>
#include <string>

#include "base/amo.hh"
#include "base/logging.hh"
#include "enums/MemType.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"

class GPUStaticInst;

template<typename T>
class AtomicOpCAS : public TypedAtomicOpFunctor<T>
{
  public:
    T c;
    T s;

    ComputeUnit *computeUnit;

    AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
      : c(_c), s(_s), computeUnit(compute_unit) { }

    void
    execute(T *b)
    {
        computeUnit->numCASOps++;

        if (*b == c) {
            *b = s;
        } else {
            computeUnit->numFailedCASOps++;
        }

        if (computeUnit->xact_cas_mode) {
            computeUnit->xactCasLoadMap.clear();
        }
    }
    AtomicOpFunctor* clone () { return new AtomicOpCAS(c, s, computeUnit); }
};
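
// Illustrative sketch (not part of the original header): the memory system
// applies an AtomicOpFunctor to the target memory word through operator(),
// which TypedAtomicOpFunctor forwards to execute(). Assuming a 32-bit CAS
// built from hypothetical operand values, the effect is roughly:
//
//     uint32_t word = 1;
//     AtomicOpCAS<uint32_t> cas(1, 2, computeUnit);  // compare 1, swap in 2;
//                                                    // computeUnit assumed valid
//     cas(reinterpret_cast<uint8_t *>(&word));       // word becomes 2; on a
//                                                    // mismatch it is left
//                                                    // unchanged and
//                                                    // numFailedCASOps is bumped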

typedef enum
{
    VT_32,
    VT_64,
} vgpr_type;

class GPUDynInst : public GPUExecContext
{
  public:
    GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
               uint64_t instSeqNum);
    ~GPUDynInst();
    void execute(GPUDynInstPtr gpuDynInst);
    int numSrcRegOperands();
    int numDstRegOperands();
    int getNumOperands();
    bool isVectorRegister(int operandIdx);
    bool isScalarRegister(int operandIdx);
    bool isCondRegister(int operandIdx);
    int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
    int getOperandSize(int operandIdx);
    bool isDstOperand(int operandIdx);
    bool isSrcOperand(int operandIdx);

    const std::string &disassemble() const;

    uint64_t seqNum() const;

    Enums::StorageClassType executedAs();

    // The address of the memory operation
    std::vector<Addr> addr;

    // The data to get written
    uint8_t *d_data;
    // Additional data (for atomics)
    uint8_t *a_data;
    // Additional data (for atomics)
    uint8_t *x_data;
    // The execution mask
    VectorMask exec_mask;

    // The memory type (M_U32, M_S32, ...)
    Enums::MemType m_type;

    // The equivalency class
    int equiv;
    // The return VGPR type (VT_32 or VT_64)
    vgpr_type v_type;
    // Number of VGPRs accessed (1, 2, or 4)
    int n_reg;
    // The return VGPR index
    int dst_reg;
    // There can be at most 4 dest regs
    int dst_reg_vec[4];
    // SIMD unit to which the WF of the memory instruction has been mapped
    int simdId;
    // Unique id of the WF to which the memory instruction belongs
    int wfDynId;
    // The kernel id of the requesting WF
    int kern_id;
    // The CU id of the requesting WF
    int cu_id;
    // HW slot id to which the WF is mapped inside a SIMD unit
    int wfSlotId;
    // Execution pipeline id where the memory instruction has been scheduled
    int pipeId;
    // The execution time of this operation
    Tick time;
    // The latency of this operation
    WaitClass latency;
    // A list of bank conflicts for the 4 cycles.
    uint32_t bc[4];

    // A pointer to ROM
    uint8_t *rom;
    // The size of the READONLY segment
    int sz_rom;

    // Initiate the specified memory operation by creating a
    // memory request and sending it off to the memory system.
    void initiateAcc(GPUDynInstPtr gpuDynInst);
    // Complete the specified memory operation by writing the value back
    // to the RF in the case of a load or atomic return; in the case of
    // a store, do nothing.
    void completeAcc(GPUDynInstPtr gpuDynInst);

    void updateStats();

    GPUStaticInst* staticInstruction() { return _staticInst; }

    bool isALU() const;
    bool isBranch() const;
    bool isNop() const;
    bool isReturn() const;
    bool isUnconditionalJump() const;
    bool isSpecialOp() const;
    bool isWaitcnt() const;

    bool isBarrier() const;
    bool isMemFence() const;
    bool isMemRef() const;
    bool isFlat() const;
    bool isLoad() const;
    bool isStore() const;

    bool isAtomic() const;
    bool isAtomicNoRet() const;
    bool isAtomicRet() const;

    bool isScalar() const;
    bool readsSCC() const;
    bool writesSCC() const;
    bool readsVCC() const;
    bool writesVCC() const;

    bool isAtomicAnd() const;
    bool isAtomicOr() const;
    bool isAtomicXor() const;
    bool isAtomicCAS() const;
    bool isAtomicExch() const;
    bool isAtomicAdd() const;
    bool isAtomicSub() const;
    bool isAtomicInc() const;
    bool isAtomicDec() const;
    bool isAtomicMax() const;
    bool isAtomicMin() const;

    bool isArgLoad() const;
    bool isGlobalMem() const;
    bool isLocalMem() const;

    bool isArgSeg() const;
    bool isGlobalSeg() const;
    bool isGroupSeg() const;
    bool isKernArgSeg() const;
    bool isPrivateSeg() const;
    bool isReadOnlySeg() const;
    bool isSpillSeg() const;

    bool isWorkitemScope() const;
    bool isWavefrontScope() const;
    bool isWorkgroupScope() const;
    bool isDeviceScope() const;
    bool isSystemScope() const;
    bool isNoScope() const;

    bool isRelaxedOrder() const;
    bool isAcquire() const;
    bool isRelease() const;
    bool isAcquireRelease() const;
    bool isNoOrder() const;

    bool isGloballyCoherent() const;
    bool isSystemCoherent() const;

    /*
     * Loads/stores/atomics may have acquire/release semantics associated
     * with them. Some protocols want to see the acquire/release as separate
     * requests from the load/store/atomic. We implement that separation
     * using continuations (i.e., a function pointer with an object associated
     * with it). When, for example, the front-end generates a store with
     * release semantics, we will first issue a normal store and set the
     * continuation in the GPUDynInst to a function that generates a
     * release request. That continuation will be called when the normal
     * store completes (in ComputeUnit::DataPort::recvTimingResponse). The
     * continuation will be called in the context of the same GPUDynInst
     * that generated the initial store.
     */
    std::function<void(GPUStaticInst*, GPUDynInstPtr)> execContinuation;

    // when true, call execContinuation when the response arrives
    bool useContinuation;

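    /*
     * Illustrative sketch (assumed usage, not a quote of the gem5 source):
     * a pipeline issuing a store-release could split it roughly as follows;
     * 'inst' and 'issueRelease' are hypothetical names.
     *
     *     inst->initiateAcc(inst);          // the normal store goes out first
     *     inst->useContinuation = true;     // ask for the follow-up request
     *     inst->execContinuation =
     *         [](GPUStaticInst *si, GPUDynInstPtr di) {
     *             issueRelease(si, di);     // builds the release request
     *         };
     *
     * When the store's response returns,
     * ComputeUnit::DataPort::recvTimingResponse invokes the stored
     * continuation with this instruction's static instruction and its
     * GPUDynInstPtr.
     */
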
    template<typename c0> AtomicOpFunctor*
    makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
    {
        if (isAtomicAnd()) {
            return new AtomicOpAnd<c0>(*reg0);
        } else if (isAtomicOr()) {
            return new AtomicOpOr<c0>(*reg0);
        } else if (isAtomicXor()) {
            return new AtomicOpXor<c0>(*reg0);
        } else if (isAtomicCAS()) {
            return new AtomicOpCAS<c0>(*reg0, *reg1, cu);
        } else if (isAtomicExch()) {
            return new AtomicOpExch<c0>(*reg0);
        } else if (isAtomicAdd()) {
            return new AtomicOpAdd<c0>(*reg0);
        } else if (isAtomicSub()) {
            return new AtomicOpSub<c0>(*reg0);
        } else if (isAtomicInc()) {
            return new AtomicOpInc<c0>();
        } else if (isAtomicDec()) {
            return new AtomicOpDec<c0>();
        } else if (isAtomicMax()) {
            return new AtomicOpMax<c0>(*reg0);
        } else if (isAtomicMin()) {
            return new AtomicOpMin<c0>(*reg0);
        } else {
            fatal("Unrecognized atomic operation");
        }
    }
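
    /*
     * Illustrative sketch (assumed usage): when an atomic instruction builds
     * its memory request, the functor returned here can be handed to the
     * memory system, which applies it to the target location. The operand
     * values below are hypothetical.
     *
     *     uint32_t src0 = 5, src1 = 0;
     *     AtomicOpFunctor *amo = makeAtomicOpFunctor<uint32_t>(&src0, &src1);
     *     // attach 'amo' to the request; the memory system eventually calls
     *     // (*amo)(reinterpret_cast<uint8_t *>(&targetWord)) and, for
     *     // atomic-return ops, the old value is returned to the RF
     */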

    void
    setRequestFlags(RequestPtr req, bool setMemOrder=true)
    {
        // currently these are the easy scopes to deduce
        if (isPrivateSeg()) {
            req->setMemSpaceConfigFlags(Request::PRIVATE_SEGMENT);
        } else if (isSpillSeg()) {
            req->setMemSpaceConfigFlags(Request::SPILL_SEGMENT);
        } else if (isGlobalSeg()) {
            req->setMemSpaceConfigFlags(Request::GLOBAL_SEGMENT);
        } else if (isReadOnlySeg()) {
            req->setMemSpaceConfigFlags(Request::READONLY_SEGMENT);
        } else if (isGroupSeg()) {
            req->setMemSpaceConfigFlags(Request::GROUP_SEGMENT);
        } else if (isFlat()) {
            panic("TODO: translate to correct scope");
        } else {
            fatal("%s has bad segment type\n", disassemble());
        }

        if (isWavefrontScope()) {
            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                        Request::WAVEFRONT_SCOPE);
        } else if (isWorkgroupScope()) {
            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                        Request::WORKGROUP_SCOPE);
        } else if (isDeviceScope()) {
            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                        Request::DEVICE_SCOPE);
        } else if (isSystemScope()) {
            req->setMemSpaceConfigFlags(Request::SCOPE_VALID |
                                        Request::SYSTEM_SCOPE);
        } else if (!isNoScope() && !isWorkitemScope()) {
            fatal("%s has bad scope type\n", disassemble());
        }

        if (setMemOrder) {
            // set acquire and release flags
            if (isAcquire()) {
                req->setFlags(Request::ACQUIRE);
            } else if (isRelease()) {
                req->setFlags(Request::RELEASE);
            } else if (isAcquireRelease()) {
                req->setFlags(Request::ACQUIRE | Request::RELEASE);
            } else if (!isNoOrder()) {
                fatal("%s has bad memory order\n", disassemble());
            }
        }

        // set atomic type
        // currently, the instruction generator only produces atomic return,
        // but a magic instruction can produce atomic no return
        if (isAtomicRet()) {
            req->setFlags(Request::ATOMIC_RETURN_OP);
        } else if (isAtomicNoRet()) {
            req->setFlags(Request::ATOMIC_NO_RETURN_OP);
        }
    }
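
    /*
     * Illustrative sketch (assumed usage): a memory pipeline typically builds
     * one request per active lane and lets this helper derive the segment,
     * scope, and ordering flags from the instruction. The variable names
     * (vaddr, reqSize, masterId) are hypothetical, and the exact Request
     * constructor arguments may differ.
     *
     *     RequestPtr req = std::make_shared<Request>(0, vaddr, reqSize, 0,
     *                                                masterId, 0, wfDynId);
     *     setRequestFlags(req);
     *     // req now carries the segment/scope flags and, if requested,
     *     // the acquire/release ordering flags for this instruction
     */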

    // Map the addresses satisfied by returned packets to the lanes
    // they were requested from
    typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
    StatusVector memStatusVector;

    // Track the status of memory requests per lane, a bit per lane
    VectorMask statusBitVector;
    // for ld_v# or st_v#
    std::vector<int> statusVector;
    std::vector<int> tlbHitLevel;

  private:
    GPUStaticInst *_staticInst;
    uint64_t _seqNum;
};

#endif // __GPU_DYN_INST_HH__