gpu_dyn_inst.hh (gem5 v20.1.0.0)
/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __GPU_DYN_INST_HH__
#define __GPU_DYN_INST_HH__

#include <cstdint>
#include <string>

#include "base/amo.hh"
#include "base/logging.hh"
#include "base/trace.hh"
#include "debug/GPUMem.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"

class GPUStaticInst;

template<typename T>
class AtomicOpCAS : public TypedAtomicOpFunctor<T>
{
  public:
    T c;
    T s;

    ComputeUnit *computeUnit;

    AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
      : c(_c), s(_s), computeUnit(compute_unit) { }

    void
    execute(T *b)
    {
        computeUnit->numCASOps++;

        if (*b == c) {
            *b = s;
        } else {
            computeUnit->numFailedCASOps++;
        }
    }
    AtomicOpFunctor* clone () { return new AtomicOpCAS(c, s, computeUnit); }
};
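
// Behavior sketch (illustrative, not part of the header): every call to
// execute() bumps the compute unit's numCASOps counter; a mismatch
// additionally bumps numFailedCASOps and leaves memory untouched.
// Assuming a valid ComputeUnit pointer "cu":
//
//   uint32_t mem = 1;
//   AtomicOpCAS<uint32_t> cas(1, 2, cu);  // compare against 1, swap in 2
//   cas.execute(&mem);                    // match: mem becomes 2
//   cas.execute(&mem);                    // mem (2) != c (1): CAS fails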

class GPUDynInst : public GPUExecContext
{
  public:
    GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
               uint64_t instSeqNum);
    ~GPUDynInst();
    void execute(GPUDynInstPtr gpuDynInst);
    int numSrcRegOperands();
    int numDstRegOperands();
    int numDstVecOperands();
    int numSrcVecOperands();
    int numSrcVecDWORDs();
    int numDstVecDWORDs();
    int numOpdDWORDs(int operandIdx);
    int getNumOperands();
    bool isVectorRegister(int operandIdx);
    bool isScalarRegister(int operandIdx);
    int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
    int getOperandSize(int operandIdx);
    bool isDstOperand(int operandIdx);
    bool isSrcOperand(int operandIdx);

    bool hasDestinationSgpr() const;
    bool hasSourceSgpr() const;
    bool hasDestinationVgpr() const;
    bool hasSourceVgpr() const;

    bool hasSgprRawDependence(GPUDynInstPtr s);
    bool hasVgprRawDependence(GPUDynInstPtr s);

    // returns true if the string "opcodeStr" is found in the
    // opcode of the instruction
    bool isOpcode(const std::string& opcodeStr) const;
    bool isOpcode(const std::string& opcodeStr,
                  const std::string& extStr) const;
    // returns true if source operand at "index" is a vector register
    bool srcIsVgpr(int index) const;

    const std::string &disassemble() const;

    InstSeqNum seqNum() const;

    Enums::StorageClassType executedAs();

    // virtual address for scalar memory operations
    Addr scalarAddr;
    // virtual addresses for vector memory operations
    std::vector<Addr> addr;
    Addr pAddr;

    // vector data to get written
    uint8_t *d_data;
    // scalar data to be transferred
    uint8_t *scalar_data;
    // Additional data (for atomics)
    uint8_t *a_data;
    // Additional data (for atomics)
    uint8_t *x_data;
    // The execution mask
    VectorMask exec_mask;

    // SIMD unit to which the WF of the memory instruction is mapped
    int simdId;
    // unique id of the WF to which the memory instruction belongs
    int wfDynId;
    // The kernel id of the requesting wf
    int kern_id;
    // The CU id of the requesting wf
    int cu_id;
    // The workgroup id of the requesting wf
    int wg_id;
    // HW slot id to which the WF is mapped inside a SIMD unit
    int wfSlotId;
    // execution pipeline id where the memory instruction has been scheduled
    int execUnitId;
    // The execution time of this operation
    Tick time;
    // The latency of this operation
    WaitClass latency;

    // Initiate the specified memory operation by creating a
    // memory request and sending it off to the memory system.
    void initiateAcc(GPUDynInstPtr gpuDynInst);
    // Complete the specified memory operation by writing the value
    // back to the RF in the case of a load or atomic return; in the
    // case of a store, do nothing.
    void completeAcc(GPUDynInstPtr gpuDynInst);

    void updateStats();

    GPUStaticInst* staticInstruction() { return _staticInst; }

    TheGpuISA::ScalarRegU32 srcLiteral() const;

    bool isALU() const;
    bool isBranch() const;
    bool isCondBranch() const;
    bool isNop() const;
    bool isReturn() const;
    bool isEndOfKernel() const;
    bool isKernelLaunch() const;
    bool isSDWAInst() const;
    bool isDPPInst() const;
    bool isUnconditionalJump() const;
    bool isSpecialOp() const;
    bool isWaitcnt() const;

    bool isBarrier() const;
    bool isMemSync() const;
    bool isMemRef() const;
    bool isFlat() const;
    bool isLoad() const;
    bool isStore() const;

    bool isAtomic() const;
    bool isAtomicNoRet() const;
    bool isAtomicRet() const;

    bool isScalar() const;
    bool isVector() const;
    bool readsSCC() const;
    bool writesSCC() const;
    bool readsVCC() const;
    bool writesVCC() const;
    bool readsEXEC() const;
    bool writesEXEC() const;
    bool readsMode() const;
    bool writesMode() const;
    bool ignoreExec() const;
    bool readsFlatScratch() const;
    bool writesFlatScratch() const;
    bool readsExecMask() const;
    bool writesExecMask() const;

    bool isAtomicAnd() const;
    bool isAtomicOr() const;
    bool isAtomicXor() const;
    bool isAtomicCAS() const;
    bool isAtomicExch() const;
    bool isAtomicAdd() const;
    bool isAtomicSub() const;
    bool isAtomicInc() const;
    bool isAtomicDec() const;
    bool isAtomicMax() const;
    bool isAtomicMin() const;

    bool isArgLoad() const;
    bool isGlobalMem() const;
    bool isLocalMem() const;

    bool isArgSeg() const;
    bool isGlobalSeg() const;
    bool isGroupSeg() const;
    bool isKernArgSeg() const;
    bool isPrivateSeg() const;
    bool isReadOnlySeg() const;
    bool isSpillSeg() const;

    bool isGloballyCoherent() const;
    bool isSystemCoherent() const;

    bool isF16() const;
    bool isF32() const;
    bool isF64() const;

    bool isFMA() const;
    bool isMAC() const;
    bool isMAD() const;

    // For FLAT memory ops, check the segment address against the APE
    // registers to see if it falls within one of the APE ranges for
    // LDS/SCRATCH/GPUVM. If it does not fall into one of the three
    // APEs, it is a regular global access.
    void doApertureCheck(const VectorMask &mask);
    // Resolve a flat access during the execution stage.
    void resolveFlatSegment(const VectorMask &mask);

    template<typename c0> AtomicOpFunctorPtr
    makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
    {
        if (isAtomicAnd()) {
            return m5::make_unique<AtomicOpAnd<c0>>(*reg0);
        } else if (isAtomicOr()) {
            return m5::make_unique<AtomicOpOr<c0>>(*reg0);
        } else if (isAtomicXor()) {
            return m5::make_unique<AtomicOpXor<c0>>(*reg0);
        } else if (isAtomicCAS()) {
            return m5::make_unique<AtomicOpCAS<c0>>(*reg0, *reg1, cu);
        } else if (isAtomicExch()) {
            return m5::make_unique<AtomicOpExch<c0>>(*reg0);
        } else if (isAtomicAdd()) {
            return m5::make_unique<AtomicOpAdd<c0>>(*reg0);
        } else if (isAtomicSub()) {
            return m5::make_unique<AtomicOpSub<c0>>(*reg0);
        } else if (isAtomicInc()) {
            return m5::make_unique<AtomicOpInc<c0>>();
        } else if (isAtomicDec()) {
            return m5::make_unique<AtomicOpDec<c0>>();
        } else if (isAtomicMax()) {
            return m5::make_unique<AtomicOpMax<c0>>(*reg0);
        } else if (isAtomicMin()) {
            return m5::make_unique<AtomicOpMin<c0>>(*reg0);
        } else {
            fatal("Unrecognized atomic operation");
        }
    }
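
    // Usage sketch (illustrative; the local names are assumptions):
    // callers pass pointers to the operand values read from the register
    // file. As the branches above show, reg0 alone feeds most functors,
    // while the CAS branch reads reg0 as the compare value and reg1 as
    // the swap value:
    //
    //   uint32_t op0 = ...;  // first atomic operand
    //   uint32_t op1 = ...;  // second operand, only read for CAS
    //   AtomicOpFunctorPtr amoOp =
    //       gpuDynInst->makeAtomicOpFunctor<uint32_t>(&op0, &op1);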

    void
    setRequestFlags(RequestPtr req) const
    {
        if (isGloballyCoherent()) {
            req->setCacheCoherenceFlags(Request::GLC_BIT);
        }

        if (isSystemCoherent()) {
            req->setCacheCoherenceFlags(Request::SLC_BIT);
        }

        if (isAtomicRet()) {
            req->setFlags(Request::ATOMIC_RETURN_OP);
        } else if (isAtomicNoRet()) {
            req->setFlags(Request::ATOMIC_NO_RETURN_OP);
        }

        if (isMemSync()) {
            // the path for kernel launch and kernel end is different
            // from non-kernel mem sync.
            assert(!isKernelLaunch());
            assert(!isEndOfKernel());

            // must be a wbinv inst if not a kernel launch/end
            req->setCacheCoherenceFlags(Request::ACQUIRE);
        }
    }
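
    // Example (illustrative): for a globally-coherent atomic that
    // returns its result, calling this on a pre-built RequestPtr "req"
    // sets Request::GLC_BIT in the cache-coherence flags and
    // Request::ATOMIC_RETURN_OP in the request flags:
    //
    //   gpuDynInst->setRequestFlags(req);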

    // reset the number of pending memory requests for all lanes
    void
    resetEntireStatusVector()
    {
        assert(statusVector.size() == TheGpuISA::NumVecElemPerVecReg);
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            resetStatusVector(lane);
        }
    }

    // reset the number of pending memory requests for the given lane
    void
    resetStatusVector(int lane)
    {
        setStatusVector(lane, 0);
    }

    // set the number of pending memory requests for the given lane
    void
    setStatusVector(int lane, int newVal)
    {
        // currently we can have up to 2 memory requests per lane (if the
        // lane's request goes across multiple cache lines)
        assert((newVal >= 0) && (newVal <= 2));
        statusVector[lane] = newVal;
    }

    // subtract one from the number of pending memory requests for the
    // given lane
    void
    decrementStatusVector(int lane)
    {
        // this lane may have multiple requests, so only subtract one for
        // this request
        assert(statusVector[lane] >= 1);
        statusVector[lane]--;
    }

    // return the current number of pending memory requests for the given
    // lane
    int
    getLaneStatus(int lane) const
    {
        return statusVector[lane];
    }

    // returns true if all memory requests from all lanes have been
    // received, else returns false
    bool
    allLanesZero() const
    {
        bool allZero = true;

        // iterate over all lanes, checking the number of pending memory
        // requests they have
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            // if any lane still has pending requests, return false
            if (statusVector[lane] > 0) {
                DPRINTF(GPUMem, "CU%d: WF[%d][%d]: lane: %d has %d pending "
                        "request(s) for %#x\n", cu_id, simdId, wfSlotId, lane,
                        statusVector[lane], addr[lane]);
                allZero = false;
            }
        }

        if (allZero) {
            DPRINTF(GPUMem, "CU%d: WF[%d][%d]: all lanes have no pending"
                    " requests for %#x\n", cu_id, simdId, wfSlotId, addr[0]);
        }
        return allZero;
    }

    // returns a string representing the current state of the statusVector
    std::string
    printStatusVector() const
    {
        std::string statusVec_str = "[";

        // iterate over all lanes, appending the current number of pending
        // requests for the lane to the string
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            statusVec_str += std::to_string(statusVector[lane]);
        }
        statusVec_str += "]";

        return statusVec_str;
    }
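
    // Lifecycle sketch (illustrative): for a lane whose access straddles
    // two cache lines, the coalescing logic might drive the vector like
    // so:
    //
    //   gpuDynInst->setStatusVector(lane, 2);     // two requests in flight
    //   gpuDynInst->decrementStatusVector(lane);  // first response back
    //   gpuDynInst->decrementStatusVector(lane);  // second response back
    //   if (gpuDynInst->allLanesZero()) {
    //       // all lanes serviced; the instruction can complete
    //   }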

    // Map returned packets and the addresses they satisfy to the lanes
    // they were requested from
    typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
    StatusVector memStatusVector;

    // Track the status of memory requests per lane, an int per lane to
    // allow unaligned accesses
    std::vector<int> statusVector;
    // for ld_v# or st_v#
    std::vector<int> tlbHitLevel;

    // for misaligned scalar ops we track the number
    // of outstanding reqs here
    int numScalarReqs;

    Tick getAccessTime() const { return accessTime; }

    void setAccessTime(Tick currentTime) { accessTime = currentTime; }

    void profileRoundTripTime(Tick currentTime, int hopId);
    std::vector<Tick> getRoundTripTime() const { return roundTripTime; }

    void profileLineAddressTime(Addr addr, Tick currentTime, int hopId);
    const std::map<Addr, std::vector<Tick>>& getLineAddressTime() const
    { return lineAddressTime; }
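
    // Profiling sketch (illustrative): hop points on the path to memory
    // can timestamp the instruction as it passes; "hopId" and "lineAddr"
    // are assumed to be supplied by the caller:
    //
    //   gpuDynInst->setAccessTime(curTick());
    //   gpuDynInst->profileRoundTripTime(curTick(), hopId);
    //   gpuDynInst->profileLineAddressTime(lineAddr, curTick(), hopId);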

    // inst used to save/restore a wavefront context
    bool isSaveRestore;
  private:
    GPUStaticInst *_staticInst;
    const InstSeqNum _seqNum;

    // the time the request was started
    Tick accessTime = -1;

    // hold the tick when the instruction arrives at certain hop points
    // on its way to main memory
    std::vector<Tick> roundTripTime;

    // hold each cache block address for the instruction and a vector
    // to hold the tick when the block arrives at certain hop points
    std::map<Addr, std::vector<Tick>> lineAddressTime;
};

#endif // __GPU_DYN_INST_HH__