gem5 v21.0.1.0
gpu_dyn_inst.hh
/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __GPU_DYN_INST_HH__
#define __GPU_DYN_INST_HH__

#include <cstdint>
#include <memory>
#include <string>

#include "base/amo.hh"
#include "base/logging.hh"
#include "base/trace.hh"
#include "debug/GPUMem.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"

class GPUStaticInst;

template<typename T>
class AtomicOpCAS : public TypedAtomicOpFunctor<T>
{
  public:
    T c;
    T s;

    ComputeUnit *computeUnit;

    AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
      : c(_c), s(_s), computeUnit(compute_unit) { }

    void
    execute(T *b)
    {
        computeUnit->stats.numCASOps++;

        if (*b == c) {
            *b = s;
        } else {
            computeUnit->stats.numFailedCASOps++;
        }
    }
    AtomicOpFunctor* clone () { return new AtomicOpCAS(c, s, computeUnit); }
};
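
// Illustrative sketch (not part of the original header): AtomicOpCAS is a
// side-effecting compare-and-swap functor. Applied to a memory location b,
// it swaps in s only when the current value matches the compare value c,
// and counts successes/failures in the owning CU's stats:
//
//     uint32_t mem = 5;
//     AtomicOpCAS<uint32_t> cas(5 /* c */, 9 /* s */, computeUnit);
//     cas.execute(&mem);   // mem == 5 matches c, so mem becomes 9
//     cas.execute(&mem);   // mem == 9 != c, so mem is left unchanged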

class GPUDynInst : public GPUExecContext
{
  public:
    GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
               uint64_t instSeqNum);
    ~GPUDynInst();
    void execute(GPUDynInstPtr gpuDynInst);
    int numSrcRegOperands();
    int numDstRegOperands();
    int numDstVecOperands();
    int numSrcVecOperands();
    int numSrcVecDWORDs();
    int numDstVecDWORDs();
    int numOpdDWORDs(int operandIdx);
    int getNumOperands();
    bool isVectorRegister(int operandIdx);
    bool isScalarRegister(int operandIdx);
    int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
    int getOperandSize(int operandIdx);
    bool isDstOperand(int operandIdx);
    bool isSrcOperand(int operandIdx);

    bool hasDestinationSgpr() const;
    bool hasSourceSgpr() const;
    bool hasDestinationVgpr() const;
    bool hasSourceVgpr() const;

    bool hasSgprRawDependence(GPUDynInstPtr s);
    bool hasVgprRawDependence(GPUDynInstPtr s);

    // returns true if the string "opcodeStr" is found in the
    // opcode of the instruction
    bool isOpcode(const std::string& opcodeStr) const;
    bool isOpcode(const std::string& opcodeStr,
                  const std::string& extStr) const;
    // returns true if the source operand at "index" is a vector register
    bool srcIsVgpr(int index) const;

    const std::string &disassemble() const;

    InstSeqNum seqNum() const;

    Enums::StorageClassType executedAs();

    // virtual address for scalar memory operations
    Addr scalarAddr;
    // virtual addresses for vector memory operations
    std::vector<Addr> addr;
    Addr pAddr;

    // vector data to get written
    uint8_t *d_data;
    // scalar data to be transferred
    uint8_t *scalar_data;
    // Additional data (for atomics)
    uint8_t *a_data;
    // Additional data (for atomics)
    uint8_t *x_data;
    // The execution mask
    VectorMask exec_mask;

    // SIMD unit to which the WF of the memory instruction has been mapped
    int simdId;
    // unique id of the WF to which the memory instruction belongs
    int wfDynId;
    // The kernel id of the requesting wf
    int kern_id;
    // The CU id of the requesting wf
    int cu_id;
    // The workgroup id of the requesting wf
    int wg_id;
    // HW slot id where the WF is mapped to inside a SIMD unit
    int wfSlotId;
    // execution pipeline id where the memory instruction has been scheduled
    int execUnitId;
    // The execution time of this operation
    Tick time;
    // The latency of this operation
    WaitClass latency;

    // Initiate the specified memory operation by creating a
    // memory request and sending it off to the memory system.
    void initiateAcc(GPUDynInstPtr gpuDynInst);
    // Complete the specified memory operation by writing the value back
    // to the RF in the case of a load or atomic return; in the case of a
    // store, nothing needs to be done.
    void completeAcc(GPUDynInstPtr gpuDynInst);
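
    // Illustrative flow (a sketch of the assumed call pattern, not code from
    // this header): once the instruction's addresses have been resolved, the
    // memory pipeline kicks off the access, and the write-back happens when
    // the memory system has responded for every lane:
    //
    //     gpuDynInst->initiateAcc(gpuDynInst);  // build + send request(s)
    //     ...                                   // memory system round trip
    //     gpuDynInst->completeAcc(gpuDynInst);  // loads/atomics write the RF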

    void updateStats();

    GPUStaticInst* staticInstruction() { return _staticInst; }

    TheGpuISA::ScalarRegU32 srcLiteral() const;

    bool isALU() const;
    bool isBranch() const;
    bool isCondBranch() const;
    bool isNop() const;
    bool isReturn() const;
    bool isEndOfKernel() const;
    bool isKernelLaunch() const;
    bool isSDWAInst() const;
    bool isDPPInst() const;
    bool isUnconditionalJump() const;
    bool isSpecialOp() const;
    bool isWaitcnt() const;
    bool isSleep() const;

    bool isBarrier() const;
    bool isMemSync() const;
    bool isMemRef() const;
    bool isFlat() const;
    bool isLoad() const;
    bool isStore() const;

    bool isAtomic() const;
    bool isAtomicNoRet() const;
    bool isAtomicRet() const;

    bool isScalar() const;
    bool isVector() const;
    bool readsSCC() const;
    bool writesSCC() const;
    bool readsVCC() const;
    bool writesVCC() const;
    bool readsEXEC() const;
    bool writesEXEC() const;
    bool readsMode() const;
    bool writesMode() const;
    bool ignoreExec() const;
    bool readsFlatScratch() const;
    bool writesFlatScratch() const;
    bool readsExecMask() const;
    bool writesExecMask() const;

    bool isAtomicAnd() const;
    bool isAtomicOr() const;
    bool isAtomicXor() const;
    bool isAtomicCAS() const;
    bool isAtomicExch() const;
    bool isAtomicAdd() const;
    bool isAtomicSub() const;
    bool isAtomicInc() const;
    bool isAtomicDec() const;
    bool isAtomicMax() const;
    bool isAtomicMin() const;

    bool isArgLoad() const;
    bool isGlobalMem() const;
    bool isLocalMem() const;

    bool isArgSeg() const;
    bool isGlobalSeg() const;
    bool isGroupSeg() const;
    bool isKernArgSeg() const;
    bool isPrivateSeg() const;
    bool isReadOnlySeg() const;
    bool isSpillSeg() const;

    bool isGloballyCoherent() const;
    bool isSystemCoherent() const;

    bool isF16() const;
    bool isF32() const;
    bool isF64() const;

    bool isFMA() const;
    bool isMAC() const;
    bool isMAD() const;

    // For FLAT memory ops, check the segment address against the APE
    // registers to see if it falls within one of the APE ranges for
    // LDS/SCRATCH/GPUVM. If it does not fall into one of the three APEs,
    // it will be a regular global access.
    void doApertureCheck(const VectorMask &mask);
    // Function to resolve a flat access during the execution stage.
    void resolveFlatSegment(const VectorMask &mask);
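
    // Illustrative sketch of what an aperture check does (names here are
    // stand-ins, the real logic lives in gpu_dyn_inst.cc): a FLAT virtual
    // address is classified by testing it against an aperture's
    // [base, limit] range, e.g. for the LDS APE:
    //
    //     bool inLdsApe = (vaddr >= ldsApeBase) && (vaddr <= ldsApeLimit);
    //
    // where ldsApeBase/ldsApeLimit stand in for the APE register values.
    // Addresses outside all three APEs are treated as global accesses.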

    template<typename c0> AtomicOpFunctorPtr
    makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
    {
        if (isAtomicAnd()) {
            return std::make_unique<AtomicOpAnd<c0>>(*reg0);
        } else if (isAtomicOr()) {
            return std::make_unique<AtomicOpOr<c0>>(*reg0);
        } else if (isAtomicXor()) {
            return std::make_unique<AtomicOpXor<c0>>(*reg0);
        } else if (isAtomicCAS()) {
            return std::make_unique<AtomicOpCAS<c0>>(*reg0, *reg1, cu);
        } else if (isAtomicExch()) {
            return std::make_unique<AtomicOpExch<c0>>(*reg0);
        } else if (isAtomicAdd()) {
            return std::make_unique<AtomicOpAdd<c0>>(*reg0);
        } else if (isAtomicSub()) {
            return std::make_unique<AtomicOpSub<c0>>(*reg0);
        } else if (isAtomicInc()) {
            return std::make_unique<AtomicOpInc<c0>>();
        } else if (isAtomicDec()) {
            return std::make_unique<AtomicOpDec<c0>>();
        } else if (isAtomicMax()) {
            return std::make_unique<AtomicOpMax<c0>>(*reg0);
        } else if (isAtomicMin()) {
            return std::make_unique<AtomicOpMin<c0>>(*reg0);
        } else {
            fatal("Unrecognized atomic operation");
        }
    }
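
    // Usage sketch (illustrative, assumed call pattern): when building the
    // memory request for an atomic lane, the caller typically points reg0 at
    // the lane's operand in a_data and reg1 at the compare operand in x_data
    // (only CAS consumes reg1), then attaches the functor to the request:
    //
    //     AtomicOpFunctorPtr amo = makeAtomicOpFunctor<uint32_t>(
    //         &(reinterpret_cast<uint32_t*>(a_data))[lane],
    //         &(reinterpret_cast<uint32_t*>(x_data))[lane]);
    //     req->setAtomicOpFunctor(std::move(amo));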

    void
    setRequestFlags(RequestPtr req) const
    {
        if (isGloballyCoherent()) {
            req->setCacheCoherenceFlags(Request::GLC_BIT);
        }

        if (isSystemCoherent()) {
            req->setCacheCoherenceFlags(Request::SLC_BIT);
        }

        if (isAtomicRet()) {
            req->setFlags(Request::ATOMIC_RETURN_OP);
        } else if (isAtomicNoRet()) {
            req->setFlags(Request::ATOMIC_NO_RETURN_OP);
        }

        if (isMemSync()) {
            // the path for kernel launch and kernel end is different
            // from non-kernel mem sync.
            assert(!isKernelLaunch());
            assert(!isEndOfKernel());

            // must be a wbinv inst if not kernel launch/end
            req->setCacheCoherenceFlags(Request::INV_L1);
        }
    }
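
    // Usage sketch (illustrative): initiateAcc() implementations build a
    // Request and stamp it with the instruction's semantics before sending
    // it into the memory system, e.g.:
    //
    //     RequestPtr req = std::make_shared<Request>(...);
    //     gpuDynInst->setRequestFlags(req);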

    // reset the number of pending memory requests for all lanes
    void
    resetEntireStatusVector()
    {
        assert(statusVector.size() == TheGpuISA::NumVecElemPerVecReg);
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            resetStatusVector(lane);
        }
    }

    // reset the number of pending memory requests for the given lane
    void
    resetStatusVector(int lane)
    {
        setStatusVector(lane, 0);
    }

    // set the number of pending memory requests for the given lane
    void
    setStatusVector(int lane, int newVal)
    {
        // currently we can have up to 2 memory requests per lane (if the
        // lane's request goes across multiple cache lines)
        assert((newVal >= 0) && (newVal <= 2));
        statusVector[lane] = newVal;
    }

    // decrement the number of pending memory requests for the given lane
    // by 1
    void
    decrementStatusVector(int lane)
    {
        // this lane may have multiple requests, so only subtract one for
        // this request
        assert(statusVector[lane] >= 1);
        statusVector[lane]--;
    }

    // return the current number of pending memory requests for the given
    // lane
    int
    getLaneStatus(int lane) const
    {
        return statusVector[lane];
    }

    // returns true if all memory requests from all lanes have been received,
    // else returns false
    bool
    allLanesZero() const
    {
        bool allZero = true;

        // iterate over all lanes, checking the number of pending memory
        // requests they have
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            // if any lane still has pending requests, the instruction
            // is not yet complete
            if (statusVector[lane] > 0) {
                DPRINTF(GPUMem, "CU%d: WF[%d][%d]: lane: %d has %d pending "
                        "request(s) for %#x\n", cu_id, simdId, wfSlotId, lane,
                        statusVector[lane], addr[lane]);
                allZero = false;
            }
        }

        if (allZero) {
            DPRINTF(GPUMem, "CU%d: WF[%d][%d]: all lanes have no pending"
                    " requests for %#x\n", cu_id, simdId, wfSlotId, addr[0]);
        }
        return allZero;
    }
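
    // Lifecycle sketch for the per-lane bookkeeping above (illustrative):
    //
    //     setStatusVector(lane, 2);     // lane's access spans 2 cache lines
    //     ...
    //     decrementStatusVector(lane);  // first response has come back
    //     decrementStatusVector(lane);  // second response has come back
    //     if (allLanesZero()) { /* instruction can complete */ }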

    // returns a string representing the current state of the statusVector
    std::string
    printStatusVector() const
    {
        std::string statusVec_str = "[";

        // iterate over all lanes, adding the current number of pending
        // requests for this lane to the string
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            statusVec_str += std::to_string(statusVector[lane]);
        }
        statusVec_str += "]";

        return statusVec_str;
    }

    // Map returned packets and the addresses they satisfy to the lane(s)
    // they were requested from
    typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
    StatusVector memStatusVector;

    // Track the status of memory requests per lane, an int per lane to allow
    // unaligned accesses
    std::vector<int> statusVector;
    // for ld_v# or st_v#
    std::vector<int> tlbHitLevel;

    // for misaligned scalar ops we track the number
    // of outstanding reqs here
    int numScalarReqs;

    Tick getAccessTime() const { return accessTime; }

    void setAccessTime(Tick currentTime) { accessTime = currentTime; }

    void profileRoundTripTime(Tick currentTime, int hopId);
    std::vector<Tick> getRoundTripTime() const { return roundTripTime; }

    void profileLineAddressTime(Addr addr, Tick currentTime, int hopId);
    const std::map<Addr, std::vector<Tick>>& getLineAddressTime() const
    { return lineAddressTime; }
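
    // Profiling sketch (illustrative, assumed call pattern): each component
    // a request passes through can log a timestamp for its hop id, and the
    // collected per-hop ticks can be read back once the access completes:
    //
    //     gpuDynInst->profileRoundTripTime(curTick(), hopId);
    //     ...
    //     std::vector<Tick> rtt = gpuDynInst->getRoundTripTime();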

    // inst used to save/restore a wavefront context
    bool isSaveRestore;
  private:
    GPUStaticInst *_staticInst;
    const InstSeqNum _seqNum;

    // the time the request was started
    Tick accessTime = -1;

    // hold the tick when the instruction arrives at certain hop points
    // on its way to main memory
    std::vector<Tick> roundTripTime;

    // hold each cache block address for the instruction and a vector
    // to hold the tick when the block arrives at certain hop points
    std::map<Addr, std::vector<Tick>> lineAddressTime;
};

#endif // __GPU_DYN_INST_HH__