gem5  v21.2.1.1
gpu_dyn_inst.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __GPU_DYN_INST_HH__
33 #define __GPU_DYN_INST_HH__
34 
35 #include <cstdint>
36 #include <memory>
37 #include <string>
38 
39 #include "base/amo.hh"
40 #include "base/logging.hh"
41 #include "base/trace.hh"
42 #include "debug/GPUMem.hh"
43 #include "enums/StorageClassType.hh"
47 
48 namespace gem5
49 {
50 
51 class GPUStaticInst;
52 
53 template<typename T>
55 {
56  public:
57  T c;
58  T s;
59 
61 
62  AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
63  : c(_c), s(_s), computeUnit(compute_unit) { }
64 
65  void
66  execute(T *b)
67  {
69 
70  if (*b == c) {
71  *b = s;
72  } else {
74  }
75  }
77 };
78 
80 {
81  public:
82  RegisterOperandInfo() = delete;
83  RegisterOperandInfo(int op_idx, int num_dwords,
84  const std::vector<int> &virt_indices,
85  const std::vector<int> &phys_indices)
86  : opIdx(op_idx), numDWORDs(num_dwords), virtIndices(virt_indices),
87  physIndices(phys_indices)
88  {
89  }
90 
95  int operandIdx() const { return opIdx; }
100  int virtIdx(int reg_num=0) const { return virtIndices.at(reg_num); }
101 
102  private:
107  const int opIdx;
111  const int numDWORDs;
114 };
115 
117 {
118  public:
119  GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
120  uint64_t instSeqNum);
121  ~GPUDynInst();
122  void execute(GPUDynInstPtr gpuDynInst);
123 
128 
129  int numSrcRegOperands();
130  int numDstRegOperands();
131 
132  int numSrcVecRegOperands() const;
133  int numDstVecRegOperands() const;
135  int numSrcVecDWords();
136  int numDstVecDWords();
137 
138  int numSrcScalarRegOperands() const;
139  int numDstScalarRegOperands() const;
141  int numSrcScalarDWords();
142  int numDstScalarDWords();
143 
144  int maxOperandSize();
145 
146  int getNumOperands() const;
147 
148  bool hasSourceSgpr() const;
149  bool hasDestinationSgpr() const;
150  bool hasSourceVgpr() const;
151  bool hasDestinationVgpr() const;
152 
153  // returns true if the string "opcodeStr" is found in the
154  // opcode of the instruction
155  bool isOpcode(const std::string& opcodeStr) const;
156  bool isOpcode(const std::string& opcodeStr,
157  const std::string& extStr) const;
158 
159  const std::string &disassemble() const;
160 
161  InstSeqNum seqNum() const;
162 
163  Addr pc();
164  void pc(Addr _pc);
165 
166  enums::StorageClassType executedAs();
167 
168  // virtual address for scalar memory operations
 170  // virtual addresses for vector memory operations
173 
174  // vector data to get written
175  uint8_t *d_data;
176  // scalar data to be transferred
177  uint8_t *scalar_data;
178  // Additional data (for atomics)
179  uint8_t *a_data;
180  // Additional data (for atomics)
181  uint8_t *x_data;
182  // The execution mask
184 
185  // SIMD where the WF of the memory instruction has been mapped to
186  int simdId;
187  // unique id of the WF where the memory instruction belongs to
188  int wfDynId;
189  // The kernel id of the requesting wf
190  int kern_id;
191  // The CU id of the requesting wf
192  int cu_id;
193  // The workgroup id of the requesting wf
194  int wg_id;
195  // HW slot id where the WF is mapped to inside a SIMD unit
196  int wfSlotId;
197  // execution pipeline id where the memory instruction has been scheduled
199  // The execution time of this operation
201  // The latency of this operation
203 
204  // Initiate the specified memory operation, by creating a
205  // memory request and sending it off to the memory system.
206  void initiateAcc(GPUDynInstPtr gpuDynInst);
207  // Complete the specified memory operation, by writing
208  // value back to the RF in the case of a load or atomic
209  // return or, in the case of a store, we do nothing
210  void completeAcc(GPUDynInstPtr gpuDynInst);
211 
212  void updateStats();
213 
215 
217 
218  bool isALU() const;
219  bool isBranch() const;
220  bool isCondBranch() const;
221  bool isNop() const;
222  bool isReturn() const;
223  bool isEndOfKernel() const;
224  bool isKernelLaunch() const;
225  bool isSDWAInst() const;
226  bool isDPPInst() const;
227  bool isUnconditionalJump() const;
228  bool isSpecialOp() const;
229  bool isWaitcnt() const;
230  bool isSleep() const;
231 
232  bool isBarrier() const;
233  bool isMemSync() const;
234  bool isMemRef() const;
235  bool isFlat() const;
236  bool isFlatGlobal() const;
237  bool isLoad() const;
238  bool isStore() const;
239 
240  bool isAtomic() const;
241  bool isAtomicNoRet() const;
242  bool isAtomicRet() const;
243 
244  bool isScalar() const;
245  bool isVector() const;
246  bool readsSCC() const;
247  bool writesSCC() const;
248  bool readsVCC() const;
249  bool writesVCC() const;
250  bool readsExec() const;
251  bool writesExec() const;
252  bool readsMode() const;
253  bool writesMode() const;
254  bool ignoreExec() const;
255  bool readsFlatScratch() const;
256  bool writesFlatScratch() const;
257  bool readsExecMask() const;
258  bool writesExecMask() const;
259 
260  bool isAtomicAnd() const;
261  bool isAtomicOr() const;
262  bool isAtomicXor() const;
263  bool isAtomicCAS() const;
264  bool isAtomicExch() const;
265  bool isAtomicAdd() const;
266  bool isAtomicSub() const;
267  bool isAtomicInc() const;
268  bool isAtomicDec() const;
269  bool isAtomicMax() const;
270  bool isAtomicMin() const;
271 
272  bool isArgLoad() const;
273  bool isGlobalMem() const;
274  bool isLocalMem() const;
275 
276  bool isArgSeg() const;
277  bool isGlobalSeg() const;
278  bool isGroupSeg() const;
279  bool isKernArgSeg() const;
280  bool isPrivateSeg() const;
281  bool isReadOnlySeg() const;
282  bool isSpillSeg() const;
283 
284  bool isGloballyCoherent() const;
285  bool isSystemCoherent() const;
286 
287  bool isF16() const;
288  bool isF32() const;
289  bool isF64() const;
290 
291  bool isFMA() const;
292  bool isMAC() const;
293  bool isMAD() const;
294 
295  // for FLAT memory ops. check the segment address
296  // against the APE registers to see if it falls
297  // within one of the APE ranges for LDS/SCRATCH/GPUVM.
298  // if it does not fall into one of the three APEs, it
299  // will be a regular global access.
300  void doApertureCheck(const VectorMask &mask);
 301  // Function to resolve flat accesses during the execution stage.
302  void resolveFlatSegment(const VectorMask &mask);
303 
304  template<typename c0> AtomicOpFunctorPtr
305  makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
306  {
307  if (isAtomicAnd()) {
308  return std::make_unique<AtomicOpAnd<c0>>(*reg0);
309  } else if (isAtomicOr()) {
310  return std::make_unique<AtomicOpOr<c0>>(*reg0);
311  } else if (isAtomicXor()) {
312  return std::make_unique<AtomicOpXor<c0>>(*reg0);
313  } else if (isAtomicCAS()) {
314  return std::make_unique<AtomicOpCAS<c0>>(*reg0, *reg1, cu);
315  } else if (isAtomicExch()) {
316  return std::make_unique<AtomicOpExch<c0>>(*reg0);
317  } else if (isAtomicAdd()) {
318  return std::make_unique<AtomicOpAdd<c0>>(*reg0);
319  } else if (isAtomicSub()) {
320  return std::make_unique<AtomicOpSub<c0>>(*reg0);
321  } else if (isAtomicInc()) {
322  return std::make_unique<AtomicOpInc<c0>>();
323  } else if (isAtomicDec()) {
324  return std::make_unique<AtomicOpDec<c0>>();
325  } else if (isAtomicMax()) {
326  return std::make_unique<AtomicOpMax<c0>>(*reg0);
327  } else if (isAtomicMin()) {
328  return std::make_unique<AtomicOpMin<c0>>(*reg0);
329  } else {
330  fatal("Unrecognized atomic operation");
331  }
332  }
333 
334  void
336  {
337  if (isGloballyCoherent()) {
338  req->setCacheCoherenceFlags(Request::GLC_BIT);
339  }
340 
341  if (isSystemCoherent()) {
342  req->setCacheCoherenceFlags(Request::SLC_BIT);
343  }
344 
345  if (isAtomicRet()) {
346  req->setFlags(Request::ATOMIC_RETURN_OP);
347  } else if (isAtomicNoRet()) {
348  req->setFlags(Request::ATOMIC_NO_RETURN_OP);
349  }
350 
351  if (isMemSync()) {
352  // the path for kernel launch and kernel end is different
353  // from non-kernel mem sync.
354  assert(!isKernelLaunch());
355  assert(!isEndOfKernel());
356 
357  // must be wbinv inst if not kernel launch/end
358  req->setCacheCoherenceFlags(Request::INV_L1);
359  }
360  }
361 
362  // reset the number of pending memory requests for all lanes
363  void
365  {
367  for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
368  resetStatusVector(lane);
369  }
370  }
371 
372  // reset the number of pending memory requests for the inputted lane
373  void
375  {
376  setStatusVector(lane, 0);
377  }
378 
379  // set the number of pending memory requests for the inputted lane
380  void
381  setStatusVector(int lane, int newVal)
382  {
383  // currently we can have up to 2 memory requests per lane (if the
384  // lane's request goes across multiple cache lines)
385  assert((newVal >= 0) && (newVal <= 2));
386  statusVector[lane] = newVal;
387  }
388 
389  // subtracts the number of pending memory requests for the inputted lane
390  // by 1
391  void
393  {
394  // this lane may have multiple requests, so only subtract one for
395  // this request
396  assert(statusVector[lane] >= 1);
397  statusVector[lane]--;
398  }
399 
400  // return the current number of pending memory requests for the inputted
401  // lane
402  int
403  getLaneStatus(int lane) const
404  {
405  return statusVector[lane];
406  }
407 
408  // returns true if all memory requests from all lanes have been received,
409  // else returns false
410  bool
411  allLanesZero() const
412  {
413  // local variables
414  bool allZero = true;
415 
416  // iterate over all lanes, checking the number of pending memory
417  // requests they have
418  for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
419  // if any lane still has pending requests, return false
420  if (statusVector[lane] > 0) {
421  DPRINTF(GPUMem, "CU%d: WF[%d][%d]: lane: %d has %d pending "
422  "request(s) for %#x\n", cu_id, simdId, wfSlotId, lane,
423  statusVector[lane], addr[lane]);
424  allZero = false;
425  }
426  }
427 
428  if (allZero) {
429  DPRINTF(GPUMem, "CU%d: WF[%d][%d]: all lanes have no pending"
430  " requests for %#x\n", cu_id, simdId, wfSlotId, addr[0]);
431  }
432  return allZero;
433  }
434 
435  // returns a string representing the current state of the statusVector
436  std::string
438  {
439  std::string statusVec_str = "[";
440 
441  // iterate over all lanes, adding the current number of pending
442  // requests for this lane to the string
443  for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
444  statusVec_str += std::to_string(statusVector[lane]);
445  }
446  statusVec_str += "]";
447 
448  return statusVec_str;
449  }
450 
451  // Map returned packets and the addresses they satisfy with which lane they
452  // were requested from
453  typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
455 
456  // Track the status of memory requests per lane, an int per lane to allow
457  // unaligned accesses
459  // for ld_v# or st_v#
461 
462  // for misaligned scalar ops we track the number
463  // of outstanding reqs here
465 
466  Tick getAccessTime() const { return accessTime; }
467 
468  void setAccessTime(Tick currentTime) { accessTime = currentTime; }
469 
470  void profileRoundTripTime(Tick currentTime, int hopId);
472 
473  void profileLineAddressTime(Addr addr, Tick currentTime, int hopId);
474  const std::map<Addr, std::vector<Tick>>& getLineAddressTime() const
475  { return lineAddressTime; }
476 
477  // inst used to save/restore a wavefront context
479  private:
484 
485  // the time the request was started
487 
488  // hold the tick when the instruction arrives at certain hop points
 489  // on its way to main memory
491 
492  // hold each cache block address for the instruction and a vector
493  // to hold the tick when the block arrives at certain hop points
494  std::map<Addr, std::vector<Tick>> lineAddressTime;
495 };
496 
497 } // namespace gem5
498 
499 #endif // __GPU_DYN_INST_HH__
gem5::AtomicOpCAS::s
T s
Definition: gpu_dyn_inst.hh:58
gem5::GPUDynInst::wfSlotId
int wfSlotId
Definition: gpu_dyn_inst.hh:196
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
gem5::ArmISA::NumVecElemPerVecReg
constexpr unsigned NumVecElemPerVecReg
Definition: vec.hh:58
gem5::GPUDynInst::isMAD
bool isMAD() const
Definition: gpu_dyn_inst.cc:747
gem5::GPUDynInst::hasDestinationSgpr
bool hasDestinationSgpr() const
Definition: gpu_dyn_inst.cc:254
gem5::GPUDynInst::tlbHitLevel
std::vector< int > tlbHitLevel
Definition: gpu_dyn_inst.hh:460
gem5::RegisterOperandInfo::opIdx
const int opIdx
Index of this operand within the set of its parent instruction's operand list.
Definition: gpu_dyn_inst.hh:107
gem5::GPUDynInst::writesFlatScratch
bool writesFlatScratch() const
Definition: gpu_dyn_inst.cc:560
gem5::GPUDynInst::isAtomic
bool isAtomic() const
Definition: gpu_dyn_inst.cc:448
gem5::GPUDynInst::makeAtomicOpFunctor
AtomicOpFunctorPtr makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
Definition: gpu_dyn_inst.hh:305
gem5::GPUDynInst::doApertureCheck
void doApertureCheck(const VectorMask &mask)
Definition: gpu_dyn_inst.cc:753
gem5::GPUDynInst::wfDynId
int wfDynId
Definition: gpu_dyn_inst.hh:188
gem5::GPUDynInst::isAtomicDec
bool isAtomicDec() const
Definition: gpu_dyn_inst.cc:627
gem5::GPUDynInst::isCondBranch
bool isCondBranch() const
Definition: gpu_dyn_inst.cc:340
gem5::GPUDynInst::isLoad
bool isLoad() const
Definition: gpu_dyn_inst.cc:436
gem5::GPUDynInst::isKernelLaunch
bool isKernelLaunch() const
Definition: gpu_dyn_inst.cc:358
gem5::GPUDynInst::isAtomicXor
bool isAtomicXor() const
Definition: gpu_dyn_inst.cc:592
gem5::GPUDynInst::StatusVector
std::unordered_map< Addr, std::vector< int > > StatusVector
Definition: gpu_dyn_inst.hh:453
gem5::Request::GLC_BIT
@ GLC_BIT
Definition: request.hh:317
gem5::GPUDynInst::roundTripTime
std::vector< Tick > roundTripTime
Definition: gpu_dyn_inst.hh:490
gem5::GPUDynInst::getLineAddressTime
const std::map< Addr, std::vector< Tick > > & getLineAddressTime() const
Definition: gpu_dyn_inst.hh:474
gem5::GPUDynInst::setAccessTime
void setAccessTime(Tick currentTime)
Definition: gpu_dyn_inst.hh:468
gem5::GPUDynInst::isKernArgSeg
bool isKernArgSeg() const
Definition: gpu_dyn_inst.cc:681
gem5::GPUDynInst::decrementStatusVector
void decrementStatusVector(int lane)
Definition: gpu_dyn_inst.hh:392
gem5::Request::SLC_BIT
@ SLC_BIT
user-policy flags
Definition: request.hh:315
gem5::AtomicOpFunctor
Definition: amo.hh:43
gem5::GPUDynInst::seqNum
InstSeqNum seqNum() const
Definition: gpu_dyn_inst.cc:280
amo.hh
gem5::GPUDynInst::getAccessTime
Tick getAccessTime() const
Definition: gpu_dyn_inst.hh:466
gem5::GPUDynInst::_seqNum
const InstSeqNum _seqNum
Definition: gpu_dyn_inst.hh:481
gem5::GPUDynInst::readsExec
bool readsExec() const
Definition: gpu_dyn_inst.cc:522
gem5::GPUDynInst::isOpcode
bool isOpcode(const std::string &opcodeStr) const
Definition: gpu_dyn_inst.cc:268
gem5::GPUDynInst::readsMode
bool readsMode() const
Definition: gpu_dyn_inst.cc:510
gem5::GPUDynInst::readsSCC
bool readsSCC() const
Definition: gpu_dyn_inst.cc:478
gem5::GPUDynInst::isFMA
bool isFMA() const
Definition: gpu_dyn_inst.cc:735
gem5::Wavefront
Definition: wavefront.hh:60
gem5::GPUDynInst::_staticInst
GPUStaticInst * _staticInst
Definition: gpu_dyn_inst.hh:480
gem5::GPUDynInst::profileRoundTripTime
void profileRoundTripTime(Tick currentTime, int hopId)
Definition: gpu_dyn_inst.cc:989
gem5::GPUDynInst::isArgSeg
bool isArgSeg() const
Definition: gpu_dyn_inst.cc:663
gem5::GPUDynInst::kern_id
int kern_id
Definition: gpu_dyn_inst.hh:190
compute_unit.hh
sc_dt::to_string
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:91
gem5::GPUDynInst::scalar_data
uint8_t * scalar_data
Definition: gpu_dyn_inst.hh:177
gem5::GPUDynInst::d_data
uint8_t * d_data
Definition: gpu_dyn_inst.hh:175
gem5::VectorMask
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:45
gem5::GPUDynInst::srcVecRegOperands
const std::vector< OperandInfo > & srcVecRegOperands() const
Definition: gpu_dyn_inst.cc:112
gem5::ComputeUnit::stats
gem5::ComputeUnit::ComputeUnitStats stats
gem5::GPUDynInst::isBarrier
bool isBarrier() const
Definition: gpu_dyn_inst.cc:406
gem5::GPUDynInst::maxSrcVecRegOperandSize
int maxSrcVecRegOperandSize()
Definition: gpu_dyn_inst.cc:160
gem5::GPUDynInst::readsVCC
bool readsVCC() const
Definition: gpu_dyn_inst.cc:490
gem5::RegisterOperandInfo::physIndices
const std::vector< int > physIndices
Definition: gpu_dyn_inst.hh:113
gem5::GPUDynInst::writesExec
bool writesExec() const
Definition: gpu_dyn_inst.cc:528
gem5::GPUDynInst::numSrcScalarRegOperands
int numSrcScalarRegOperands() const
Definition: gpu_dyn_inst.cc:186
gem5::GPUDynInst::isGroupSeg
bool isGroupSeg() const
Definition: gpu_dyn_inst.cc:675
gem5::GPUDynInst::numSrcScalarDWords
int numSrcScalarDWords()
Definition: gpu_dyn_inst.cc:212
std::vector< int >
gem5::GPUDynInst::isAtomicInc
bool isAtomicInc() const
Definition: gpu_dyn_inst.cc:621
gem5::GPUDynInst::isAtomicAnd
bool isAtomicAnd() const
Definition: gpu_dyn_inst.cc:580
gem5::GPUDynInst::initiateAcc
void initiateAcc(GPUDynInstPtr gpuDynInst)
Definition: gpu_dyn_inst.cc:305
gem5::GPUDynInst::accessTime
Tick accessTime
Definition: gpu_dyn_inst.hh:486
gem5::GPUDynInst::numSrcVecRegOperands
int numSrcVecRegOperands() const
Definition: gpu_dyn_inst.cc:148
gem5::AtomicOpCAS::clone
AtomicOpFunctor * clone()
Definition: gpu_dyn_inst.hh:76
gem5::GPUStaticInst
Definition: gpu_static_inst.hh:61
gem5::AtomicOpCAS::execute
void execute(T *b)
Definition: gpu_dyn_inst.hh:66
gem5::GPUDynInst::disassemble
const std::string & disassemble() const
Definition: gpu_dyn_inst.cc:274
gem5::GPUDynInst::isAtomicRet
bool isAtomicRet() const
Definition: gpu_dyn_inst.cc:460
gem5::GPUDynInst::isAtomicNoRet
bool isAtomicNoRet() const
Definition: gpu_dyn_inst.cc:454
gem5::GPUDynInst::isSaveRestore
bool isSaveRestore
Definition: gpu_dyn_inst.hh:478
gem5::GPUDynInst::profileLineAddressTime
void profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
Definition: gpu_dyn_inst.cc:999
gem5::mask
constexpr uint64_t mask(unsigned nbits)
Generate a 64-bit mask of 'nbits' 1s, right justified.
Definition: bitfield.hh:63
gem5::RegisterOperandInfo::numDWORDs
const int numDWORDs
Size of this operand in DWORDs.
Definition: gpu_dyn_inst.hh:111
gem5::TypedAtomicOpFunctor
Definition: amo.hh:56
gem5::GPUDynInst::getLaneStatus
int getLaneStatus(int lane) const
Definition: gpu_dyn_inst.hh:403
gem5::RegisterOperandInfo::numRegisters
int numRegisters() const
The number of registers required to store this operand.
Definition: gpu_dyn_inst.hh:94
gem5::GPUDynInst::memStatusVector
StatusVector memStatusVector
Definition: gpu_dyn_inst.hh:454
gem5::GPUDynInst::scalarAddr
Addr scalarAddr
Definition: gpu_dyn_inst.hh:169
gem5::GPUDynInst::isSleep
bool isSleep() const
Definition: gpu_dyn_inst.cc:400
gem5::GPUDynInst::time
Tick time
Definition: gpu_dyn_inst.hh:200
gem5::GPUDynInst::hasDestinationVgpr
bool hasDestinationVgpr() const
Definition: gpu_dyn_inst.cc:242
gem5::ComputeUnit
Definition: compute_unit.hh:201
gem5::GPUDynInst::isVector
bool isVector() const
Definition: gpu_dyn_inst.cc:466
gem5::GPUDynInst::isGloballyCoherent
bool isGloballyCoherent() const
Definition: gpu_dyn_inst.cc:705
gem5::GPUDynInst::statusVector
std::vector< int > statusVector
Definition: gpu_dyn_inst.hh:458
gem5::ArmISA::b
Bitfield< 7 > b
Definition: misc_types.hh:382
gem5::GPUDynInst::isMemSync
bool isMemSync() const
Definition: gpu_dyn_inst.cc:412
gem5::GPUDynInst::GPUDynInst
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum)
Definition: gpu_dyn_inst.cc:44
gem5::GPUDynInst::hasSourceVgpr
bool hasSourceVgpr() const
Definition: gpu_dyn_inst.cc:236
gem5::GPUDynInst::isNop
bool isNop() const
Definition: gpu_dyn_inst.cc:346
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::GPUDynInst::isUnconditionalJump
bool isUnconditionalJump() const
Definition: gpu_dyn_inst.cc:382
gem5::Request::ATOMIC_NO_RETURN_OP
@ ATOMIC_NO_RETURN_OP
The request is an atomic that does not return data.
Definition: request.hh:177
gem5::AtomicOpCAS::computeUnit
ComputeUnit * computeUnit
Definition: gpu_dyn_inst.hh:60
gem5::RegisterOperandInfo
Definition: gpu_dyn_inst.hh:79
gem5::GPUDynInst::dstScalarRegOperands
const std::vector< OperandInfo > & dstScalarRegOperands() const
Definition: gpu_dyn_inst.cc:130
gem5::GPUDynInst::isAtomicMin
bool isAtomicMin() const
Definition: gpu_dyn_inst.cc:639
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::GPUDynInst::isAtomicOr
bool isAtomicOr() const
Definition: gpu_dyn_inst.cc:586
gem5::GPUDynInst::numDstRegOperands
int numDstRegOperands()
Definition: gpu_dyn_inst.cc:142
gem5::GPUDynInst::resolveFlatSegment
void resolveFlatSegment(const VectorMask &mask)
Definition: gpu_dyn_inst.cc:825
gem5::GPUDynInst::addr
std::vector< Addr > addr
Definition: gpu_dyn_inst.hh:171
gem5::GPUDynInst::x_data
uint8_t * x_data
Definition: gpu_dyn_inst.hh:181
gem5::GPUDynInst::isFlatGlobal
bool isFlatGlobal() const
Definition: gpu_dyn_inst.cc:430
gem5::GPUDynInst::latency
WaitClass latency
Definition: gpu_dyn_inst.hh:202
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::GPUDynInst::isMAC
bool isMAC() const
Definition: gpu_dyn_inst.cc:741
gem5::WaitClass
Definition: misc.hh:67
gem5::GPUDynInst::isSDWAInst
bool isSDWAInst() const
Definition: gpu_dyn_inst.cc:364
gem5::ComputeUnit::ComputeUnitStats::numCASOps
statistics::Scalar numCASOps
Definition: compute_unit.hh:1075
gem5::GPUDynInst::writesMode
bool writesMode() const
Definition: gpu_dyn_inst.cc:516
gem5::RegisterOperandInfo::operandIdx
int operandIdx() const
Definition: gpu_dyn_inst.hh:95
gem5::GPUDynInst::cu_id
int cu_id
Definition: gpu_dyn_inst.hh:192
gem5::GPUDynInst::isSystemCoherent
bool isSystemCoherent() const
Definition: gpu_dyn_inst.cc:711
gem5::GPUDynInst::getNumOperands
int getNumOperands() const
Definition: gpu_dyn_inst.cc:230
gem5::GPUDynInst::dstVecRegOperands
const std::vector< OperandInfo > & dstVecRegOperands() const
Definition: gpu_dyn_inst.cc:118
gem5::Request::ATOMIC_RETURN_OP
@ ATOMIC_RETURN_OP
The request is an atomic that returns data.
Definition: request.hh:175
gem5::Gcn3ISA::RegSizeDWords
const int RegSizeDWords
Size of a single-precision register in DWords.
Definition: gpu_registers.hh:176
gem5::GPUDynInst
Definition: gpu_dyn_inst.hh:116
gem5::AtomicOpCAS::AtomicOpCAS
AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
Definition: gpu_dyn_inst.hh:62
gem5::GPUDynInst::numSrcRegOperands
int numSrcRegOperands()
Definition: gpu_dyn_inst.cc:136
gem5::GPUDynInst::staticInstruction
GPUStaticInst * staticInstruction()
Definition: gpu_dyn_inst.hh:214
gem5::GPUDynInst::pc
Addr pc()
Definition: gpu_dyn_inst.cc:286
gem5::GPUDynInst::allLanesZero
bool allLanesZero() const
Definition: gpu_dyn_inst.hh:411
gem5::RegisterOperandInfo::virtIndices
const std::vector< int > virtIndices
Definition: gpu_dyn_inst.hh:112
gem5::GPUDynInst::isLocalMem
bool isLocalMem() const
Definition: gpu_dyn_inst.cc:657
gem5::AtomicOpCAS
Definition: gpu_dyn_inst.hh:54
gem5::GPUDynInst::numDstScalarDWords
int numDstScalarDWords()
Definition: gpu_dyn_inst.cc:218
gem5::GPUDynInst::~GPUDynInst
~GPUDynInst()
Definition: gpu_dyn_inst.cc:96
gem5::GPUDynInst::isDPPInst
bool isDPPInst() const
Definition: gpu_dyn_inst.cc:370
gem5::GPUDynInst::isF32
bool isF32() const
Definition: gpu_dyn_inst.cc:723
gem5::GPUDynInst::maxSrcScalarRegOpSize
int maxSrcScalarRegOpSize
Definition: gpu_dyn_inst.hh:483
gem5::GPUDynInst::writesVCC
bool writesVCC() const
Definition: gpu_dyn_inst.cc:500
gem5::GPUDynInst::numDstScalarRegOperands
int numDstScalarRegOperands() const
Definition: gpu_dyn_inst.cc:192
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUDynInst::execUnitId
int execUnitId
Definition: gpu_dyn_inst.hh:198
gem5::GPUDynInst::isStore
bool isStore() const
Definition: gpu_dyn_inst.cc:442
gem5::GPUDynInst::ignoreExec
bool ignoreExec() const
Definition: gpu_dyn_inst.cc:534
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::GPUDynInst::printStatusVector
std::string printStatusVector() const
Definition: gpu_dyn_inst.hh:437
gem5::GPUDynInst::isFlat
bool isFlat() const
Definition: gpu_dyn_inst.cc:424
gem5::RegisterOperandInfo::RegisterOperandInfo
RegisterOperandInfo()=delete
gem5::GPUDynInst::updateStats
void updateStats()
Definition: gpu_dyn_inst.cc:948
gem5::GPUExecContext
Definition: gpu_exec_context.hh:45
gem5::GPUDynInst::isSpillSeg
bool isSpillSeg() const
Definition: gpu_dyn_inst.cc:699
gem5::GPUDynInst::setRequestFlags
void setRequestFlags(RequestPtr req) const
Definition: gpu_dyn_inst.hh:335
gem5::GPUDynInst::getRoundTripTime
std::vector< Tick > getRoundTripTime() const
Definition: gpu_dyn_inst.hh:471
gem5::GPUDynInst::execute
void execute(GPUDynInstPtr gpuDynInst)
Definition: gpu_dyn_inst.cc:106
gem5::GPUDynInst::simdId
int simdId
Definition: gpu_dyn_inst.hh:186
gem5::GPUDynInst::isScalar
bool isScalar() const
Definition: gpu_dyn_inst.cc:472
gem5::RegisterOperandInfo::RegisterOperandInfo
RegisterOperandInfo(int op_idx, int num_dwords, const std::vector< int > &virt_indices, const std::vector< int > &phys_indices)
Definition: gpu_dyn_inst.hh:83
gem5::GPUDynInst::isSpecialOp
bool isSpecialOp() const
Definition: gpu_dyn_inst.cc:388
gem5::GPUDynInst::readsFlatScratch
bool readsFlatScratch() const
Definition: gpu_dyn_inst.cc:570
gem5::GPUDynInst::readsExecMask
bool readsExecMask() const
Definition: gpu_dyn_inst.cc:550
gem5::GPUDynInst::isAtomicAdd
bool isAtomicAdd() const
Definition: gpu_dyn_inst.cc:609
gem5::GPUDynInst::isGlobalSeg
bool isGlobalSeg() const
Definition: gpu_dyn_inst.cc:669
gem5::GPUDynInst::numDstVecRegOperands
int numDstVecRegOperands() const
Definition: gpu_dyn_inst.cc:154
gem5::GPUDynInst::resetEntireStatusVector
void resetEntireStatusVector()
Definition: gpu_dyn_inst.hh:364
gem5::GPUDynInst::completeAcc
void completeAcc(GPUDynInstPtr gpuDynInst)
Definition: gpu_dyn_inst.cc:314
gpu_exec_context.hh
gem5::GPUDynInst::isMemRef
bool isMemRef() const
Definition: gpu_dyn_inst.cc:418
gem5::GPUDynInst::numDstVecDWords
int numDstVecDWords()
Definition: gpu_dyn_inst.cc:180
gem5::GPUDynInst::isEndOfKernel
bool isEndOfKernel() const
Definition: gpu_dyn_inst.cc:352
gem5::GPUDynInst::isBranch
bool isBranch() const
Definition: gpu_dyn_inst.cc:334
gem5::GPUDynInst::maxSrcVecRegOpSize
int maxSrcVecRegOpSize
Definition: gpu_dyn_inst.hh:482
gem5::GPUDynInst::a_data
uint8_t * a_data
Definition: gpu_dyn_inst.hh:179
gem5::GPUDynInst::srcLiteral
TheGpuISA::ScalarRegU32 srcLiteral() const
Definition: gpu_dyn_inst.cc:942
gem5::GPUDynInst::isF16
bool isF16() const
Definition: gpu_dyn_inst.cc:717
gem5::GPUDynInst::lineAddressTime
std::map< Addr, std::vector< Tick > > lineAddressTime
Definition: gpu_dyn_inst.hh:494
logging.hh
gem5::GPUDynInst::maxSrcScalarRegOperandSize
int maxSrcScalarRegOperandSize()
Definition: gpu_dyn_inst.cc:198
gem5::Request::INV_L1
@ INV_L1
Definition: request.hh:307
gem5::GPUDynInst::isArgLoad
bool isArgLoad() const
Definition: gpu_dyn_inst.cc:645
gem5::InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:40
gem5::GPUDynInst::isReadOnlySeg
bool isReadOnlySeg() const
Definition: gpu_dyn_inst.cc:693
gem5::GPUDynInst::isAtomicMax
bool isAtomicMax() const
Definition: gpu_dyn_inst.cc:633
trace.hh
gem5::AtomicOpCAS::c
T c
Definition: gpu_dyn_inst.hh:57
gem5::GPUDynInst::isPrivateSeg
bool isPrivateSeg() const
Definition: gpu_dyn_inst.cc:687
gem5::GPUDynInst::wg_id
int wg_id
Definition: gpu_dyn_inst.hh:194
gem5::GPUDynInst::writesExecMask
bool writesExecMask() const
Definition: gpu_dyn_inst.cc:540
gem5::GPUDynInst::hasSourceSgpr
bool hasSourceSgpr() const
Definition: gpu_dyn_inst.cc:248
gem5::GPUDynInst::numScalarReqs
int numScalarReqs
Definition: gpu_dyn_inst.hh:464
gem5::AtomicOpFunctorPtr
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition: amo.hh:242
gem5::GPUDynInst::isAtomicCAS
bool isAtomicCAS() const
Definition: gpu_dyn_inst.cc:598
gem5::ComputeUnit::ComputeUnitStats::numFailedCASOps
statistics::Scalar numFailedCASOps
Definition: compute_unit.hh:1076
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: tlb.cc:60
gem5::GPUDynInst::numSrcVecDWords
int numSrcVecDWords()
Definition: gpu_dyn_inst.cc:174
gem5::GPUDynInst::writesSCC
bool writesSCC() const
Definition: gpu_dyn_inst.cc:484
gem5::GPUDynInst::resetStatusVector
void resetStatusVector(int lane)
Definition: gpu_dyn_inst.hh:374
gem5::GPUExecContext::cu
ComputeUnit * cu
Definition: gpu_exec_context.hh:62
gem5::GPUDynInst::isWaitcnt
bool isWaitcnt() const
Definition: gpu_dyn_inst.cc:394
gem5::GPUDynInst::isAtomicExch
bool isAtomicExch() const
Definition: gpu_dyn_inst.cc:603
gem5::GPUDynInst::isF64
bool isF64() const
Definition: gpu_dyn_inst.cc:729
operand_info.hh
gem5::RegisterOperandInfo::virtIdx
int virtIdx(int reg_num=0) const
We typically only need the first virtual register for the operand regardless of its size.
Definition: gpu_dyn_inst.hh:100
gem5::GPUDynInst::isAtomicSub
bool isAtomicSub() const
Definition: gpu_dyn_inst.cc:615
gem5::GPUDynInst::isALU
bool isALU() const
accessor methods for the attributes of the underlying GPU static instruction
Definition: gpu_dyn_inst.cc:328
gem5::GPUDynInst::exec_mask
VectorMask exec_mask
Definition: gpu_dyn_inst.hh:183
gem5::GPUDynInst::isReturn
bool isReturn() const
Definition: gpu_dyn_inst.cc:376
gem5::GPUDynInst::pAddr
Addr pAddr
Definition: gpu_dyn_inst.hh:172
gem5::GPUDynInst::isGlobalMem
bool isGlobalMem() const
Definition: gpu_dyn_inst.cc:651
gem5::GPUDynInst::setStatusVector
void setStatusVector(int lane, int newVal)
Definition: gpu_dyn_inst.hh:381
gem5::Gcn3ISA::ScalarRegU32
uint32_t ScalarRegU32
Definition: gpu_registers.hh:153
gem5::GPUDynInst::maxOperandSize
int maxOperandSize()
Definition: gpu_dyn_inst.cc:224
gem5::GPUDynInst::srcScalarRegOperands
const std::vector< OperandInfo > & srcScalarRegOperands() const
Definition: gpu_dyn_inst.cc:124
gem5::GPUDynInst::executedAs
enums::StorageClassType executedAs()
Definition: gpu_dyn_inst.cc:298

Generated on Wed May 4 2022 12:13:58 for gem5 by doxygen 1.8.17