gem5  v21.1.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gpu_dyn_inst.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __GPU_DYN_INST_HH__
35 #define __GPU_DYN_INST_HH__
36 
37 #include <cstdint>
38 #include <memory>
39 #include <string>
40 
41 #include "base/amo.hh"
42 #include "base/logging.hh"
43 #include "base/trace.hh"
44 #include "debug/GPUMem.hh"
45 #include "enums/StorageClassType.hh"
49 
50 namespace gem5
51 {
52 
53 class GPUStaticInst;
54 
55 template<typename T>
57 {
58  public:
59  T c;
60  T s;
61 
63 
64  AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
    // _c initializes c, the value execute() compares *b against; _s
    // initializes s, the value execute() stores when they match.
    // compute_unit is only stored here; this constructor does not
    // dereference it.
65  : c(_c), s(_s), computeUnit(compute_unit) { }
66 
67  void
68  execute(T *b)
69  {
    // Compare-and-swap on the value b points to: when *b equals c it is
    // overwritten with s. b is dereferenced without a null check.
    // NOTE(review): listing lines 70 and 75 are absent from this rendering,
    // which is why the else branch looks empty. The symbol index on this
    // page lists numCASOps and numFailedCASOps, so the missing lines are
    // presumably statistics updates -- confirm against the original header.
71 
72  if (*b == c) {
73  *b = s;
74  } else {
76  }
77  }
79 };
80 
82 {
83  public:
84  RegisterOperandInfo() = delete;
85  RegisterOperandInfo(int op_idx, int num_dwords,
86  const std::vector<int> &virt_indices,
87  const std::vector<int> &phys_indices)
88  : opIdx(op_idx), numDWORDs(num_dwords), virtIndices(virt_indices),
89  physIndices(phys_indices)
90  {
    // Nothing to do in the body: the operand index, its size in DWORDs and
    // the two register-index lists are all set in the initializer list.
    // NOTE(review): the symbol index on this page shows virtIndices and
    // physIndices as by-value const vectors, so the arguments are
    // presumably copied rather than referenced -- confirm.
91  }
92 
97  int operandIdx() const { return opIdx; }
    // virtIdx defaults to the first register of the operand (reg_num=0).
    // It indexes with vector::at(), so an out-of-range reg_num throws
    // std::out_of_range instead of reading past the end of virtIndices.
102  int virtIdx(int reg_num=0) const { return virtIndices.at(reg_num); }
103 
104  private:
109  const int opIdx;
113  const int numDWORDs;
116 };
117 
119 {
120  public:
121  GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
122  uint64_t instSeqNum);
123  ~GPUDynInst();
124  void execute(GPUDynInstPtr gpuDynInst);
125 
130 
131  int numSrcRegOperands();
132  int numDstRegOperands();
133 
134  int numSrcVecRegOperands() const;
135  int numDstVecRegOperands() const;
137  int numSrcVecDWords();
138  int numDstVecDWords();
139 
140  int numSrcScalarRegOperands() const;
141  int numDstScalarRegOperands() const;
143  int numSrcScalarDWords();
144  int numDstScalarDWords();
145 
146  int maxOperandSize();
147 
148  int getNumOperands() const;
149 
150  bool hasSourceSgpr() const;
151  bool hasDestinationSgpr() const;
152  bool hasSourceVgpr() const;
153  bool hasDestinationVgpr() const;
154 
155  // returns true if the string "opcodeStr" is found in the
156  // opcode of the instruction
157  bool isOpcode(const std::string& opcodeStr) const;
158  bool isOpcode(const std::string& opcodeStr,
159  const std::string& extStr) const;
160 
161  const std::string &disassemble() const;
162 
163  InstSeqNum seqNum() const;
164 
165  enums::StorageClassType executedAs();
166 
167  // virtual address for scalar memory operations
169  // virtual addresses for vector memory operations
172 
173  // vector data to get written
174  uint8_t *d_data;
175  // scalar data to be transferred
176  uint8_t *scalar_data;
177  // Additional data (for atomics)
178  uint8_t *a_data;
179  // Additional data (for atomics)
180  uint8_t *x_data;
181  // The execution mask
183 
184  // SIMD where the WF of the memory instruction has been mapped to
185  int simdId;
186  // unique id of the WF where the memory instruction belongs to
187  int wfDynId;
188  // The kernel id of the requesting wf
189  int kern_id;
190  // The CU id of the requesting wf
191  int cu_id;
192  // The workgroup id of the requesting wf
193  int wg_id;
194  // HW slot id where the WF is mapped to inside a SIMD unit
195  int wfSlotId;
196  // execution pipeline id where the memory instruction has been scheduled
198  // The execution time of this operation
200  // The latency of this operation
202 
203  // Initiate the specified memory operation, by creating a
204  // memory request and sending it off to the memory system.
205  void initiateAcc(GPUDynInstPtr gpuDynInst);
206  // Complete the specified memory operation, by writing
207  // value back to the RF in the case of a load or atomic
208  // return or, in the case of a store, we do nothing
209  void completeAcc(GPUDynInstPtr gpuDynInst);
210 
211  void updateStats();
212 
214 
216 
217  bool isALU() const;
218  bool isBranch() const;
219  bool isCondBranch() const;
220  bool isNop() const;
221  bool isReturn() const;
222  bool isEndOfKernel() const;
223  bool isKernelLaunch() const;
224  bool isSDWAInst() const;
225  bool isDPPInst() const;
226  bool isUnconditionalJump() const;
227  bool isSpecialOp() const;
228  bool isWaitcnt() const;
229  bool isSleep() const;
230 
231  bool isBarrier() const;
232  bool isMemSync() const;
233  bool isMemRef() const;
234  bool isFlat() const;
235  bool isLoad() const;
236  bool isStore() const;
237 
238  bool isAtomic() const;
239  bool isAtomicNoRet() const;
240  bool isAtomicRet() const;
241 
242  bool isScalar() const;
243  bool isVector() const;
244  bool readsSCC() const;
245  bool writesSCC() const;
246  bool readsVCC() const;
247  bool writesVCC() const;
248  bool readsExec() const;
249  bool writesExec() const;
250  bool readsMode() const;
251  bool writesMode() const;
252  bool ignoreExec() const;
253  bool readsFlatScratch() const;
254  bool writesFlatScratch() const;
255  bool readsExecMask() const;
256  bool writesExecMask() const;
257 
258  bool isAtomicAnd() const;
259  bool isAtomicOr() const;
260  bool isAtomicXor() const;
261  bool isAtomicCAS() const;
262  bool isAtomicExch() const;
263  bool isAtomicAdd() const;
264  bool isAtomicSub() const;
265  bool isAtomicInc() const;
266  bool isAtomicDec() const;
267  bool isAtomicMax() const;
268  bool isAtomicMin() const;
269 
270  bool isArgLoad() const;
271  bool isGlobalMem() const;
272  bool isLocalMem() const;
273 
274  bool isArgSeg() const;
275  bool isGlobalSeg() const;
276  bool isGroupSeg() const;
277  bool isKernArgSeg() const;
278  bool isPrivateSeg() const;
279  bool isReadOnlySeg() const;
280  bool isSpillSeg() const;
281 
282  bool isGloballyCoherent() const;
283  bool isSystemCoherent() const;
284 
285  bool isF16() const;
286  bool isF32() const;
287  bool isF64() const;
288 
289  bool isFMA() const;
290  bool isMAC() const;
291  bool isMAD() const;
292 
293  // for FLAT memory ops, check the segment address
294  // against the APE registers to see if it falls
295  // within one of the APE ranges for LDS/SCRATCH/GPUVM.
296  // if it does not fall into one of the three APEs, it
297  // will be a regular global access.
298  void doApertureCheck(const VectorMask &mask);
299  // Function to resolve flat accesses during the execution stage.
300  void resolveFlatSegment(const VectorMask &mask);
301 
302  template<typename c0> AtomicOpFunctorPtr
303  makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
304  {
     // Builds the atomic-op functor that matches this instruction. The
     // isAtomic*() predicates are tested in the order written and the
     // first one that holds decides the functor type.
     //  - And/Or/Xor/Exch/Add/Sub/Max/Min take *reg0 as their operand.
     //  - CAS takes *reg0 and *reg1 (passed as AtomicOpCAS's _c and _s,
     //    i.e. compare value and swap value) plus cu.
     //  - Inc/Dec are constructed without an operand; neither pointer is
     //    dereferenced for them.
     // reg1 is dereferenced only on the CAS path. No null checks are made
     // on either pointer. If no predicate holds, fatal() is called.
305  if (isAtomicAnd()) {
306  return std::make_unique<AtomicOpAnd<c0>>(*reg0);
307  } else if (isAtomicOr()) {
308  return std::make_unique<AtomicOpOr<c0>>(*reg0);
309  } else if (isAtomicXor()) {
310  return std::make_unique<AtomicOpXor<c0>>(*reg0);
311  } else if (isAtomicCAS()) {
312  return std::make_unique<AtomicOpCAS<c0>>(*reg0, *reg1, cu);
313  } else if (isAtomicExch()) {
314  return std::make_unique<AtomicOpExch<c0>>(*reg0);
315  } else if (isAtomicAdd()) {
316  return std::make_unique<AtomicOpAdd<c0>>(*reg0);
317  } else if (isAtomicSub()) {
318  return std::make_unique<AtomicOpSub<c0>>(*reg0);
319  } else if (isAtomicInc()) {
320  return std::make_unique<AtomicOpInc<c0>>();
321  } else if (isAtomicDec()) {
322  return std::make_unique<AtomicOpDec<c0>>();
323  } else if (isAtomicMax()) {
324  return std::make_unique<AtomicOpMax<c0>>(*reg0);
325  } else if (isAtomicMin()) {
326  return std::make_unique<AtomicOpMin<c0>>(*reg0);
327  } else {
328  fatal("Unrecognized atomic operation");
329  }
330  }
331 
332  void
334  {
335  if (isGloballyCoherent()) {
336  req->setCacheCoherenceFlags(Request::GLC_BIT);
337  }
338 
339  if (isSystemCoherent()) {
340  req->setCacheCoherenceFlags(Request::SLC_BIT);
341  }
342 
343  if (isAtomicRet()) {
344  req->setFlags(Request::ATOMIC_RETURN_OP);
345  } else if (isAtomicNoRet()) {
346  req->setFlags(Request::ATOMIC_NO_RETURN_OP);
347  }
348 
349  if (isMemSync()) {
350  // the path for kernel launch and kernel end is different
351  // from non-kernel mem sync.
352  assert(!isKernelLaunch());
353  assert(!isEndOfKernel());
354 
355  // must be wbinv inst if not kernel launch/end
356  req->setCacheCoherenceFlags(Request::INV_L1);
357  }
358  }
359 
360  // reset the number of pending memory requests for all lanes
361  void
363  {
365  for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
366  resetStatusVector(lane);
367  }
368  }
369 
370  // reset the number of pending memory requests for the inputted lane
371  void
373  {
374  setStatusVector(lane, 0);
375  }
376 
377  // set the number of pending memory requests for the inputted lane
     // newVal must be 0, 1 or 2; this is enforced only by the assert below.
     // lane is used directly as an index into statusVector via operator[],
     // and no range check on lane is made here.
378  void
379  setStatusVector(int lane, int newVal)
380  {
381  // currently we can have up to 2 memory requests per lane (if the
382  // lane's request goes across multiple cache lines)
383  assert((newVal >= 0) && (newVal <= 2));
384  statusVector[lane] = newVal;
385  }
386 
387  // decrements the number of pending memory requests for the given lane
388  // by 1
389  void
391  {
392  // this lane may have multiple requests, so only subtract one for
393  // this request
394  assert(statusVector[lane] >= 1);
395  statusVector[lane]--;
396  }
397 
398  // return the current number of pending memory requests for the inputted
399  // lane
     // Reads statusVector[lane] with operator[]; lane is not range-checked.
400  int
401  getLaneStatus(int lane) const
402  {
403  return statusVector[lane];
404  }
405 
406  // returns true if all memory requests from all lanes have been received,
407  // else returns false
     // All TheGpuISA::NumVecElemPerVecReg lanes are always visited: the loop
     // does not stop at the first pending lane, so every lane that still has
     // outstanding requests produces its own DPRINTF message under the
     // GPUMem flag.
408  bool
409  allLanesZero() const
410  {
411  // local variables
412  bool allZero = true;
413 
414  // iterate over all lanes, checking the number of pending memory
415  // requests they have
416  for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
417  // if any lane still has pending requests, the result is false (recorded
     // in allZero; the scan continues so the remaining lanes are checked too)
418  if (statusVector[lane] > 0) {
419  DPRINTF(GPUMem, "CU%d: WF[%d][%d]: lane: %d has %d pending "
420  "request(s) for %#x\n", cu_id, simdId, wfSlotId, lane,
421  statusVector[lane], addr[lane]);
422  allZero = false;
423  }
424  }
425 
     // the summary message prints only addr[0], not every lane's address
426  if (allZero) {
427  DPRINTF(GPUMem, "CU%d: WF[%d][%d]: all lanes have no pending"
428  " requests for %#x\n", cu_id, simdId, wfSlotId, addr[0]);
429  }
430  return allZero;
431  }
432 
433  // returns a string representing the current state of the statusVector
434  std::string
436  {
437  std::string statusVec_str = "[";
438 
439  // iterate over all lanes, adding the current number of pending
440  // requests for this lane to the string
441  for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
442  statusVec_str += std::to_string(statusVector[lane]);
443  }
444  statusVec_str += "]";
445 
446  return statusVec_str;
447  }
448 
449  // Map returned packets and the addresses they satisfy with which lane they
450  // were requested from
451  typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
453 
454  // Track the status of memory requests per lane, an int per lane to allow
455  // unaligned accesses
457  // for ld_v# or st_v#
459 
460  // for misaligned scalar ops we track the number
461  // of outstanding reqs here
463 
464  Tick getAccessTime() const { return accessTime; }
465 
     // Overwrites accessTime with currentTime unconditionally; the previously
     // stored value is not compared or checked.
466  void setAccessTime(Tick currentTime) { accessTime = currentTime; }
467 
468  void profileRoundTripTime(Tick currentTime, int hopId);
470 
471  void profileLineAddressTime(Addr addr, Tick currentTime, int hopId);
472  const std::map<Addr, std::vector<Tick>>& getLineAddressTime() const
     // Returns a const reference to the lineAddressTime member (no copy is
     // made), so the reference is only usable while this object is alive.
473  { return lineAddressTime; }
474 
475  // inst used to save/restore a wavefront context
477  private:
482 
483  // the time the request was started
485 
486  // hold the tick when the instruction arrives at certain hop points
487  // on its way to main memory
489 
490  // hold each cache block address for the instruction and a vector
491  // to hold the tick when the block arrives at certain hop points
492  std::map<Addr, std::vector<Tick>> lineAddressTime;
493 };
494 
495 } // namespace gem5
496 
497 #endif // __GPU_DYN_INST_HH__
gem5::AtomicOpCAS::s
T s
Definition: gpu_dyn_inst.hh:60
gem5::GPUDynInst::wfSlotId
int wfSlotId
Definition: gpu_dyn_inst.hh:195
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:189
gem5::ArmISA::NumVecElemPerVecReg
constexpr unsigned NumVecElemPerVecReg
Definition: vec.hh:58
gem5::GPUDynInst::isMAD
bool isMAD() const
Definition: gpu_dyn_inst.cc:731
gem5::GPUDynInst::hasDestinationSgpr
bool hasDestinationSgpr() const
Definition: gpu_dyn_inst.cc:256
gem5::GPUDynInst::tlbHitLevel
std::vector< int > tlbHitLevel
Definition: gpu_dyn_inst.hh:458
gem5::RegisterOperandInfo::opIdx
const int opIdx
Index of this operand within the set of its parent instruction's operand list.
Definition: gpu_dyn_inst.hh:109
gem5::GPUDynInst::writesFlatScratch
bool writesFlatScratch() const
Definition: gpu_dyn_inst.cc:544
gem5::GPUDynInst::isAtomic
bool isAtomic() const
Definition: gpu_dyn_inst.cc:432
gem5::GPUDynInst::makeAtomicOpFunctor
AtomicOpFunctorPtr makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
Definition: gpu_dyn_inst.hh:303
gem5::GPUDynInst::doApertureCheck
void doApertureCheck(const VectorMask &mask)
Definition: gpu_dyn_inst.cc:737
gem5::GPUDynInst::wfDynId
int wfDynId
Definition: gpu_dyn_inst.hh:187
gem5::GPUDynInst::isAtomicDec
bool isAtomicDec() const
Definition: gpu_dyn_inst.cc:611
gem5::GPUDynInst::isCondBranch
bool isCondBranch() const
Definition: gpu_dyn_inst.cc:330
gem5::GPUDynInst::isLoad
bool isLoad() const
Definition: gpu_dyn_inst.cc:420
gem5::GPUDynInst::isKernelLaunch
bool isKernelLaunch() const
Definition: gpu_dyn_inst.cc:348
gem5::GPUDynInst::isAtomicXor
bool isAtomicXor() const
Definition: gpu_dyn_inst.cc:576
gem5::GPUDynInst::StatusVector
std::unordered_map< Addr, std::vector< int > > StatusVector
Definition: gpu_dyn_inst.hh:451
gem5::GPUDynInst::roundTripTime
std::vector< Tick > roundTripTime
Definition: gpu_dyn_inst.hh:488
gem5::GPUDynInst::getLineAddressTime
const std::map< Addr, std::vector< Tick > > & getLineAddressTime() const
Definition: gpu_dyn_inst.hh:472
gem5::GPUDynInst::setAccessTime
void setAccessTime(Tick currentTime)
Definition: gpu_dyn_inst.hh:466
gem5::GPUDynInst::isKernArgSeg
bool isKernArgSeg() const
Definition: gpu_dyn_inst.cc:665
gem5::GPUDynInst::decrementStatusVector
void decrementStatusVector(int lane)
Definition: gpu_dyn_inst.hh:390
gem5::AtomicOpFunctor
Definition: amo.hh:43
gem5::GPUDynInst::seqNum
InstSeqNum seqNum() const
Definition: gpu_dyn_inst.cc:282
amo.hh
gem5::GPUDynInst::getAccessTime
Tick getAccessTime() const
Definition: gpu_dyn_inst.hh:464
gem5::GPUDynInst::_seqNum
const InstSeqNum _seqNum
Definition: gpu_dyn_inst.hh:479
gem5::GPUDynInst::readsExec
bool readsExec() const
Definition: gpu_dyn_inst.cc:506
gem5::GPUDynInst::isOpcode
bool isOpcode(const std::string &opcodeStr) const
Definition: gpu_dyn_inst.cc:270
gem5::GPUDynInst::readsMode
bool readsMode() const
Definition: gpu_dyn_inst.cc:494
gem5::GPUDynInst::readsSCC
bool readsSCC() const
Definition: gpu_dyn_inst.cc:462
gem5::GPUDynInst::isFMA
bool isFMA() const
Definition: gpu_dyn_inst.cc:719
gem5::Wavefront
Definition: wavefront.hh:62
gem5::GPUDynInst::_staticInst
GPUStaticInst * _staticInst
Definition: gpu_dyn_inst.hh:478
gem5::GPUDynInst::profileRoundTripTime
void profileRoundTripTime(Tick currentTime, int hopId)
Definition: gpu_dyn_inst.cc:973
gem5::GPUDynInst::isArgSeg
bool isArgSeg() const
Definition: gpu_dyn_inst.cc:647
gem5::GPUDynInst::kern_id
int kern_id
Definition: gpu_dyn_inst.hh:189
compute_unit.hh
sc_dt::to_string
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:91
gem5::GPUDynInst::scalar_data
uint8_t * scalar_data
Definition: gpu_dyn_inst.hh:176
gem5::GPUDynInst::d_data
uint8_t * d_data
Definition: gpu_dyn_inst.hh:174
gem5::VectorMask
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:47
gem5::GPUDynInst::srcVecRegOperands
const std::vector< OperandInfo > & srcVecRegOperands() const
Definition: gpu_dyn_inst.cc:114
gem5::Request::SLC_BIT
@ SLC_BIT
user-policy flags
Definition: request.hh:313
gem5::ComputeUnit::stats
gem5::ComputeUnit::ComputeUnitStats stats
gem5::GPUDynInst::isBarrier
bool isBarrier() const
Definition: gpu_dyn_inst.cc:396
gem5::GPUDynInst::maxSrcVecRegOperandSize
int maxSrcVecRegOperandSize()
Definition: gpu_dyn_inst.cc:162
gem5::GPUDynInst::readsVCC
bool readsVCC() const
Definition: gpu_dyn_inst.cc:474
gem5::RegisterOperandInfo::physIndices
const std::vector< int > physIndices
Definition: gpu_dyn_inst.hh:115
gem5::GPUDynInst::writesExec
bool writesExec() const
Definition: gpu_dyn_inst.cc:512
gem5::GPUDynInst::numSrcScalarRegOperands
int numSrcScalarRegOperands() const
Definition: gpu_dyn_inst.cc:188
gem5::GPUDynInst::isGroupSeg
bool isGroupSeg() const
Definition: gpu_dyn_inst.cc:659
gem5::GPUDynInst::numSrcScalarDWords
int numSrcScalarDWords()
Definition: gpu_dyn_inst.cc:214
std::vector< int >
gem5::GPUDynInst::isAtomicInc
bool isAtomicInc() const
Definition: gpu_dyn_inst.cc:605
gem5::GPUDynInst::isAtomicAnd
bool isAtomicAnd() const
Definition: gpu_dyn_inst.cc:564
gem5::GPUDynInst::initiateAcc
void initiateAcc(GPUDynInstPtr gpuDynInst)
Definition: gpu_dyn_inst.cc:295
gem5::GPUDynInst::accessTime
Tick accessTime
Definition: gpu_dyn_inst.hh:484
gem5::GPUDynInst::numSrcVecRegOperands
int numSrcVecRegOperands() const
Definition: gpu_dyn_inst.cc:150
gem5::AtomicOpCAS::clone
AtomicOpFunctor * clone()
Definition: gpu_dyn_inst.hh:78
gem5::GPUStaticInst
Definition: gpu_static_inst.hh:63
gem5::AtomicOpCAS::execute
void execute(T *b)
Definition: gpu_dyn_inst.hh:68
gem5::GPUDynInst::disassemble
const std::string & disassemble() const
Definition: gpu_dyn_inst.cc:276
gem5::GPUDynInst::isAtomicRet
bool isAtomicRet() const
Definition: gpu_dyn_inst.cc:444
gem5::GPUDynInst::isAtomicNoRet
bool isAtomicNoRet() const
Definition: gpu_dyn_inst.cc:438
gem5::GPUDynInst::isSaveRestore
bool isSaveRestore
Definition: gpu_dyn_inst.hh:476
gem5::GPUDynInst::profileLineAddressTime
void profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
Definition: gpu_dyn_inst.cc:983
gem5::mask
constexpr uint64_t mask(unsigned nbits)
Generate a 64-bit mask of 'nbits' 1s, right justified.
Definition: bitfield.hh:63
gem5::RegisterOperandInfo::numDWORDs
const int numDWORDs
Size of this operand in DWORDs.
Definition: gpu_dyn_inst.hh:113
gem5::TypedAtomicOpFunctor
Definition: amo.hh:56
gem5::GPUDynInst::getLaneStatus
int getLaneStatus(int lane) const
Definition: gpu_dyn_inst.hh:401
gem5::RegisterOperandInfo::numRegisters
int numRegisters() const
The number of registers required to store this operand.
Definition: gpu_dyn_inst.hh:96
gem5::GPUDynInst::memStatusVector
StatusVector memStatusVector
Definition: gpu_dyn_inst.hh:452
gem5::Request::ATOMIC_RETURN_OP
@ ATOMIC_RETURN_OP
The request is an atomic that returns data.
Definition: request.hh:173
gem5::GPUDynInst::scalarAddr
Addr scalarAddr
Definition: gpu_dyn_inst.hh:168
gem5::GPUDynInst::isSleep
bool isSleep() const
Definition: gpu_dyn_inst.cc:390
gem5::GPUDynInst::time
Tick time
Definition: gpu_dyn_inst.hh:199
gem5::GPUDynInst::hasDestinationVgpr
bool hasDestinationVgpr() const
Definition: gpu_dyn_inst.cc:244
gem5::ComputeUnit
Definition: compute_unit.hh:203
gem5::GPUDynInst::isVector
bool isVector() const
Definition: gpu_dyn_inst.cc:450
gem5::GPUDynInst::isGloballyCoherent
bool isGloballyCoherent() const
Definition: gpu_dyn_inst.cc:689
gem5::GPUDynInst::statusVector
std::vector< int > statusVector
Definition: gpu_dyn_inst.hh:456
gem5::ArmISA::b
Bitfield< 7 > b
Definition: misc_types.hh:381
gem5::Request::ATOMIC_NO_RETURN_OP
@ ATOMIC_NO_RETURN_OP
The request is an atomic that does not return data.
Definition: request.hh:175
gem5::GPUDynInst::isMemSync
bool isMemSync() const
Definition: gpu_dyn_inst.cc:402
gem5::Request::INV_L1
@ INV_L1
Definition: request.hh:305
gem5::GPUDynInst::GPUDynInst
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum)
Definition: gpu_dyn_inst.cc:46
gem5::GPUDynInst::hasSourceVgpr
bool hasSourceVgpr() const
Definition: gpu_dyn_inst.cc:238
gem5::GPUDynInst::isNop
bool isNop() const
Definition: gpu_dyn_inst.cc:336
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::GPUDynInst::isUnconditionalJump
bool isUnconditionalJump() const
Definition: gpu_dyn_inst.cc:372
gem5::AtomicOpCAS::computeUnit
ComputeUnit * computeUnit
Definition: gpu_dyn_inst.hh:62
gem5::RegisterOperandInfo
Definition: gpu_dyn_inst.hh:81
gem5::GPUDynInst::dstScalarRegOperands
const std::vector< OperandInfo > & dstScalarRegOperands() const
Definition: gpu_dyn_inst.cc:132
gem5::GPUDynInst::isAtomicMin
bool isAtomicMin() const
Definition: gpu_dyn_inst.cc:623
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::GPUDynInst::isAtomicOr
bool isAtomicOr() const
Definition: gpu_dyn_inst.cc:570
gem5::GPUDynInst::numDstRegOperands
int numDstRegOperands()
Definition: gpu_dyn_inst.cc:144
gem5::GPUDynInst::resolveFlatSegment
void resolveFlatSegment(const VectorMask &mask)
Definition: gpu_dyn_inst.cc:809
gem5::Request::GLC_BIT
@ GLC_BIT
Definition: request.hh:315
gem5::GPUDynInst::addr
std::vector< Addr > addr
Definition: gpu_dyn_inst.hh:170
gem5::GPUDynInst::x_data
uint8_t * x_data
Definition: gpu_dyn_inst.hh:180
gem5::GPUDynInst::latency
WaitClass latency
Definition: gpu_dyn_inst.hh:201
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::GPUDynInst::isMAC
bool isMAC() const
Definition: gpu_dyn_inst.cc:725
gem5::WaitClass
Definition: misc.hh:69
gem5::GPUDynInst::isSDWAInst
bool isSDWAInst() const
Definition: gpu_dyn_inst.cc:354
gem5::ComputeUnit::ComputeUnitStats::numCASOps
statistics::Scalar numCASOps
Definition: compute_unit.hh:1077
gem5::GPUDynInst::writesMode
bool writesMode() const
Definition: gpu_dyn_inst.cc:500
gem5::RegisterOperandInfo::operandIdx
int operandIdx() const
Definition: gpu_dyn_inst.hh:97
gem5::GPUDynInst::cu_id
int cu_id
Definition: gpu_dyn_inst.hh:191
gem5::GPUDynInst::isSystemCoherent
bool isSystemCoherent() const
Definition: gpu_dyn_inst.cc:695
gem5::GPUDynInst::getNumOperands
int getNumOperands() const
Definition: gpu_dyn_inst.cc:232
gem5::GPUDynInst::dstVecRegOperands
const std::vector< OperandInfo > & dstVecRegOperands() const
Definition: gpu_dyn_inst.cc:120
gem5::Gcn3ISA::RegSizeDWords
const int RegSizeDWords
Size of a single-precision register in DWords.
Definition: gpu_registers.hh:178
gem5::GPUDynInst
Definition: gpu_dyn_inst.hh:118
gem5::AtomicOpCAS::AtomicOpCAS
AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
Definition: gpu_dyn_inst.hh:64
gem5::GPUDynInst::numSrcRegOperands
int numSrcRegOperands()
Definition: gpu_dyn_inst.cc:138
gem5::GPUDynInst::staticInstruction
GPUStaticInst * staticInstruction()
Definition: gpu_dyn_inst.hh:213
gem5::GPUDynInst::allLanesZero
bool allLanesZero() const
Definition: gpu_dyn_inst.hh:409
gem5::RegisterOperandInfo::virtIndices
const std::vector< int > virtIndices
Definition: gpu_dyn_inst.hh:114
gem5::GPUDynInst::isLocalMem
bool isLocalMem() const
Definition: gpu_dyn_inst.cc:641
gem5::AtomicOpCAS
Definition: gpu_dyn_inst.hh:56
gem5::GPUDynInst::numDstScalarDWords
int numDstScalarDWords()
Definition: gpu_dyn_inst.cc:220
gem5::GPUDynInst::~GPUDynInst
~GPUDynInst()
Definition: gpu_dyn_inst.cc:98
gem5::GPUDynInst::isDPPInst
bool isDPPInst() const
Definition: gpu_dyn_inst.cc:360
gem5::GPUDynInst::isF32
bool isF32() const
Definition: gpu_dyn_inst.cc:707
gem5::GPUDynInst::maxSrcScalarRegOpSize
int maxSrcScalarRegOpSize
Definition: gpu_dyn_inst.hh:481
gem5::GPUDynInst::writesVCC
bool writesVCC() const
Definition: gpu_dyn_inst.cc:484
gem5::GPUDynInst::numDstScalarRegOperands
int numDstScalarRegOperands() const
Definition: gpu_dyn_inst.cc:194
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUDynInst::execUnitId
int execUnitId
Definition: gpu_dyn_inst.hh:197
gem5::GPUDynInst::isStore
bool isStore() const
Definition: gpu_dyn_inst.cc:426
gem5::GPUDynInst::ignoreExec
bool ignoreExec() const
Definition: gpu_dyn_inst.cc:518
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:51
gem5::GPUDynInst::printStatusVector
std::string printStatusVector() const
Definition: gpu_dyn_inst.hh:435
gem5::GPUDynInst::isFlat
bool isFlat() const
Definition: gpu_dyn_inst.cc:414
gem5::RegisterOperandInfo::RegisterOperandInfo
RegisterOperandInfo()=delete
gem5::GPUDynInst::updateStats
void updateStats()
Definition: gpu_dyn_inst.cc:932
gem5::GPUExecContext
Definition: gpu_exec_context.hh:47
gem5::GPUDynInst::isSpillSeg
bool isSpillSeg() const
Definition: gpu_dyn_inst.cc:683
gem5::GPUDynInst::setRequestFlags
void setRequestFlags(RequestPtr req) const
Definition: gpu_dyn_inst.hh:333
gem5::GPUDynInst::getRoundTripTime
std::vector< Tick > getRoundTripTime() const
Definition: gpu_dyn_inst.hh:469
gem5::GPUDynInst::execute
void execute(GPUDynInstPtr gpuDynInst)
Definition: gpu_dyn_inst.cc:108
gem5::GPUDynInst::simdId
int simdId
Definition: gpu_dyn_inst.hh:185
gem5::GPUDynInst::isScalar
bool isScalar() const
Definition: gpu_dyn_inst.cc:456
gem5::RegisterOperandInfo::RegisterOperandInfo
RegisterOperandInfo(int op_idx, int num_dwords, const std::vector< int > &virt_indices, const std::vector< int > &phys_indices)
Definition: gpu_dyn_inst.hh:85
gem5::GPUDynInst::isSpecialOp
bool isSpecialOp() const
Definition: gpu_dyn_inst.cc:378
gem5::GPUDynInst::readsFlatScratch
bool readsFlatScratch() const
Definition: gpu_dyn_inst.cc:554
gem5::GPUDynInst::readsExecMask
bool readsExecMask() const
Definition: gpu_dyn_inst.cc:534
gem5::GPUDynInst::isAtomicAdd
bool isAtomicAdd() const
Definition: gpu_dyn_inst.cc:593
gem5::GPUDynInst::isGlobalSeg
bool isGlobalSeg() const
Definition: gpu_dyn_inst.cc:653
gem5::GPUDynInst::numDstVecRegOperands
int numDstVecRegOperands() const
Definition: gpu_dyn_inst.cc:156
gem5::GPUDynInst::resetEntireStatusVector
void resetEntireStatusVector()
Definition: gpu_dyn_inst.hh:362
gem5::GPUDynInst::completeAcc
void completeAcc(GPUDynInstPtr gpuDynInst)
Definition: gpu_dyn_inst.cc:304
gpu_exec_context.hh
gem5::GPUDynInst::isMemRef
bool isMemRef() const
Definition: gpu_dyn_inst.cc:408
gem5::GPUDynInst::numDstVecDWords
int numDstVecDWords()
Definition: gpu_dyn_inst.cc:182
gem5::GPUDynInst::isEndOfKernel
bool isEndOfKernel() const
Definition: gpu_dyn_inst.cc:342
gem5::GPUDynInst::isBranch
bool isBranch() const
Definition: gpu_dyn_inst.cc:324
gem5::GPUDynInst::maxSrcVecRegOpSize
int maxSrcVecRegOpSize
Definition: gpu_dyn_inst.hh:480
gem5::GPUDynInst::a_data
uint8_t * a_data
Definition: gpu_dyn_inst.hh:178
gem5::GPUDynInst::srcLiteral
TheGpuISA::ScalarRegU32 srcLiteral() const
Definition: gpu_dyn_inst.cc:926
gem5::GPUDynInst::isF16
bool isF16() const
Definition: gpu_dyn_inst.cc:701
gem5::GPUDynInst::lineAddressTime
std::map< Addr, std::vector< Tick > > lineAddressTime
Definition: gpu_dyn_inst.hh:492
logging.hh
gem5::GPUDynInst::maxSrcScalarRegOperandSize
int maxSrcScalarRegOperandSize()
Definition: gpu_dyn_inst.cc:200
gem5::GPUDynInst::isArgLoad
bool isArgLoad() const
Definition: gpu_dyn_inst.cc:629
gem5::InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:40
gem5::GPUDynInst::isReadOnlySeg
bool isReadOnlySeg() const
Definition: gpu_dyn_inst.cc:677
gem5::GPUDynInst::isAtomicMax
bool isAtomicMax() const
Definition: gpu_dyn_inst.cc:617
trace.hh
gem5::AtomicOpCAS::c
T c
Definition: gpu_dyn_inst.hh:59
gem5::GPUDynInst::isPrivateSeg
bool isPrivateSeg() const
Definition: gpu_dyn_inst.cc:671
gem5::GPUDynInst::wg_id
int wg_id
Definition: gpu_dyn_inst.hh:193
gem5::GPUDynInst::writesExecMask
bool writesExecMask() const
Definition: gpu_dyn_inst.cc:524
gem5::GPUDynInst::hasSourceSgpr
bool hasSourceSgpr() const
Definition: gpu_dyn_inst.cc:250
gem5::GPUDynInst::numScalarReqs
int numScalarReqs
Definition: gpu_dyn_inst.hh:462
gem5::AtomicOpFunctorPtr
std::unique_ptr< AtomicOpFunctor > AtomicOpFunctorPtr
Definition: amo.hh:242
gem5::GPUDynInst::isAtomicCAS
bool isAtomicCAS() const
Definition: gpu_dyn_inst.cc:582
gem5::ComputeUnit::ComputeUnitStats::numFailedCASOps
statistics::Scalar numFailedCASOps
Definition: compute_unit.hh:1078
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::GPUDynInst::numSrcVecDWords
int numSrcVecDWords()
Definition: gpu_dyn_inst.cc:176
gem5::GPUDynInst::writesSCC
bool writesSCC() const
Definition: gpu_dyn_inst.cc:468
gem5::GPUDynInst::resetStatusVector
void resetStatusVector(int lane)
Definition: gpu_dyn_inst.hh:372
gem5::GPUExecContext::cu
ComputeUnit * cu
Definition: gpu_exec_context.hh:64
gem5::GPUDynInst::isWaitcnt
bool isWaitcnt() const
Definition: gpu_dyn_inst.cc:384
gem5::GPUDynInst::isAtomicExch
bool isAtomicExch() const
Definition: gpu_dyn_inst.cc:587
gem5::GPUDynInst::isF64
bool isF64() const
Definition: gpu_dyn_inst.cc:713
operand_info.hh
gem5::RegisterOperandInfo::virtIdx
int virtIdx(int reg_num=0) const
We typically only need the first virtual register for the operand regardless of its size.
Definition: gpu_dyn_inst.hh:102
gem5::GPUDynInst::isAtomicSub
bool isAtomicSub() const
Definition: gpu_dyn_inst.cc:599
gem5::GPUDynInst::isALU
bool isALU() const
accessor methods for the attributes of the underlying GPU static instruction
Definition: gpu_dyn_inst.cc:318
gem5::GPUDynInst::exec_mask
VectorMask exec_mask
Definition: gpu_dyn_inst.hh:182
gem5::GPUDynInst::isReturn
bool isReturn() const
Definition: gpu_dyn_inst.cc:366
gem5::GPUDynInst::pAddr
Addr pAddr
Definition: gpu_dyn_inst.hh:171
gem5::GPUDynInst::isGlobalMem
bool isGlobalMem() const
Definition: gpu_dyn_inst.cc:635
gem5::GPUDynInst::setStatusVector
void setStatusVector(int lane, int newVal)
Definition: gpu_dyn_inst.hh:379
gem5::Gcn3ISA::ScalarRegU32
uint32_t ScalarRegU32
Definition: gpu_registers.hh:155
gem5::GPUDynInst::maxOperandSize
int maxOperandSize()
Definition: gpu_dyn_inst.cc:226
gem5::GPUDynInst::srcScalarRegOperands
const std::vector< OperandInfo > & srcScalarRegOperands() const
Definition: gpu_dyn_inst.cc:126
gem5::GPUDynInst::executedAs
enums::StorageClassType executedAs()
Definition: gpu_dyn_inst.cc:288

Generated on Tue Sep 7 2021 14:53:47 for gem5 by doxygen 1.8.17