gpu_dyn_inst.cc (gem5 v22.1.0.0)
/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/gpu_dyn_inst.hh"

#include "debug/GPUInst.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
                       GPUStaticInst *static_inst, InstSeqNum instSeqNum)
    : GPUExecContext(_cu, _wf), scalarAddr(0), addr(computeUnit()->wfSize(),
      (Addr)0), numScalarReqs(0), isSaveRestore(false),
      _staticInst(static_inst), _seqNum(instSeqNum),
      maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
{
    _staticInst->initOperandInfo();
    statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
    // vector instructions can have up to 4 source/destination operands
    d_data = new uint8_t[computeUnit()->wfSize() * 4 * sizeof(double)];
    a_data = new uint8_t[computeUnit()->wfSize() * 8];
    x_data = new uint8_t[computeUnit()->wfSize() * 8];
    // scalar loads can read up to 16 Dwords of data (see publicly
    // available GCN3 ISA manual)
    scalar_data = new uint8_t[16 * sizeof(uint32_t)];
    for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
        scalar_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
        a_data[i] = 0;
        x_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 4 * sizeof(double)); ++i) {
        d_data[i] = 0;
    }
    time = 0;

    cu_id = _cu->cu_id;
    if (_wf) {
        simdId = _wf->simdId;
        wfDynId = _wf->wfDynId;
        kern_id = _wf->kernId;
        wg_id = _wf->wgId;
        wfSlotId = _wf->wfSlotId;
    } else {
        simdId = -1;
        wfDynId = -1;
        kern_id = -1;
        wg_id = -1;
        wfSlotId = -1;
    }

    DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
            _staticInst->disassemble(), _staticInst->getNumOperands());

    _staticInst->initDynOperandInfo(wavefront(), computeUnit());
}

GPUDynInst::~GPUDynInst()
{
    delete[] d_data;
    delete[] a_data;
    delete[] x_data;
    delete[] scalar_data;
    delete _staticInst;
}

void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
    _staticInst->execute(gpuDynInst);
}

const std::vector<OperandInfo>&
GPUDynInst::srcVecRegOperands() const
{
    return _staticInst->srcVecRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::dstVecRegOperands() const
{
    return _staticInst->dstVecRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::srcScalarRegOperands() const
{
    return _staticInst->srcScalarRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::dstScalarRegOperands() const
{
    return _staticInst->dstScalarRegOperands();
}

int
GPUDynInst::numSrcRegOperands()
{
    return _staticInst->numSrcRegOperands();
}

int
GPUDynInst::numDstRegOperands()
{
    return _staticInst->numDstRegOperands();
}

int
GPUDynInst::numSrcVecRegOperands() const
{
    return _staticInst->numSrcVecOperands();
}

int
GPUDynInst::numDstVecRegOperands() const
{
    return _staticInst->numDstVecOperands();
}

int
GPUDynInst::maxSrcVecRegOperandSize()
{
    if (maxSrcVecRegOpSize != -1)
        return maxSrcVecRegOpSize;

    maxSrcVecRegOpSize = 0;
    for (const auto& srcVecOp : srcVecRegOperands())
        if (srcVecOp.sizeInDWords() > maxSrcVecRegOpSize)
            maxSrcVecRegOpSize = srcVecOp.sizeInDWords();

    return maxSrcVecRegOpSize;
}

int
GPUDynInst::numSrcVecDWords()
{
    return _staticInst->numSrcVecDWords();
}

int
GPUDynInst::numDstVecDWords()
{
    return _staticInst->numDstVecDWords();
}

int
GPUDynInst::numSrcScalarRegOperands() const
{
    return _staticInst->numSrcScalarOperands();
}

int
GPUDynInst::numDstScalarRegOperands() const
{
    return _staticInst->numDstScalarOperands();
}

int
GPUDynInst::maxSrcScalarRegOperandSize()
{
    if (maxSrcScalarRegOpSize != -1)
        return maxSrcScalarRegOpSize;

    maxSrcScalarRegOpSize = 0;
    for (const auto& srcScOp : srcScalarRegOperands())
        if (srcScOp.sizeInDWords() > maxSrcScalarRegOpSize)
            maxSrcScalarRegOpSize = srcScOp.sizeInDWords();

    return maxSrcScalarRegOpSize;
}

int
GPUDynInst::numSrcScalarDWords()
{
    return _staticInst->numSrcScalarDWords();
}

int
GPUDynInst::numDstScalarDWords()
{
    return _staticInst->numDstScalarDWords();
}

int
GPUDynInst::maxOperandSize()
{
    return _staticInst->maxOperandSize();
}

int
GPUDynInst::getNumOperands() const
{
    return _staticInst->getNumOperands();
}

bool
GPUDynInst::hasSourceVgpr() const
{
    return !srcVecRegOperands().empty();
}

bool
GPUDynInst::hasDestinationVgpr() const
{
    return !dstVecRegOperands().empty();
}

bool
GPUDynInst::hasSourceSgpr() const
{
    return !srcScalarRegOperands().empty();
}

bool
GPUDynInst::hasDestinationSgpr() const
{
    return !dstScalarRegOperands().empty();
}

bool
GPUDynInst::isOpcode(const std::string& opcodeStr,
                     const std::string& extStr) const
{
    return _staticInst->opcode().find(opcodeStr) != std::string::npos &&
           _staticInst->opcode().find(extStr) != std::string::npos;
}

bool
GPUDynInst::isOpcode(const std::string& opcodeStr) const
{
    return _staticInst->opcode().find(opcodeStr) != std::string::npos;
}

const std::string&
GPUDynInst::disassemble() const
{
    return _staticInst->disassemble();
}

InstSeqNum
GPUDynInst::seqNum() const
{
    return _seqNum;
}

Addr
GPUDynInst::pc()
{
    return wavefront()->pc();
}

void
GPUDynInst::pc(Addr _pc)
{
    wavefront()->pc(_pc);
}

enums::StorageClassType
GPUDynInst::executedAs()
{
    return _staticInst->executed_as;
}

// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            cu->cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->initiateAcc(gpuDynInst);
}

void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector="
            "%#x\n complete",
            cu->cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->completeAcc(gpuDynInst);
}

/**
 * accessor methods for the attributes of
 * the underlying GPU static instruction
 */
bool
GPUDynInst::isALU() const
{
    return _staticInst->isALU();
}

bool
GPUDynInst::isBranch() const
{
    return _staticInst->isBranch();
}

bool
GPUDynInst::isCondBranch() const
{
    return _staticInst->isCondBranch();
}

bool
GPUDynInst::isNop() const
{
    return _staticInst->isNop();
}

bool
GPUDynInst::isEndOfKernel() const
{
    return _staticInst->isEndOfKernel();
}

bool
GPUDynInst::isKernelLaunch() const
{
    return _staticInst->isKernelLaunch();
}

bool
GPUDynInst::isSDWAInst() const
{
    return _staticInst->isSDWAInst();
}

bool
GPUDynInst::isDPPInst() const
{
    return _staticInst->isDPPInst();
}

bool
GPUDynInst::isReturn() const
{
    return _staticInst->isReturn();
}

bool
GPUDynInst::isUnconditionalJump() const
{
    return _staticInst->isUnconditionalJump();
}

bool
GPUDynInst::isSpecialOp() const
{
    return _staticInst->isSpecialOp();
}

bool
GPUDynInst::isWaitcnt() const
{
    return _staticInst->isWaitcnt();
}

bool
GPUDynInst::isSleep() const
{
    return _staticInst->isSleep();
}

bool
GPUDynInst::isBarrier() const
{
    return _staticInst->isBarrier();
}

bool
GPUDynInst::isMemSync() const
{
    return _staticInst->isMemSync();
}

bool
GPUDynInst::isMemRef() const
{
    return _staticInst->isMemRef();
}

bool
GPUDynInst::isFlat() const
{
    return _staticInst->isFlat();
}

bool
GPUDynInst::isFlatGlobal() const
{
    return _staticInst->isFlatGlobal();
}

bool
GPUDynInst::isLoad() const
{
    return _staticInst->isLoad();
}

bool
GPUDynInst::isStore() const
{
    return _staticInst->isStore();
}

bool
GPUDynInst::isAtomic() const
{
    return _staticInst->isAtomic();
}

bool
GPUDynInst::isAtomicNoRet() const
{
    return _staticInst->isAtomicNoRet();
}

bool
GPUDynInst::isAtomicRet() const
{
    return _staticInst->isAtomicRet();
}

bool
GPUDynInst::isVector() const
{
    return !_staticInst->isScalar();
}

bool
GPUDynInst::isScalar() const
{
    return _staticInst->isScalar();
}

bool
GPUDynInst::readsSCC() const
{
    return _staticInst->readsSCC();
}

bool
GPUDynInst::writesSCC() const
{
    return _staticInst->writesSCC();
}

bool
GPUDynInst::readsVCC() const
{
    for (const auto& srcOp : _staticInst->srcOperands())
        if (srcOp.isVcc())
            return true;

    return _staticInst->readsVCC();
}

bool
GPUDynInst::writesVCC() const
{
    for (const auto& dstOp : _staticInst->dstOperands())
        if (dstOp.isVcc())
            return true;

    return _staticInst->writesVCC();
}

bool
GPUDynInst::readsMode() const
{
    return _staticInst->readsMode();
}

bool
GPUDynInst::writesMode() const
{
    return _staticInst->writesMode();
}

bool
GPUDynInst::readsExec() const
{
    return _staticInst->readsEXEC();
}

bool
GPUDynInst::writesExec() const
{
    return _staticInst->writesEXEC();
}

bool
GPUDynInst::ignoreExec() const
{
    return _staticInst->ignoreExec();
}

bool
GPUDynInst::writesExecMask() const
{
    for (const auto& dstOp : _staticInst->dstOperands())
        if (dstOp.isExec())
            return true;

    return _staticInst->writesEXEC();
}

bool
GPUDynInst::readsExecMask() const
{
    for (const auto& srcOp : _staticInst->srcOperands())
        if (srcOp.isExec())
            return true;

    return _staticInst->readsEXEC();
}

bool
GPUDynInst::writesFlatScratch() const
{
    for (const auto& dstScalarOp : dstScalarRegOperands())
        if (dstScalarOp.isFlatScratch())
            return true;

    return false;
}

bool
GPUDynInst::readsFlatScratch() const
{
    for (const auto& srcScalarOp : srcScalarRegOperands())
        if (srcScalarOp.isFlatScratch())
            return true;

    return false;
}

bool
GPUDynInst::isAtomicAnd() const
{
    return _staticInst->isAtomicAnd();
}

bool
GPUDynInst::isAtomicOr() const
{
    return _staticInst->isAtomicOr();
}

bool
GPUDynInst::isAtomicXor() const
{
    return _staticInst->isAtomicXor();
}

bool
GPUDynInst::isAtomicCAS() const
{
    return _staticInst->isAtomicCAS();
}

bool
GPUDynInst::isAtomicExch() const
{
    return _staticInst->isAtomicExch();
}

bool
GPUDynInst::isAtomicAdd() const
{
    return _staticInst->isAtomicAdd();
}

bool
GPUDynInst::isAtomicSub() const
{
    return _staticInst->isAtomicSub();
}

bool
GPUDynInst::isAtomicInc() const
{
    return _staticInst->isAtomicInc();
}

bool
GPUDynInst::isAtomicDec() const
{
    return _staticInst->isAtomicDec();
}

bool
GPUDynInst::isAtomicMax() const
{
    return _staticInst->isAtomicMax();
}

bool
GPUDynInst::isAtomicMin() const
{
    return _staticInst->isAtomicMin();
}

bool
GPUDynInst::isArgLoad() const
{
    return _staticInst->isArgLoad();
}

bool
GPUDynInst::isGlobalMem() const
{
    return _staticInst->isGlobalMem();
}

bool
GPUDynInst::isLocalMem() const
{
    return _staticInst->isLocalMem();
}

bool
GPUDynInst::isArgSeg() const
{
    return _staticInst->isArgSeg();
}

bool
GPUDynInst::isGlobalSeg() const
{
    return _staticInst->isGlobalSeg();
}

bool
GPUDynInst::isGroupSeg() const
{
    return _staticInst->isGroupSeg();
}

bool
GPUDynInst::isKernArgSeg() const
{
    return _staticInst->isKernArgSeg();
}

bool
GPUDynInst::isPrivateSeg() const
{
    return _staticInst->isPrivateSeg();
}

bool
GPUDynInst::isReadOnlySeg() const
{
    return _staticInst->isReadOnlySeg();
}

bool
GPUDynInst::isSpillSeg() const
{
    return _staticInst->isSpillSeg();
}

bool
GPUDynInst::isGloballyCoherent() const
{
    return _staticInst->isGloballyCoherent();
}

bool
GPUDynInst::isSystemCoherent() const
{
    return _staticInst->isSystemCoherent();
}

bool
GPUDynInst::isF16() const
{
    return _staticInst->isF16();
}

bool
GPUDynInst::isF32() const
{
    return _staticInst->isF32();
}

bool
GPUDynInst::isF64() const
{
    return _staticInst->isF64();
}

bool
GPUDynInst::isFMA() const
{
    return _staticInst->isFMA();
}

bool
GPUDynInst::isMAC() const
{
    return _staticInst->isMAC();
}

bool
GPUDynInst::isMAD() const
{
    return _staticInst->isMAD();
}

void
GPUDynInst::doApertureCheck(const VectorMask &mask)
{
    assert(mask.any());
    // find the segment of the first active address, after
    // that we check that all other active addresses also
    // fall within the same APE
    for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
        if (mask[lane]) {
            if (computeUnit()->shader->isLdsApe(addr[lane])) {
                // group segment
                staticInstruction()->executed_as = enums::SC_GROUP;
                break;
            } else if (computeUnit()->shader->isScratchApe(addr[lane])) {
                // private segment
                staticInstruction()->executed_as = enums::SC_PRIVATE;
                break;
            } else if (computeUnit()->shader->isGpuVmApe(addr[lane])) {
                // we won't support GPUVM
                fatal("flat access is in GPUVM APE\n");
            } else if (bits(addr[lane], 63, 47) != 0x1FFFF &&
                       bits(addr[lane], 63, 47)) {
                // we are in the "hole", this is a memory violation
                fatal("flat access at addr %#x has a memory violation\n",
                      addr[lane]);
            } else {
                // global memory segment
                staticInstruction()->executed_as = enums::SC_GLOBAL;
                break;
            }
        }
    }

    // we should have found the segment
    assert(executedAs() != enums::SC_NONE);

    // flat accesses should not straddle multiple APEs, so we
    // must check that all addresses fall within the same APE
    if (executedAs() == enums::SC_GROUP) {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was LDS,
                // all the rest should be
                assert(computeUnit()->shader->isLdsApe(addr[lane]));
            }
        }
    } else if (executedAs() == enums::SC_PRIVATE) {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was private,
                // all the rest should be
                assert(computeUnit()->shader->isScratchApe(addr[lane]));
            }
        }
    } else {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was global,
                // all the rest should be. because we don't have an
                // explicit range for the global segment, we just make
                // sure that the address falls in no other APE and that
                // it is not a memory violation
                assert(!computeUnit()->shader->isLdsApe(addr[lane]));
                assert(!computeUnit()->shader->isScratchApe(addr[lane]));
                assert(!computeUnit()->shader->isGpuVmApe(addr[lane]));
                assert(!(bits(addr[lane], 63, 47) != 0x1FFFF
                       && bits(addr[lane], 63, 47)));
            }
        }
    }
}
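
// Illustrative helper; the name inFlatAddressHole is only for exposition and
// simply restates the canonical-address "hole" test applied above. Bits
// [63:47] of a 64-bit flat address must be all zeros or all ones (0x1FFFF,
// i.e. a sign-extended 48-bit address); any other pattern lies in the
// unmapped hole and is treated as a memory violation.
static inline bool
inFlatAddressHole(Addr flat_addr)
{
    const uint64_t upper = bits(flat_addr, 63, 47);
    return upper != 0 && upper != 0x1FFFF;
}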

void
GPUDynInst::resolveFlatSegment(const VectorMask &mask)
{
    doApertureCheck(mask);

    // Now that we know the aperture, do the following:
    // 1. Transform the flat address to its segmented equivalent.
    // 2. Set the execUnitId based on the aperture check.
    // 3. Decrement any extra resources that were reserved. Other
    //    resources are released as normal, below.
    if (executedAs() == enums::SC_GLOBAL) {
        // no transformation for the global segment
        wavefront()->execUnitId = wavefront()->flatGmUnitId;
        if (isLoad()) {
            wavefront()->rdLmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrLmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrLmReqsInPipe--;
            wavefront()->rdLmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else if (executedAs() == enums::SC_GROUP) {
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                // flat address calculation goes here.
                // addr[lane] = segmented address
                addr[lane] = addr[lane] -
                    wavefront()->computeUnit->shader->ldsApe().base;
                assert(addr[lane] <
                    wavefront()->computeUnit->getLds().getAddrRange().size());
            }
        }
        wavefront()->execUnitId = wavefront()->flatLmUnitId;
        wavefront()->decVMemInstsIssued();
        if (isLoad()) {
            wavefront()->rdGmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrGmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrGmReqsInPipe--;
            wavefront()->rdGmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else if (executedAs() == enums::SC_PRIVATE) {
        // Flat accesses that fall in the private (scratch) aperture are
        // backed by ordinary memory. The flat address is rebased from the
        // hidden private aperture onto the scratch backing store and
        // swizzled per lane, using the per-wavefront scratch offset and
        // per-work-item scratch size held in the wave's last SGPRs.
        uint32_t numSgprs = wavefront()->maxSgprs;
        uint32_t physSgprIdx =
            wavefront()->computeUnit->registerManager->mapSgpr(wavefront(),
                numSgprs - 3);
        uint32_t offset =
            wavefront()->computeUnit->srf[simdId]->read(physSgprIdx);
        physSgprIdx =
            wavefront()->computeUnit->registerManager->mapSgpr(wavefront(),
                numSgprs - 4);
        uint32_t size =
            wavefront()->computeUnit->srf[simdId]->read(physSgprIdx);
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                addr[lane] = addr[lane] + lane * size + offset +
                    wavefront()->computeUnit->shader->getScratchBase() -
                    wavefront()->computeUnit->shader->getHiddenPrivateBase();
            }
        }
        wavefront()->execUnitId = wavefront()->flatGmUnitId;
        wavefront()->decLGKMInstsIssued();
        if (isLoad()) {
            wavefront()->rdLmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrLmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrLmReqsInPipe--;
            wavefront()->rdLmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else {
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                panic("flat addr %#llx maps to bad segment %d\n",
                      addr[lane], executedAs());
            }
        }
    }
}

TheGpuISA::ScalarRegU32
GPUDynInst::srcLiteral() const
{
    return _staticInst->srcLiteral();
}

void
GPUDynInst::updateStats()
{
    if (_staticInst->isLocalMem()) {
        // access to LDS (shared) memory
        cu->stats.dynamicLMemInstrCnt++;
    } else if (_staticInst->isFlat()) {
        cu->stats.dynamicFlatMemInstrCnt++;
    } else {
        // access to global memory

        // update PageDivergence histogram
        int number_pages_touched = cu->pagesTouched.size();
        assert(number_pages_touched);
        cu->stats.pageDivergenceDist.sample(number_pages_touched);

        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;

        for (auto it : cu->pagesTouched) {
            // see if this page has been touched before. if not, this also
            // inserts the page into the table.
            ret = cu->pageAccesses
                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                        std::make_pair(1, it.second)));

            // if yes, then update the stats
            if (!ret.second) {
                ret.first->second.first++;
                ret.first->second.second += it.second;
            }
        }

        cu->pagesTouched.clear();

        // total number of memory instructions (dynamic)
        // Atomics are counted as a single memory instruction.
        // this is # memory instructions per wavefront, not per work-item
        cu->stats.dynamicGMemInstrCnt++;
    }
}

void
GPUDynInst::profileRoundTripTime(Tick currentTime, int hopId)
{
    // Only take the first measurement in the case of coalescing
    if (roundTripTime.size() > hopId)
        return;

    roundTripTime.push_back(currentTime);
}

void
GPUDynInst::profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
{
    if (lineAddressTime.count(addr)) {
        if (lineAddressTime[addr].size() > hopId) {
            return;
        }

        lineAddressTime[addr].push_back(currentTime);
    } else if (hopId == 0) {
        auto addressTimeVec = std::vector<Tick> { currentTime };
        lineAddressTime.insert(std::make_pair(addr, addressTimeVec));
    }
}

} // namespace gem5
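
// Usage sketch: cu, wf, si, and seq_num are hypothetical stand-ins for the
// ComputeUnit, Wavefront, decoded GPUStaticInst, and sequence number that
// the issuing pipeline already tracks; only the GPUDynInst interface itself
// comes from this file. Dynamic instructions are reference counted through
// GPUDynInstPtr, and execute() (and, for memory ops, initiateAcc() /
// completeAcc()) receive that owning shared pointer so the instruction can
// hand itself to the execution and memory models.
//
//     GPUDynInstPtr dyn_inst =
//         std::make_shared<GPUDynInst>(cu, wf, si, seq_num);
//     dyn_inst->execute(dyn_inst);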