gpu_dyn_inst.cc
/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/gpu_dyn_inst.hh"

#include "debug/GPUInst.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

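// A GPUDynInst couples a decoded GPUStaticInst with the dynamic state of one
// wavefront-wide execution of it: per-lane addresses, operand data buffers,
// a sequence number, and the IDs of the issuing CU, SIMD unit, and wavefront.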
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
                       GPUStaticInst *static_inst, InstSeqNum instSeqNum)
    : GPUExecContext(_cu, _wf), scalarAddr(0), addr(computeUnit()->wfSize(),
      (Addr)0), numScalarReqs(0), isSaveRestore(false),
      _staticInst(static_inst), _seqNum(instSeqNum),
      maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
{
    _staticInst->initDynOperandInfo(wavefront(), computeUnit());

    statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
    // vector instructions can have up to 4 source/destination operands
    d_data = new uint8_t[computeUnit()->wfSize() * 4 * sizeof(double)];
    a_data = new uint8_t[computeUnit()->wfSize() * 8];
    x_data = new uint8_t[computeUnit()->wfSize() * 8];
    // scalar loads can read up to 16 Dwords of data (see publicly
    // available GCN3 ISA manual)
    scalar_data = new uint8_t[16 * sizeof(uint32_t)];
    for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
        scalar_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
        a_data[i] = 0;
        x_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 4 * sizeof(double)); ++i) {
        d_data[i] = 0;
    }
    time = 0;

    cu_id = _cu->cu_id;
    if (_wf) {
        simdId = _wf->simdId;
        wfDynId = _wf->wfDynId;
        kern_id = _wf->kernId;
        wg_id = _wf->wgId;
        wfSlotId = _wf->wfSlotId;
    } else {
        simdId = -1;
        wfDynId = -1;
        kern_id = -1;
        wg_id = -1;
        wfSlotId = -1;
    }

    DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
            _staticInst->disassemble(), _staticInst->getNumOperands());

    _staticInst->initOperandInfo();
}

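// The dynamic instruction owns its per-lane data buffers as well as the
// wrapped GPUStaticInst, so both are freed here.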
GPUDynInst::~GPUDynInst()
{
    delete[] d_data;
    delete[] a_data;
    delete[] x_data;
    delete[] scalar_data;
    delete _staticInst;
}

void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
    _staticInst->execute(gpuDynInst);
}

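// The operand accessors below simply forward to the underlying
// GPUStaticInst, which owns the operand metadata.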
const std::vector<OperandInfo>&
GPUDynInst::srcVecRegOperands() const
{
    return _staticInst->srcVecRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::dstVecRegOperands() const
{
    return _staticInst->dstVecRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::srcScalarRegOperands() const
{
    return _staticInst->srcScalarRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::dstScalarRegOperands() const
{
    return _staticInst->dstScalarRegOperands();
}

int
GPUDynInst::numSrcRegOperands()
{
    return _staticInst->numSrcRegOperands();
}

int
GPUDynInst::numDstRegOperands()
{
    return _staticInst->numDstRegOperands();
}

int
GPUDynInst::numSrcVecRegOperands() const
{
    return srcVecRegOperands().size();
}

int
GPUDynInst::numDstVecRegOperands() const
{
    return dstVecRegOperands().size();
}

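// The maximum source-operand widths are computed lazily: the cached values
// are initialized to -1 in the constructor and filled in on first use.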
int
GPUDynInst::maxSrcVecRegOperandSize()
{
    if (maxSrcVecRegOpSize != -1)
        return maxSrcVecRegOpSize;

    maxSrcVecRegOpSize = 0;
    for (const auto& srcVecOp : srcVecRegOperands())
        if (srcVecOp.sizeInDWords() > maxSrcVecRegOpSize)
            maxSrcVecRegOpSize = srcVecOp.sizeInDWords();

    return maxSrcVecRegOpSize;
}

int
GPUDynInst::numSrcVecDWords()
{
    return _staticInst->numSrcVecDWords();
}

int
GPUDynInst::numDstVecDWords()
{
    return _staticInst->numDstVecDWords();
}

int
GPUDynInst::numSrcScalarRegOperands() const
{
    return srcScalarRegOperands().size();
}

int
GPUDynInst::numDstScalarRegOperands() const
{
    return dstScalarRegOperands().size();
}

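// Same lazy computation for the widest scalar-register source operand.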
int
GPUDynInst::maxSrcScalarRegOperandSize()
{
    if (maxSrcScalarRegOpSize != -1)
        return maxSrcScalarRegOpSize;

    maxSrcScalarRegOpSize = 0;
    for (const auto& srcScOp : srcScalarRegOperands())
        if (srcScOp.sizeInDWords() > maxSrcScalarRegOpSize)
            maxSrcScalarRegOpSize = srcScOp.sizeInDWords();

    return maxSrcScalarRegOpSize;
}

int
GPUDynInst::numSrcScalarDWords()
{
    return _staticInst->numSrcScalarDWords();
}

int
GPUDynInst::numDstScalarDWords()
{
    return _staticInst->numDstScalarDWords();
}

int
GPUDynInst::maxOperandSize()
{
    return _staticInst->maxOperandSize();
}

int
GPUDynInst::getNumOperands() const
{
    return _staticInst->getNumOperands();
}

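// Predicates for whether the instruction names any vector (VGPR) or
// scalar (SGPR) register operands at all.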
bool
GPUDynInst::hasSourceVgpr() const
{
    return !srcVecRegOperands().empty();
}

bool
GPUDynInst::hasDestinationVgpr() const
{
    return !dstVecRegOperands().empty();
}

bool
GPUDynInst::hasSourceSgpr() const
{
    return !srcScalarRegOperands().empty();
}

bool
GPUDynInst::hasDestinationSgpr() const
{
    return !dstScalarRegOperands().empty();
}

bool
GPUDynInst::isOpcode(const std::string& opcodeStr,
                     const std::string& extStr) const
{
    return _staticInst->opcode().find(opcodeStr) != std::string::npos &&
           _staticInst->opcode().find(extStr) != std::string::npos;
}

bool
GPUDynInst::isOpcode(const std::string& opcodeStr) const
{
    return _staticInst->opcode().find(opcodeStr) != std::string::npos;
}

const std::string&
GPUDynInst::disassemble() const
{
    return _staticInst->disassemble();
}

InstSeqNum
GPUDynInst::seqNum() const
{
    return _seqNum;
}

Addr
GPUDynInst::pc()
{
    return wavefront()->pc();
}

void
GPUDynInst::pc(Addr _pc)
{
    wavefront()->pc(_pc);
}

enums::StorageClassType
GPUDynInst::executedAs()
{
    return _staticInst->executed_as;
}

// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            cu->cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->initiateAcc(gpuDynInst);
}

void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector="
            "%#x\n complete",
            cu->cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->completeAcc(gpuDynInst);
}

/**
 * accessor methods for the attributes of
 * the underlying GPU static instruction
 */
bool
GPUDynInst::isALU() const
{
    return _staticInst->isALU();
}

bool
GPUDynInst::isBranch() const
{
    return _staticInst->isBranch();
}

bool
GPUDynInst::isCondBranch() const
{
    return _staticInst->isCondBranch();
}

bool
GPUDynInst::isNop() const
{
    return _staticInst->isNop();
}

bool
GPUDynInst::isEndOfKernel() const
{
    return _staticInst->isEndOfKernel();
}

bool
GPUDynInst::isKernelLaunch() const
{
    return _staticInst->isKernelLaunch();
}

bool
GPUDynInst::isSDWAInst() const
{
    return _staticInst->isSDWAInst();
}

bool
GPUDynInst::isDPPInst() const
{
    return _staticInst->isDPPInst();
}

bool
GPUDynInst::isReturn() const
{
    return _staticInst->isReturn();
}

bool
GPUDynInst::isUnconditionalJump() const
{
    return _staticInst->isUnconditionalJump();
}

bool
GPUDynInst::isSpecialOp() const
{
    return _staticInst->isSpecialOp();
}

bool
GPUDynInst::isWaitcnt() const
{
    return _staticInst->isWaitcnt();
}

bool
GPUDynInst::isSleep() const
{
    return _staticInst->isSleep();
}

bool
GPUDynInst::isBarrier() const
{
    return _staticInst->isBarrier();
}

bool
GPUDynInst::isMemSync() const
{
    return _staticInst->isMemSync();
}

bool
GPUDynInst::isMemRef() const
{
    return _staticInst->isMemRef();
}

bool
GPUDynInst::isFlat() const
{
    return _staticInst->isFlat();
}

bool
GPUDynInst::isFlatGlobal() const
{
    return _staticInst->isFlatGlobal();
}

bool
GPUDynInst::isLoad() const
{
    return _staticInst->isLoad();
}

bool
GPUDynInst::isStore() const
{
    return _staticInst->isStore();
}

bool
GPUDynInst::isAtomic() const
{
    return _staticInst->isAtomic();
}

bool
GPUDynInst::isAtomicNoRet() const
{
    return _staticInst->isAtomicNoRet();
}

bool
GPUDynInst::isAtomicRet() const
{
    return _staticInst->isAtomicRet();
}

bool
GPUDynInst::isVector() const
{
    return !_staticInst->isScalar();
}

bool
GPUDynInst::isScalar() const
{
    return _staticInst->isScalar();
}

bool
GPUDynInst::readsSCC() const
{
    return _staticInst->readsSCC();
}

bool
GPUDynInst::writesSCC() const
{
    return _staticInst->writesSCC();
}

bool
GPUDynInst::readsVCC() const
{
    for (const auto& srcOp : _staticInst->srcOperands())
        if (srcOp.isVcc())
            return true;

    return _staticInst->readsVCC();
}

bool
GPUDynInst::writesVCC() const
{
    for (const auto& dstOp : _staticInst->dstOperands())
        if (dstOp.isVcc())
            return true;

    return _staticInst->writesVCC();
}

bool
GPUDynInst::readsMode() const
{
    return _staticInst->readsMode();
}

bool
GPUDynInst::writesMode() const
{
    return _staticInst->writesMode();
}

bool
GPUDynInst::readsEXEC() const
{
    return _staticInst->readsEXEC();
}

bool
GPUDynInst::writesEXEC() const
{
    return _staticInst->writesEXEC();
}

bool
GPUDynInst::ignoreExec() const
{
    return _staticInst->ignoreExec();
}

bool
GPUDynInst::writesExecMask() const
{
    for (const auto& dstOp : _staticInst->dstOperands())
        if (dstOp.isExec())
            return true;

    return _staticInst->writesEXEC();
}

bool
GPUDynInst::readsExecMask() const
{
    for (const auto& srcOp : _staticInst->srcOperands())
        if (srcOp.isExec())
            return true;

    return _staticInst->readsEXEC();
}

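// Detect accesses to the FLAT_SCRATCH special register by scanning the
// scalar register operands.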
bool
GPUDynInst::writesFlatScratch() const
{
    for (const auto& dstScalarOp : dstScalarRegOperands())
        if (dstScalarOp.isFlatScratch())
            return true;

    return false;
}

bool
GPUDynInst::readsFlatScratch() const
{
    for (const auto& srcScalarOp : srcScalarRegOperands())
        if (srcScalarOp.isFlatScratch())
            return true;

    return false;
}

bool
GPUDynInst::isAtomicAnd() const
{
    return _staticInst->isAtomicAnd();
}

bool
GPUDynInst::isAtomicOr() const
{
    return _staticInst->isAtomicOr();
}

bool
GPUDynInst::isAtomicXor() const
{
    return _staticInst->isAtomicXor();
}

bool
GPUDynInst::isAtomicCAS() const
{
    return _staticInst->isAtomicCAS();
}

bool
GPUDynInst::isAtomicExch() const
{
    return _staticInst->isAtomicExch();
}

bool
GPUDynInst::isAtomicAdd() const
{
    return _staticInst->isAtomicAdd();
}

bool
GPUDynInst::isAtomicSub() const
{
    return _staticInst->isAtomicSub();
}

bool
GPUDynInst::isAtomicInc() const
{
    return _staticInst->isAtomicInc();
}

bool
GPUDynInst::isAtomicDec() const
{
    return _staticInst->isAtomicDec();
}

bool
GPUDynInst::isAtomicMax() const
{
    return _staticInst->isAtomicMax();
}

bool
GPUDynInst::isAtomicMin() const
{
    return _staticInst->isAtomicMin();
}

bool
GPUDynInst::isArgLoad() const
{
    return _staticInst->isArgLoad();
}

bool
GPUDynInst::isGlobalMem() const
{
    return _staticInst->isGlobalMem();
}

bool
GPUDynInst::isLocalMem() const
{
    return _staticInst->isLocalMem();
}

bool
GPUDynInst::isArgSeg() const
{
    return _staticInst->isArgSeg();
}

bool
GPUDynInst::isGlobalSeg() const
{
    return _staticInst->isGlobalSeg();
}

bool
GPUDynInst::isGroupSeg() const
{
    return _staticInst->isGroupSeg();
}

bool
GPUDynInst::isKernArgSeg() const
{
    return _staticInst->isKernArgSeg();
}

bool
GPUDynInst::isPrivateSeg() const
{
    return _staticInst->isPrivateSeg();
}

bool
GPUDynInst::isReadOnlySeg() const
{
    return _staticInst->isReadOnlySeg();
}

bool
GPUDynInst::isSpillSeg() const
{
    return _staticInst->isSpillSeg();
}

bool
GPUDynInst::isGloballyCoherent() const
{
    return _staticInst->isGloballyCoherent();
}

bool
GPUDynInst::isSystemCoherent() const
{
    return _staticInst->isSystemCoherent();
}

bool
GPUDynInst::isF16() const
{
    return _staticInst->isF16();
}

bool
GPUDynInst::isF32() const
{
    return _staticInst->isF32();
}

bool
GPUDynInst::isF64() const
{
    return _staticInst->isF64();
}

bool
GPUDynInst::isFMA() const
{
    return _staticInst->isFMA();
}

bool
GPUDynInst::isMAC() const
{
    return _staticInst->isMAC();
}

bool
GPUDynInst::isMAD() const
{
    return _staticInst->isMAD();
}

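// Use the shader's aperture (APE) registers to classify each active lane's
// flat address as LDS (group), scratch (private), or global, recording the
// result in the static instruction's executed_as field.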
void
GPUDynInst::doApertureCheck(const VectorMask &mask)
{
    assert(mask.any());
    // find the segment of the first active address, after
    // that we check that all other active addresses also
    // fall within the same APE
    for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
        if (mask[lane]) {
            if (computeUnit()->shader->isLdsApe(addr[lane])) {
                // group segment
                staticInstruction()->executed_as = enums::SC_GROUP;
                break;
            } else if (computeUnit()->shader->isScratchApe(addr[lane])) {
                // private segment
                staticInstruction()->executed_as = enums::SC_PRIVATE;
                break;
            } else if (computeUnit()->shader->isGpuVmApe(addr[lane])) {
                // we won't support GPUVM
                fatal("flat access is in GPUVM APE\n");
            } else if (bits(addr[lane], 63, 47) != 0x1FFFF &&
                       bits(addr[lane], 63, 47)) {
                // we are in the "hole", this is a memory violation
                fatal("flat access at addr %#x has a memory violation\n",
                      addr[lane]);
            } else {
                // global memory segment
                staticInstruction()->executed_as = enums::SC_GLOBAL;
                break;
            }
        }
    }

    // we should have found the segment
    assert(executedAs() != enums::SC_NONE);

    // flat accesses should not straddle multiple APEs so we
    // must check that all addresses fall within the same APE
    if (executedAs() == enums::SC_GROUP) {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was LDS,
                // all the rest should be
                assert(computeUnit()->shader->isLdsApe(addr[lane]));
            }
        }
    } else if (executedAs() == enums::SC_PRIVATE) {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was private,
                // all the rest should be
                assert(computeUnit()->shader->isScratchApe(addr[lane]));
            }
        }
    } else {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was global,
                // all the rest should be. because we don't have an
                // explicit range of the global segment, we just make
                // sure that the address falls in no other APE and that
                // it is not a memory violation
                assert(!computeUnit()->shader->isLdsApe(addr[lane]));
                assert(!computeUnit()->shader->isScratchApe(addr[lane]));
                assert(!computeUnit()->shader->isGpuVmApe(addr[lane]));
                assert(!(bits(addr[lane], 63, 47) != 0x1FFFF
                       && bits(addr[lane], 63, 47)));
            }
        }
    }
}

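// With the aperture known, rewrite each lane's flat address into the
// corresponding segment (LDS offset or scratch address), steer the request
// to the appropriate execution unit, and release the extra pipeline
// resources that the flat instruction reserved.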
void
GPUDynInst::resolveFlatSegment(const VectorMask &mask)
{
    doApertureCheck(mask);

    // Now that we know the aperture, do the following:
    // 1. Transform the flat address to its segmented equivalent.
    // 2. Set the execUnitId based on the aperture check.
    // 3. Decrement any extra resources that were reserved. Other
    //    resources are released as normal, below.
    if (executedAs() == enums::SC_GLOBAL) {
        // no transformation for global segment
        wavefront()->execUnitId = wavefront()->flatGmUnitId;
        if (isLoad()) {
            wavefront()->rdLmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrLmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrLmReqsInPipe--;
            wavefront()->rdLmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else if (executedAs() == enums::SC_GROUP) {
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                // flat address calculation goes here.
                // addr[lane] = segmented address
                addr[lane] = addr[lane] -
                    wavefront()->computeUnit->shader->ldsApe().base;
                assert(addr[lane] <
                    wavefront()->computeUnit->getLds().getAddrRange().size());
            }
        }
        wavefront()->execUnitId = wavefront()->flatLmUnitId;
        wavefront()->decVMemInstsIssued();
        if (isLoad()) {
            wavefront()->rdGmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrGmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrGmReqsInPipe--;
            wavefront()->rdGmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else if (executedAs() == enums::SC_PRIVATE) {
        // Scratch (private) accesses are backed by main memory: read the
        // per-wave scratch offset and per-lane size from the FLAT_SCRATCH
        // SGPRs and rebase each lane's address into the scratch aperture.
        uint32_t numSgprs = wavefront()->maxSgprs;
        uint32_t physSgprIdx =
            wavefront()->computeUnit->registerManager->mapSgpr(wavefront(),
                numSgprs - 3);
        uint32_t offset =
            wavefront()->computeUnit->srf[simdId]->read(physSgprIdx);
        physSgprIdx =
            wavefront()->computeUnit->registerManager->mapSgpr(wavefront(),
                numSgprs - 4);
        uint32_t size =
            wavefront()->computeUnit->srf[simdId]->read(physSgprIdx);
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                addr[lane] = addr[lane] + lane * size + offset +
                    wavefront()->computeUnit->shader->getHiddenPrivateBase() -
                    wavefront()->computeUnit->shader->getScratchBase();
            }
        }
        wavefront()->execUnitId = wavefront()->flatGmUnitId;
        wavefront()->decLGKMInstsIssued();
        if (isLoad()) {
            wavefront()->rdLmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrLmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrLmReqsInPipe--;
            wavefront()->rdLmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else {
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                panic("flat addr %#llx maps to bad segment %d\n",
                      addr[lane], executedAs());
            }
        }
    }
}

TheGpuISA::ScalarRegU32
GPUDynInst::srcLiteral() const
{
    return _staticInst->srcLiteral();
}

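// Bump per-CU statistics when a memory instruction executes: counts for LDS,
// flat, and global accesses, plus the page-divergence histogram and the
// page-access table for global memory.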
void
GPUDynInst::updateStats()
{
    if (_staticInst->isLocalMem()) {
        // access to LDS (shared) memory
        cu->stats.dynamicLMemInstrCnt++;
    } else if (_staticInst->isFlat()) {
        cu->stats.dynamicFlatMemInstrCnt++;
    } else {
        // access to global memory

        // update PageDivergence histogram
        int number_pages_touched = cu->pagesTouched.size();
        assert(number_pages_touched);
        cu->stats.pageDivergenceDist.sample(number_pages_touched);

        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;

        for (auto it : cu->pagesTouched) {
            // see if this page has been touched before. if not, this also
            // inserts the page into the table.
            ret = cu->pageAccesses
                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                        std::make_pair(1, it.second)));

            // if yes, then update the stats
            if (!ret.second) {
                ret.first->second.first++;
                ret.first->second.second += it.second;
            }
        }

        cu->pagesTouched.clear();

        // total number of memory instructions (dynamic)
        // Atomics are counted as a single memory instruction.
        // this is # memory instructions per wavefronts, not per workitem
        cu->stats.dynamicGMemInstrCnt++;
    }
}

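// Record the time at which this request reaches each hop on its way through
// the memory system; only the first sample per hop is kept when requests
// are coalesced.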
void
GPUDynInst::profileRoundTripTime(Tick currentTime, int hopId)
{
    // Only take the first measurement in the case of coalescing
    if (roundTripTime.size() > hopId)
        return;

    roundTripTime.push_back(currentTime);
}

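// Same idea, but tracked per cache-line address: append a timestamp for this
// hop, starting a new vector the first time a line is seen (hopId 0).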
void
GPUDynInst::profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
{
    if (lineAddressTime.count(addr)) {
        if (lineAddressTime[addr].size() > hopId) {
            return;
        }

        lineAddressTime[addr].push_back(currentTime);
    } else if (hopId == 0) {
        auto addressTimeVec = std::vector<Tick> { currentTime };
        lineAddressTime.insert(std::make_pair(addr, addressTimeVec));
    }
}

} // namespace gem5