gpu_dyn_inst.cc
/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/gpu_dyn_inst.hh"

#include "debug/GPUInst.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
                       GPUStaticInst *static_inst, InstSeqNum instSeqNum)
    : GPUExecContext(_cu, _wf), scalarAddr(0), addr(computeUnit()->wfSize(),
      (Addr)0), numScalarReqs(0), isSaveRestore(false),
      _staticInst(static_inst), _seqNum(instSeqNum),
      maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
{
    _staticInst->initOperandInfo();
    statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
    // vector instructions can have up to 4 source/destination operands
    d_data = new uint8_t[computeUnit()->wfSize() * 4 * sizeof(double)];
    a_data = new uint8_t[computeUnit()->wfSize() * 8];
    x_data = new uint8_t[computeUnit()->wfSize() * 8];
    // scalar loads can read up to 16 Dwords of data (see publicly
    // available Vega ISA manual)
    scalar_data = new uint8_t[16 * sizeof(uint32_t)];
    for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
        scalar_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
        a_data[i] = 0;
        x_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 4 * sizeof(double)); ++i) {
        d_data[i] = 0;
    }
    time = 0;

    cu_id = _cu->cu_id;
    if (_wf) {
        simdId = _wf->simdId;
        wfDynId = _wf->wfDynId;
        kern_id = _wf->kernId;
        wg_id = _wf->wgId;
        wfSlotId = _wf->wfSlotId;
    } else {
        simdId = -1;
        wfDynId = -1;
        kern_id = -1;
        wg_id = -1;
        wfSlotId = -1;
    }

    DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
            _staticInst->disassemble(), _staticInst->getNumOperands());

    _staticInst->initDynOperandInfo(wavefront(), computeUnit());

}

GPUDynInst::~GPUDynInst()
{
    delete[] d_data;
    delete[] a_data;
    delete[] x_data;
    delete[] scalar_data;
    delete _staticInst;
}

void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
    _staticInst->execute(gpuDynInst);
}

const std::vector<OperandInfo>&
GPUDynInst::srcVecRegOperands() const
{
    return _staticInst->srcVecRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::dstVecRegOperands() const
{
    return _staticInst->dstVecRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::srcScalarRegOperands() const
{
    return _staticInst->srcScalarRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::dstScalarRegOperands() const
{
    return _staticInst->dstScalarRegOperands();
}

int
GPUDynInst::numSrcRegOperands()
{
    return _staticInst->numSrcRegOperands();
}

int
GPUDynInst::numDstRegOperands()
{
    return _staticInst->numDstRegOperands();
}

int
GPUDynInst::numSrcVecRegOperands() const
{
    return _staticInst->numSrcVecOperands();
}

int
GPUDynInst::numDstVecRegOperands() const
{
    return _staticInst->numDstVecOperands();
}

int
GPUDynInst::maxSrcVecRegOperandSize()
{
    if (maxSrcVecRegOpSize != -1)
        return maxSrcVecRegOpSize;

    maxSrcVecRegOpSize = 0;
    for (const auto& srcVecOp : srcVecRegOperands())
        if (srcVecOp.sizeInDWords() > maxSrcVecRegOpSize)
            maxSrcVecRegOpSize = srcVecOp.sizeInDWords();

    return maxSrcVecRegOpSize;
}

int
GPUDynInst::numSrcVecDWords()
{
    return _staticInst->numSrcVecDWords();
}

int
GPUDynInst::numDstVecDWords()
{
    return _staticInst->numDstVecDWords();
}

int
GPUDynInst::numSrcScalarRegOperands() const
{
    return _staticInst->numSrcScalarOperands();
}

int
GPUDynInst::numDstScalarRegOperands() const
{
    return _staticInst->numDstScalarOperands();
}

int
GPUDynInst::maxSrcScalarRegOperandSize()
{
    if (maxSrcScalarRegOpSize != -1)
        return maxSrcScalarRegOpSize;

    maxSrcScalarRegOpSize = 0;
    for (const auto& srcScOp : srcScalarRegOperands())
        if (srcScOp.sizeInDWords() > maxSrcScalarRegOpSize)
            maxSrcScalarRegOpSize = srcScOp.sizeInDWords();

    return maxSrcScalarRegOpSize;
}

int
GPUDynInst::numSrcScalarDWords()
{
    return _staticInst->numSrcScalarDWords();
}

int
GPUDynInst::numDstScalarDWords()
{
    return _staticInst->numDstScalarDWords();
}

int
GPUDynInst::maxOperandSize()
{
    return _staticInst->maxOperandSize();
}

int
GPUDynInst::getNumOperands() const
{
    return _staticInst->getNumOperands();
}

bool
GPUDynInst::hasSourceVgpr() const
{
    return !srcVecRegOperands().empty();
}

bool
GPUDynInst::hasDestinationVgpr() const
{
    return !dstVecRegOperands().empty();
}

bool
GPUDynInst::hasSourceSgpr() const
{
    return !srcScalarRegOperands().empty();
}

bool
GPUDynInst::hasDestinationSgpr() const
{
    return !dstScalarRegOperands().empty();
}

bool
GPUDynInst::isOpcode(const std::string& opcodeStr,
                     const std::string& extStr) const
{
    return _staticInst->opcode().find(opcodeStr) != std::string::npos &&
           _staticInst->opcode().find(extStr) != std::string::npos;
}

bool
GPUDynInst::isOpcode(const std::string& opcodeStr) const
{
    return _staticInst->opcode().find(opcodeStr) != std::string::npos;
}

const std::string&
GPUDynInst::disassemble() const
{
    return _staticInst->disassemble();
}

InstSeqNum
GPUDynInst::seqNum() const
{
    return _seqNum;
}

Addr
GPUDynInst::pc()
{
    return wavefront()->pc();
}

void
GPUDynInst::pc(Addr _pc)
{
    wavefront()->pc(_pc);
}

enums::StorageClassType
GPUDynInst::executedAs()
{
    return _staticInst->executed_as;
}

// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            cu->cu_id, simdId, wfSlotId, exec_mask);

    _staticInst->initiateAcc(gpuDynInst);
}

void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector="
            "%#x complete\n",
            cu->cu_id, simdId, wfSlotId, exec_mask);

    _staticInst->completeAcc(gpuDynInst);
}

/**
 * accessor methods for the attributes of
 * the underlying GPU static instruction
 */
bool
GPUDynInst::isALU() const
{
    return _staticInst->isALU();
}

bool
GPUDynInst::isBranch() const
{
    return _staticInst->isBranch();
}

bool
GPUDynInst::isCondBranch() const
{
    return _staticInst->isCondBranch();
}

bool
GPUDynInst::isNop() const
{
    return _staticInst->isNop();
}

bool
GPUDynInst::isEndOfKernel() const
{
    return _staticInst->isEndOfKernel();
}

bool
GPUDynInst::isKernelLaunch() const
{
    return _staticInst->isKernelLaunch();
}

bool
GPUDynInst::isSDWAInst() const
{
    return _staticInst->isSDWAInst();
}

bool
GPUDynInst::isDPPInst() const
{
    return _staticInst->isDPPInst();
}

bool
GPUDynInst::isReturn() const
{
    return _staticInst->isReturn();
}

bool
GPUDynInst::isUnconditionalJump() const
{
    return _staticInst->isUnconditionalJump();
}

bool
GPUDynInst::isSpecialOp() const
{
    return _staticInst->isSpecialOp();
}

bool
GPUDynInst::isWaitcnt() const
{
    return _staticInst->isWaitcnt();
}

bool
GPUDynInst::isSleep() const
{
    return _staticInst->isSleep();
}

bool
GPUDynInst::isBarrier() const
{
    return _staticInst->isBarrier();
}

bool
GPUDynInst::isMemSync() const
{
    return _staticInst->isMemSync();
}

bool
GPUDynInst::isMemRef() const
{
    return _staticInst->isMemRef();
}

bool
GPUDynInst::isFlat() const
{
    return _staticInst->isFlat();
}

bool
GPUDynInst::isFlatGlobal() const
{
    return _staticInst->isFlatGlobal();
}

bool
GPUDynInst::isFlatScratch() const
{
    return _staticInst->isFlatScratch();
}

bool
GPUDynInst::isLoad() const
{
    return _staticInst->isLoad();
}

bool
GPUDynInst::isStore() const
{
    return _staticInst->isStore();
}

bool
GPUDynInst::isAtomic() const
{
    return _staticInst->isAtomic();
}

bool
GPUDynInst::isAtomicNoRet() const
{
    return _staticInst->isAtomicNoRet();
}

bool
GPUDynInst::isAtomicRet() const
{
    return _staticInst->isAtomicRet();
}

bool
GPUDynInst::isVector() const
{
    return !_staticInst->isScalar();
}

bool
GPUDynInst::isScalar() const
{
    return _staticInst->isScalar();
}

bool
GPUDynInst::readsSCC() const
{
    return _staticInst->readsSCC();
}

bool
GPUDynInst::writesSCC() const
{
    return _staticInst->writesSCC();
}

bool
GPUDynInst::readsVCC() const
{
    for (const auto& srcOp : _staticInst->srcOperands())
        if (srcOp.isVcc())
            return true;

    return _staticInst->readsVCC();
}

bool
GPUDynInst::writesVCC() const
{
    for (const auto& dstOp : _staticInst->dstOperands())
        if (dstOp.isVcc())
            return true;

    return _staticInst->writesVCC();
}

bool
GPUDynInst::readsMode() const
{
    return _staticInst->readsMode();
}

bool
GPUDynInst::writesMode() const
{
    return _staticInst->writesMode();
}

bool
GPUDynInst::readsExec() const
{
    return _staticInst->readsEXEC();
}

bool
GPUDynInst::writesExec() const
{
    return _staticInst->writesEXEC();
}

bool
GPUDynInst::ignoreExec() const
{
    return _staticInst->ignoreExec();
}

bool
GPUDynInst::writesExecMask() const
{
    for (const auto& dstOp : _staticInst->dstOperands())
        if (dstOp.isExec())
            return true;

    return _staticInst->writesEXEC();
}

bool
GPUDynInst::readsExecMask() const
{
    for (const auto& srcOp : _staticInst->srcOperands())
        if (srcOp.isExec())
            return true;

    return _staticInst->readsEXEC();
}

bool
GPUDynInst::writesFlatScratch() const
{
    for (const auto& dstScalarOp : dstScalarRegOperands())
        if (dstScalarOp.isFlatScratch())
            return true;

    return false;
}

bool
GPUDynInst::readsFlatScratch() const
{
    for (const auto& srcScalarOp : srcScalarRegOperands())
        if (srcScalarOp.isFlatScratch())
            return true;

    return false;
}

bool
GPUDynInst::needsToken() const
{
    return isGlobalMem() || isFlat() || isFlatGlobal() || isFlatScratch();
}

bool
GPUDynInst::isAtomicAnd() const
{
    return _staticInst->isAtomicAnd();
}

bool
GPUDynInst::isAtomicOr() const
{
    return _staticInst->isAtomicOr();
}

bool
GPUDynInst::isAtomicXor() const
{
    return _staticInst->isAtomicXor();
}

bool
GPUDynInst::isAtomicCAS() const
{
    return _staticInst->isAtomicCAS();
}

bool
GPUDynInst::isAtomicExch() const
{
    return _staticInst->isAtomicExch();
}

bool
GPUDynInst::isAtomicAdd() const
{
    return _staticInst->isAtomicAdd();
}

bool
GPUDynInst::isAtomicSub() const
{
    return _staticInst->isAtomicSub();
}

bool
GPUDynInst::isAtomicInc() const
{
    return _staticInst->isAtomicInc();
}

bool
GPUDynInst::isAtomicDec() const
{
    return _staticInst->isAtomicDec();
}

bool
GPUDynInst::isAtomicMax() const
{
    return _staticInst->isAtomicMax();
}

bool
GPUDynInst::isAtomicMin() const
{
    return _staticInst->isAtomicMin();
}

bool
GPUDynInst::isAtomicPkAddBF16() const
{
    return _staticInst->isAtomicPkAddBF16();
}

bool
GPUDynInst::isArgLoad() const
{
    return _staticInst->isArgLoad();
}

bool
GPUDynInst::isGlobalMem() const
{
    return _staticInst->isGlobalMem();
}

bool
GPUDynInst::isLocalMem() const
{
    return _staticInst->isLocalMem();
}

bool
GPUDynInst::isArgSeg() const
{
    return _staticInst->isArgSeg();
}

bool
GPUDynInst::isGlobalSeg() const
{
    return _staticInst->isGlobalSeg();
}

bool
GPUDynInst::isGroupSeg() const
{
    return _staticInst->isGroupSeg();
}

bool
GPUDynInst::isKernArgSeg() const
{
    return _staticInst->isKernArgSeg();
}

bool
GPUDynInst::isPrivateSeg() const
{
    return _staticInst->isPrivateSeg();
}

bool
GPUDynInst::isReadOnlySeg() const
{
    return _staticInst->isReadOnlySeg();
}

bool
GPUDynInst::isSpillSeg() const
{
    return _staticInst->isSpillSeg();
}

bool
GPUDynInst::isGloballyCoherent() const
{
    return _staticInst->isGloballyCoherent();
}

bool
GPUDynInst::isSystemCoherent() const
{
    return _staticInst->isSystemCoherent();
}

bool
GPUDynInst::isI8() const
{
    return _staticInst->isI8();
}

bool
GPUDynInst::isF16() const
{
    return _staticInst->isF16();
}

bool
GPUDynInst::isF32() const
{
    return _staticInst->isF32();
}

bool
GPUDynInst::isF64() const
{
    return _staticInst->isF64();
}

bool
GPUDynInst::isFMA() const
{
    return _staticInst->isFMA();
}

bool
GPUDynInst::isMAC() const
{
    return _staticInst->isMAC();
}

bool
GPUDynInst::isMAD() const
{
    return _staticInst->isMAD();
}

bool
GPUDynInst::isMFMA() const
{
    return _staticInst->isMFMA();
}

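/**
 * Note on flat addressing (a summary of what the check below does): AMD
 * GPUs classify a flat address by comparing it against fixed address
 * ranges called apertures ("APEs"): one for the LDS (group segment), one
 * for scratch (private segment), and one for GPUVM. An address that hits
 * no aperture and does not fall in the unmapped "hole" of the 64-bit
 * address space is treated as a global access.
 */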
void
GPUDynInst::doApertureCheck(const VectorMask &mask)
{
    assert(mask.any());
    // find the segment of the first active address, after
    // that we check that all other active addresses also
    // fall within the same APE
    for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
        if (mask[lane]) {
            if (computeUnit()->shader->isLdsApe(addr[lane])) {
                // group segment
                staticInstruction()->executed_as = enums::SC_GROUP;
                break;
            } else if (computeUnit()->shader->isScratchApe(addr[lane])) {
                // private segment
                staticInstruction()->executed_as = enums::SC_PRIVATE;
                break;
            } else if (computeUnit()->shader->isGpuVmApe(addr[lane])) {
                // we won't support GPUVM
                fatal("flat access is in GPUVM APE\n");
            } else if (bits(addr[lane], 63, 47) != 0x1FFFF &&
                       bits(addr[lane], 63, 47)) {
                // we are in the "hole", this is a memory violation
                fatal("flat access at addr %#x has a memory violation\n",
                      addr[lane]);
            } else {
                // global memory segment
                staticInstruction()->executed_as = enums::SC_GLOBAL;
                break;
            }
        }
    }

    // we should have found the segment
    assert(executedAs() != enums::SC_NONE);

    // flat accesses should not straddle multiple APEs so we
    // must check that all addresses fall within the same APE
    if (executedAs() == enums::SC_GROUP) {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was LDS,
                // all the rest should be
                assert(computeUnit()->shader->isLdsApe(addr[lane]));
            }
        }
    } else if (executedAs() == enums::SC_PRIVATE) {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was private,
                // all the rest should be
                assert(computeUnit()->shader->isScratchApe(addr[lane]));
            }
        }
    } else {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was global,
                // all the rest should be. because we don't have an
                // explicit range of the global segment, we just make
                // sure that the address falls in no other APE and that
                // it is not a memory violation
                assert(!computeUnit()->shader->isLdsApe(addr[lane]));
                assert(!computeUnit()->shader->isScratchApe(addr[lane]));
                assert(!computeUnit()->shader->isGpuVmApe(addr[lane]));
                assert(!(bits(addr[lane], 63, 47) != 0x1FFFF
                       && bits(addr[lane], 63, 47)));
            }
        }
    }
}

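/**
 * Note (summarizing the logic below): once the aperture check has labeled
 * the access, the per-lane flat addresses are rewritten into the
 * coordinates of the segment that will service them (an LDS offset for
 * group, a swizzled scratch offset for private), the execution unit is
 * selected, and the pipeline resources that the flat instruction reserved
 * on the path it did not take are released.
 */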
void
GPUDynInst::resolveFlatSegment(const VectorMask &mask)
{
    doApertureCheck(mask);


    // Now that we know the aperture, do the following:
    // 1. Transform the flat address to its segmented equivalent.
    // 2. Set the execUnitId based on the aperture check.
    // 3. Decrement any extra resources that were reserved. Other
    //    resources are released as normal, below.
    if (executedAs() == enums::SC_GLOBAL) {
        // no transformation for global segment
        wavefront()->execUnitId = wavefront()->flatGmUnitId;
        if (isLoad()) {
            wavefront()->rdLmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrLmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrLmReqsInPipe--;
            wavefront()->rdLmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else if (executedAs() == enums::SC_GROUP) {
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                // flat address calculation goes here.
                // addr[lane] = segmented address
                addr[lane] = addr[lane] -
                    wavefront()->computeUnit->shader->ldsApe().base;
                assert(addr[lane] <
                    wavefront()->computeUnit->getLds().getAddrRange().size());
            }
        }
        wavefront()->execUnitId = wavefront()->flatLmUnitId;
        wavefront()->decVMemInstsIssued();
        wavefront()->vmemIssued.erase(seqNum());
        if (isLoad()) {
            wavefront()->rdGmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrGmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->rdGmReqsInPipe--;
            wavefront()->wrGmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else if (executedAs() == enums::SC_PRIVATE) {

        if (wavefront()->gfxVersion == GfxVersion::gfx942 ||
            wavefront()->gfxVersion == GfxVersion::gfx950) {
            // Architected flat scratch base address is in a dedicated hardware
            // register.
            for (int lane = 0; lane < cu->wfSize(); ++lane) {
                if (mask[lane]) {
                    // The scratch base is added for other gfx versions,
                    // otherwise this would simply add the register base.
                    addr[lane] = addr[lane] - cu->shader->getScratchBase()
                               + wavefront()->archFlatScratchAddr;
                }
            }
        } else {
            // In absolute flat scratch the program needs to place scratch
            // address in SGPRn-3,4.
            uint32_t numSgprs = wavefront()->maxSgprs;
            uint32_t physSgprIdx =
                cu->registerManager->mapSgpr(wavefront(), numSgprs - 4);
            uint32_t offset = cu->srf[simdId]->read(physSgprIdx);
            physSgprIdx =
                cu->registerManager->mapSgpr(wavefront(), numSgprs - 3);
            uint32_t size = cu->srf[simdId]->read(physSgprIdx);


            for (int lane = 0; lane < cu->wfSize(); ++lane) {
                if (mask[lane]) {
                    // each work-item's private data is strided by the
                    // per-lane scratch size, so lane addresses interleave
                    addr[lane] = addr[lane] + lane * size + offset +
                        cu->shader->getHiddenPrivateBase() -
                        cu->shader->getScratchBase();
                }
            }
        }

        wavefront()->execUnitId = wavefront()->flatLmUnitId;

        // For FLAT the local memory pipe counters are incremented, but they
        // are not incremented for explicit scratch_* instructions. Only
        // decrement these counters if we are explicitly a FLAT instruction.
        if (isFlat()) {
            wavefront()->decLGKMInstsIssued();
            wavefront()->lgkmIssued.erase(seqNum());
            if (isLoad()) {
                wavefront()->rdLmReqsInPipe--;
            } else if (isStore()) {
                wavefront()->wrLmReqsInPipe--;
            } else if (isAtomic() || isMemSync()) {
                wavefront()->rdLmReqsInPipe--;
                wavefront()->wrLmReqsInPipe--;
            } else {
                panic("Invalid memory operation!\n");
            }
        }
    } else {
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                panic("flat addr %#llx maps to bad segment %d\n",
                      addr[lane], executedAs());
            }
        }
    }
}

TheGpuISA::ScalarRegU32
GPUDynInst::srcLiteral() const
{
    return _staticInst->srcLiteral();
}

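// Bookkeeping for the per-CU instruction-mix statistics: LDS, flat, and
// global accesses are counted separately, and global accesses also feed
// the page-divergence histogram (how many distinct pages one wavefront's
// memory instruction touches).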
void
GPUDynInst::updateStats()
{
    if (_staticInst->isLocalMem()) {
        // access to LDS (shared) memory
        cu->stats.dynamicLMemInstrCnt++;
    } else if (_staticInst->isFlat()) {
        cu->stats.dynamicFlatMemInstrCnt++;
    } else {
        // access to global memory

        // update PageDivergence histogram
        int number_pages_touched = cu->pagesTouched.size();
        assert(number_pages_touched);
        cu->stats.pageDivergenceDist.sample(number_pages_touched);

        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;

        for (auto it : cu->pagesTouched) {
            // see if this page has been touched before. if not, this also
            // inserts the page into the table.
            ret = cu->pageAccesses
                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                        std::make_pair(1, it.second)));

            // if yes, then update the stats
            if (!ret.second) {
                ret.first->second.first++;
                ret.first->second.second += it.second;
            }
        }

        cu->pagesTouched.clear();

        // total number of memory instructions (dynamic)
        // Atomics are counted as a single memory instruction.
        // this is # memory instructions per wavefront, not per workitem
        cu->stats.dynamicGMemInstrCnt++;
    }
}

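// Latency profiling: a timestamp is recorded per hop as a request flows
// through the memory system; when accesses are coalesced, only the first
// request to reach a given hop is recorded.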
void
GPUDynInst::profileRoundTripTime(Tick currentTime, int hopId)
{
    // Only take the first measurement in the case of coalescing
    if (roundTripTime.size() > hopId)
        return;

    roundTripTime.push_back(currentTime);
}

void
GPUDynInst::profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
{
    if (lineAddressTime.count(addr)) {
        if (lineAddressTime[addr].size() > hopId) {
            return;
        }

        lineAddressTime[addr].push_back(currentTime);
    } else if (hopId == 0) {
        auto addressTimeVec = std::vector<Tick> { currentTime };
        lineAddressTime.insert(std::make_pair(addr, addressTimeVec));
    }
}

1068} // namespace gem5