gpu_dyn_inst.cc

/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/gpu_dyn_inst.hh"

#include "debug/GPUInst.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
                       GPUStaticInst *static_inst, InstSeqNum instSeqNum)
    : GPUExecContext(_cu, _wf), scalarAddr(0), addr(computeUnit()->wfSize(),
      (Addr)0), numScalarReqs(0), isSaveRestore(false),
      _staticInst(static_inst), _seqNum(instSeqNum),
      maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
{
    _staticInst->initOperandInfo();
    statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
    // vector instructions can have up to 4 source/destination operands
    d_data = new uint8_t[computeUnit()->wfSize() * 4 * sizeof(double)];
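    // sizing example: with the default 64-lane wavefront, d_data spans
    // 64 lanes * 4 operands * sizeof(double) = 2048 bytes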
    a_data = new uint8_t[computeUnit()->wfSize() * 8];
    x_data = new uint8_t[computeUnit()->wfSize() * 8];
    // scalar loads can read up to 16 Dwords of data (see publicly
    // available Vega ISA manual)
    scalar_data = new uint8_t[16 * sizeof(uint32_t)];
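    // i.e., 16 Dwords * 4 bytes each = 64 bytes of scalar data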
    for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
        scalar_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
        a_data[i] = 0;
        x_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 4 * sizeof(double)); ++i) {
        d_data[i] = 0;
    }
    time = 0;

    cu_id = _cu->cu_id;
    if (_wf) {
        simdId = _wf->simdId;
        wfDynId = _wf->wfDynId;
        kern_id = _wf->kernId;
        wg_id = _wf->wgId;
        wfSlotId = _wf->wfSlotId;
    } else {
        simdId = -1;
        wfDynId = -1;
        kern_id = -1;
        wg_id = -1;
        wfSlotId = -1;
    }

    DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
            disassemble(), getNumOperands());

    _staticInst->initDynOperandInfo(wavefront(), computeUnit());
}

GPUDynInst::~GPUDynInst()
{
    delete[] d_data;
    delete[] a_data;
    delete[] x_data;
    delete[] scalar_data;
    delete _staticInst;
}

void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
    _staticInst->execute(gpuDynInst);
}

const std::vector<OperandInfo>&
GPUDynInst::srcVecRegOperands() const
{
    return _staticInst->srcVecRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::dstVecRegOperands() const
{
    return _staticInst->dstVecRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::srcScalarRegOperands() const
{
    return _staticInst->srcScalarRegOperands();
}

const std::vector<OperandInfo>&
GPUDynInst::dstScalarRegOperands() const
{
    return _staticInst->dstScalarRegOperands();
}

int
GPUDynInst::numSrcRegOperands()
{
    return _staticInst->numSrcRegOperands();
}

int
GPUDynInst::numDstRegOperands()
{
    return _staticInst->numDstRegOperands();
}

int
GPUDynInst::numSrcVecRegOperands() const
{
    return _staticInst->numSrcVecOperands();
}

int
GPUDynInst::numDstVecRegOperands() const
{
    return _staticInst->numDstVecOperands();
}

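// maxSrcVecRegOpSize and maxSrcScalarRegOpSize are constructed as -1
// ("not yet computed"); the two accessors below compute the widest
// source operand, in DWORDs, on first use and cache the result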
int
GPUDynInst::maxSrcVecRegOperandSize()
{
    if (maxSrcVecRegOpSize != -1)
        return maxSrcVecRegOpSize;

    maxSrcVecRegOpSize = 0;
    for (const auto& srcVecOp : srcVecRegOperands())
        if (srcVecOp.sizeInDWords() > maxSrcVecRegOpSize)
            maxSrcVecRegOpSize = srcVecOp.sizeInDWords();

    return maxSrcVecRegOpSize;
}

int
GPUDynInst::numSrcVecDWords()
{
    return _staticInst->numSrcVecDWords();
}

int
GPUDynInst::numDstVecDWords()
{
    return _staticInst->numDstVecDWords();
}

int
GPUDynInst::numSrcScalarRegOperands() const
{
    return _staticInst->numSrcScalarOperands();
}

int
GPUDynInst::numDstScalarRegOperands() const
{
    return _staticInst->numDstScalarOperands();
}

int
GPUDynInst::maxSrcScalarRegOperandSize()
{
    if (maxSrcScalarRegOpSize != -1)
        return maxSrcScalarRegOpSize;

    maxSrcScalarRegOpSize = 0;
    for (const auto& srcScOp : srcScalarRegOperands())
        if (srcScOp.sizeInDWords() > maxSrcScalarRegOpSize)
            maxSrcScalarRegOpSize = srcScOp.sizeInDWords();

    return maxSrcScalarRegOpSize;
}

int
GPUDynInst::numSrcScalarDWords()
{
    return _staticInst->numSrcScalarDWords();
}

int
GPUDynInst::numDstScalarDWords()
{
    return _staticInst->numDstScalarDWords();
}

int
GPUDynInst::maxOperandSize()
{
    return _staticInst->maxOperandSize();
}

int
GPUDynInst::getNumOperands() const
{
    return _staticInst->getNumOperands();
}

bool
GPUDynInst::hasSourceVgpr() const
{
    return !srcVecRegOperands().empty();
}

bool
GPUDynInst::hasDestinationVgpr() const
{
    return !dstVecRegOperands().empty();
}

bool
GPUDynInst::hasSourceSgpr() const
{
    return !srcScalarRegOperands().empty();
}

bool
GPUDynInst::hasDestinationSgpr() const
{
    return !dstScalarRegOperands().empty();
}

bool
GPUDynInst::isOpcode(const std::string& opcodeStr,
                     const std::string& extStr) const
{
    return _staticInst->opcode().find(opcodeStr) != std::string::npos &&
        _staticInst->opcode().find(extStr) != std::string::npos;
}

bool
GPUDynInst::isOpcode(const std::string& opcodeStr) const
{
    return _staticInst->opcode().find(opcodeStr) != std::string::npos;
}

const std::string&
GPUDynInst::disassemble() const
{
    return _staticInst->disassemble();
}

InstSeqNum
GPUDynInst::seqNum() const
{
    return _seqNum;
}

Addr
GPUDynInst::pc()
{
    return wavefront()->pc();
}

void
GPUDynInst::pc(Addr _pc)
{
    wavefront()->pc(_pc);
}

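// the storage class (e.g., SC_GLOBAL, SC_GROUP, SC_PRIVATE) this
// instruction executed as; for flat accesses it is resolved by
// doApertureCheck() below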
enums::StorageClassType
GPUDynInst::executedAs()
{
    return _staticInst->executed_as;
}

// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->initiateAcc(gpuDynInst);
}

void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector="
            "%#x complete\n",
            cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->completeAcc(gpuDynInst);
}

/**
 * accessor methods for the attributes of
 * the underlying GPU static instruction
 */
bool
GPUDynInst::isALU() const
{
    return _staticInst->isALU();
}

bool
GPUDynInst::isBranch() const
{
    return _staticInst->isBranch();
}

bool
GPUDynInst::isCondBranch() const
{
    return _staticInst->isCondBranch();
}

bool
GPUDynInst::isNop() const
{
    return _staticInst->isNop();
}

bool
GPUDynInst::isEndOfKernel() const
{
    return _staticInst->isEndOfKernel();
}

bool
GPUDynInst::isKernelLaunch() const
{
    return _staticInst->isKernelLaunch();
}

bool
GPUDynInst::isSDWAInst() const
{
    return _staticInst->isSDWAInst();
}

bool
GPUDynInst::isDPPInst() const
{
    return _staticInst->isDPPInst();
}

bool
GPUDynInst::isReturn() const
{
    return _staticInst->isReturn();
}

bool
GPUDynInst::isUnconditionalJump() const
{
    return _staticInst->isUnconditionalJump();
}

bool
GPUDynInst::isSpecialOp() const
{
    return _staticInst->isSpecialOp();
}

bool
GPUDynInst::isWaitcnt() const
{
    return _staticInst->isWaitcnt();
}

bool
GPUDynInst::isSleep() const
{
    return _staticInst->isSleep();
}

bool
GPUDynInst::isBarrier() const
{
    return _staticInst->isBarrier();
}

bool
GPUDynInst::isMemSync() const
{
    return _staticInst->isMemSync();
}

bool
GPUDynInst::isMemRef() const
{
    return _staticInst->isMemRef();
}

bool
GPUDynInst::isFlat() const
{
    return _staticInst->isFlat();
}

bool
GPUDynInst::isFlatGlobal() const
{
    return _staticInst->isFlatGlobal();
}

bool
GPUDynInst::isFlatScratch() const
{
    return _staticInst->isFlatScratch();
}

bool
GPUDynInst::isLoad() const
{
    return _staticInst->isLoad();
}

bool
GPUDynInst::isStore() const
{
    return _staticInst->isStore();
}

bool
GPUDynInst::isAtomic() const
{
    return _staticInst->isAtomic();
}

bool
GPUDynInst::isAtomicNoRet() const
{
    return _staticInst->isAtomicNoRet();
}

bool
GPUDynInst::isAtomicRet() const
{
    return _staticInst->isAtomicRet();
}

bool
GPUDynInst::isVector() const
{
    return !_staticInst->isScalar();
}

bool
GPUDynInst::isScalar() const
{
    return _staticInst->isScalar();
}

bool
GPUDynInst::readsSCC() const
{
    return _staticInst->readsSCC();
}

bool
GPUDynInst::writesSCC() const
{
    return _staticInst->writesSCC();
}

bool
GPUDynInst::readsVCC() const
{
    for (const auto& srcOp : _staticInst->srcOperands())
        if (srcOp.isVcc())
            return true;

    return _staticInst->readsVCC();
}

bool
GPUDynInst::writesVCC() const
{
    for (const auto& dstOp : _staticInst->dstOperands())
        if (dstOp.isVcc())
            return true;

    return _staticInst->writesVCC();
}

bool
GPUDynInst::readsMode() const
{
    return _staticInst->readsMode();
}

bool
GPUDynInst::writesMode() const
{
    return _staticInst->writesMode();
}

bool
GPUDynInst::readsExec() const
{
    return _staticInst->readsEXEC();
}

bool
GPUDynInst::writesExec() const
{
    return _staticInst->writesEXEC();
}

bool
GPUDynInst::ignoreExec() const
{
    return _staticInst->ignoreExec();
}

bool
GPUDynInst::writesExecMask() const
{
    for (const auto& dstOp : _staticInst->dstOperands())
        if (dstOp.isExec())
            return true;

    return _staticInst->writesEXEC();
}

bool
GPUDynInst::readsExecMask() const
{
    for (const auto& srcOp : _staticInst->srcOperands())
        if (srcOp.isExec())
            return true;

    return _staticInst->readsEXEC();
}

bool
GPUDynInst::writesFlatScratch() const
{
    for (const auto& dstScalarOp : dstScalarRegOperands())
        if (dstScalarOp.isFlatScratch())
            return true;

    return false;
}

bool
GPUDynInst::readsFlatScratch() const
{
    for (const auto& srcScalarOp : srcScalarRegOperands())
        if (srcScalarOp.isFlatScratch())
            return true;

    return false;
}

bool
GPUDynInst::needsToken() const
{
    return isGlobalMem() || isFlat() || isFlatGlobal() || isFlatScratch();
}

bool
GPUDynInst::isAtomicAnd() const
{
    return _staticInst->isAtomicAnd();
}

bool
GPUDynInst::isAtomicOr() const
{
    return _staticInst->isAtomicOr();
}

bool
GPUDynInst::isAtomicXor() const
{
    return _staticInst->isAtomicXor();
}

bool
GPUDynInst::isAtomicCAS() const
{
    return _staticInst->isAtomicCAS();
}

bool
GPUDynInst::isAtomicExch() const
{
    return _staticInst->isAtomicExch();
}

bool
GPUDynInst::isAtomicAdd() const
{
    return _staticInst->isAtomicAdd();
}

bool
GPUDynInst::isAtomicSub() const
{
    return _staticInst->isAtomicSub();
}

bool
GPUDynInst::isAtomicInc() const
{
    return _staticInst->isAtomicInc();
}

bool
GPUDynInst::isAtomicDec() const
{
    return _staticInst->isAtomicDec();
}

bool
GPUDynInst::isAtomicMax() const
{
    return _staticInst->isAtomicMax();
}

bool
GPUDynInst::isAtomicMin() const
{
    return _staticInst->isAtomicMin();
}

bool
GPUDynInst::isArgLoad() const
{
    return _staticInst->isArgLoad();
}

bool
GPUDynInst::isGlobalMem() const
{
    return _staticInst->isGlobalMem();
}

bool
GPUDynInst::isLocalMem() const
{
    return _staticInst->isLocalMem();
}

bool
GPUDynInst::isArgSeg() const
{
    return _staticInst->isArgSeg();
}

bool
GPUDynInst::isGlobalSeg() const
{
    return _staticInst->isGlobalSeg();
}

bool
GPUDynInst::isGroupSeg() const
{
    return _staticInst->isGroupSeg();
}

bool
GPUDynInst::isKernArgSeg() const
{
    return _staticInst->isKernArgSeg();
}

bool
GPUDynInst::isPrivateSeg() const
{
    return _staticInst->isPrivateSeg();
}

bool
GPUDynInst::isReadOnlySeg() const
{
    return _staticInst->isReadOnlySeg();
}

bool
GPUDynInst::isSpillSeg() const
{
    return _staticInst->isSpillSeg();
}

bool
GPUDynInst::isGloballyCoherent() const
{
    return _staticInst->isGloballyCoherent();
}

bool
GPUDynInst::isSystemCoherent() const
{
    return _staticInst->isSystemCoherent();
}

bool
GPUDynInst::isI8() const
{
    return _staticInst->isI8();
}

bool
GPUDynInst::isF16() const
{
    return _staticInst->isF16();
}

bool
GPUDynInst::isF32() const
{
    return _staticInst->isF32();
}

bool
GPUDynInst::isF64() const
{
    return _staticInst->isF64();
}

bool
GPUDynInst::isFMA() const
{
    return _staticInst->isFMA();
}

bool
GPUDynInst::isMAC() const
{
    return _staticInst->isMAC();
}

bool
GPUDynInst::isMAD() const
{
    return _staticInst->isMAD();
}

bool
GPUDynInst::isMFMA() const
{
    return _staticInst->isMFMA();
}

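// Classify a flat access by the aperture (APE) its addresses fall in:
// the LDS aperture maps to the group segment, the scratch aperture to
// the private segment, the GPUVM aperture is unsupported, and any
// other canonical address is treated as the global segment.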
void
GPUDynInst::doApertureCheck(const VectorMask &mask)
{
    assert(mask.any());
    // find the segment of the first active address, after
    // that we check that all other active addresses also
    // fall within the same APE
    for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
        if (mask[lane]) {
            if (computeUnit()->shader->isLdsApe(addr[lane])) {
                // group segment
                staticInstruction()->executed_as = enums::SC_GROUP;
                break;
            } else if (computeUnit()->shader->isScratchApe(addr[lane])) {
                // private segment
                staticInstruction()->executed_as = enums::SC_PRIVATE;
                break;
            } else if (computeUnit()->shader->isGpuVmApe(addr[lane])) {
                // we won't support GPUVM
                fatal("flat access is in GPUVM APE\n");
            } else if (bits(addr[lane], 63, 47) != 0x1FFFF &&
                       bits(addr[lane], 63, 47)) {
                // bits 63:47 must be all ones or all zeros, i.e., the
                // address must be canonical; anything else lies in the
                // "hole" and is a memory violation
                fatal("flat access at addr %#x has a memory violation\n",
                      addr[lane]);
            } else {
                // global memory segment
                staticInstruction()->executed_as = enums::SC_GLOBAL;
                break;
            }
        }
    }

    // we should have found the segment
    assert(executedAs() != enums::SC_NONE);

    // flat accesses should not straddle multiple APEs, so we
    // must check that all addresses fall within the same APE
    if (executedAs() == enums::SC_GROUP) {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was LDS,
                // all the rest should be
                assert(computeUnit()->shader->isLdsApe(addr[lane]));
            }
        }
    } else if (executedAs() == enums::SC_PRIVATE) {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was private,
                // all the rest should be
                assert(computeUnit()->shader->isScratchApe(addr[lane]));
            }
        }
    } else {
        for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
            if (mask[lane]) {
                // if the first valid addr we found above was global,
                // all the rest should be. Because we don't have an
                // explicit range for the global segment, we just make
                // sure that each address falls in no other APE and that
                // it is not a memory violation
                assert(!computeUnit()->shader->isLdsApe(addr[lane]));
                assert(!computeUnit()->shader->isScratchApe(addr[lane]));
                assert(!computeUnit()->shader->isGpuVmApe(addr[lane]));
                assert(!(bits(addr[lane], 63, 47) != 0x1FFFF
                       && bits(addr[lane], 63, 47)));
            }
        }
    }
}

void
GPUDynInst::resolveFlatSegment(const VectorMask &mask)
{
    doApertureCheck(mask);

    // Now that we know the aperture, do the following:
    // 1. Transform the flat address to its segmented equivalent.
    // 2. Set the execUnitId based on the aperture check.
    // 3. Decrement any extra resources that were reserved. Other
    //    resources are released as normal, below.
    if (executedAs() == enums::SC_GLOBAL) {
        // no transformation for global segment
        wavefront()->execUnitId = wavefront()->flatGmUnitId;
        if (isLoad()) {
            wavefront()->rdGmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrGmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrGmReqsInPipe--;
            wavefront()->rdGmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else if (executedAs() == enums::SC_GROUP) {
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                // flat address calculation goes here.
                // addr[lane] = segmented address
                addr[lane] = addr[lane] -
                    wavefront()->computeUnit->shader->ldsApe().base;
                assert(addr[lane] <
                    wavefront()->computeUnit->getLds().getAddrRange().size());
            }
        }
        wavefront()->execUnitId = wavefront()->flatLmUnitId;
        wavefront()->decVMemInstsIssued();
        if (isLoad()) {
            wavefront()->rdLmReqsInPipe--;
        } else if (isStore()) {
            wavefront()->wrLmReqsInPipe--;
        } else if (isAtomic() || isMemSync()) {
            wavefront()->wrLmReqsInPipe--;
            wavefront()->rdLmReqsInPipe--;
        } else {
            panic("Invalid memory operation!\n");
        }
    } else if (executedAs() == enums::SC_PRIVATE) {

        // Flat access resolved to the private (scratch) segment, which
        // is backed by global memory: transform the flat address into a
        // swizzled scratch address so that each lane's private data is
        // interleaved in the scratch backing store.
        if (wavefront()->gfxVersion == GfxVersion::gfx942) {
            // Architected flat scratch base address is in a dedicated hardware
            // register.
            for (int lane = 0; lane < cu->wfSize(); ++lane) {
                if (mask[lane]) {
                    // The scratch base is added for other gfx versions,
                    // otherwise this would simply add the register base.
                    addr[lane] = addr[lane] - cu->shader->getScratchBase()
                        + wavefront()->archFlatScratchAddr;
                }
            }
        } else {
            // In absolute flat scratch the program needs to place the
            // scratch address in SGPRn-3,4.
            uint32_t numSgprs = wavefront()->maxSgprs;
            uint32_t physSgprIdx =
                cu->registerManager->mapSgpr(wavefront(), numSgprs - 4);
            uint32_t offset = cu->srf[simdId]->read(physSgprIdx);
            physSgprIdx =
                cu->registerManager->mapSgpr(wavefront(), numSgprs - 3);
            uint32_t size = cu->srf[simdId]->read(physSgprIdx);
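            // The loop below swizzles each lane's address into the
            // scratch backing store: lane N's data begins at
            // offset + N * size. Purely illustratively, with size 0x400
            // and offset 0, lane 0 maps to 0x0, lane 1 to 0x400, and so
            // on, before the hidden-private/scratch base adjustment.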

            for (int lane = 0; lane < cu->wfSize(); ++lane) {
                if (mask[lane]) {
                    addr[lane] = addr[lane] + lane * size + offset +
                        cu->shader->getHiddenPrivateBase() -
                        cu->shader->getScratchBase();
                }
            }
        }

        wavefront()->execUnitId = wavefront()->flatGmUnitId;

        // For FLAT the local memory pipe counters are incremented, but they
        // are not incremented for explicit scratch_* instructions. Only
        // decrement these counters if we are explicitly a FLAT instruction.
        if (isFlat()) {
            wavefront()->decLGKMInstsIssued();
            if (isLoad()) {
                wavefront()->rdLmReqsInPipe--;
            } else if (isStore()) {
                wavefront()->wrLmReqsInPipe--;
            } else if (isAtomic() || isMemSync()) {
                wavefront()->rdLmReqsInPipe--;
                wavefront()->wrLmReqsInPipe--;
            } else {
                panic("Invalid memory operation!\n");
            }
        }
    } else {
        for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                panic("flat addr %#llx maps to bad segment %d\n",
                      addr[lane], executedAs());
            }
        }
    }
}

TheGpuISA::ScalarRegU32
GPUDynInst::srcLiteral() const
{
    return _staticInst->srcLiteral();
}

void
GPUDynInst::updateStats()
{
    if (_staticInst->isLocalMem()) {
        // access to LDS (shared) memory
        cu->stats.dynamicLMemInstrCnt++;
    } else if (_staticInst->isFlat()) {
        cu->stats.dynamicFlatMemInstrCnt++;
    } else {
        // access to global memory

        // update PageDivergence histogram
        int number_pages_touched = cu->pagesTouched.size();
        assert(number_pages_touched);
        cu->stats.pageDivergenceDist.sample(number_pages_touched);

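        // pageAccesses maps a page address to a pair of counters:
        // first counts the memory instructions that touched the page,
        // second accumulates the per-lane access count, per the loop below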
        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;

        for (auto it : cu->pagesTouched) {
            // see if this page has been touched before. if not, this also
            // inserts the page into the table.
            ret = cu->pageAccesses
                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                        std::make_pair(1, it.second)));

            // if yes, then update the stats
            if (!ret.second) {
                ret.first->second.first++;
                ret.first->second.second += it.second;
            }
        }

        cu->pagesTouched.clear();

        // total number of memory instructions (dynamic)
        // Atomics are counted as a single memory instruction.
        // this is # memory instructions per wavefront, not per work-item
        cu->stats.dynamicGMemInstrCnt++;
    }
}

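// roundTripTime records one timestamp per hop of this instruction's
// memory request; the vector index is the hop id, so only the first
// arrival of a coalesced request at a given hop is kept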
void
GPUDynInst::profileRoundTripTime(Tick currentTime, int hopId)
{
    // Only take the first measurement in the case of coalescing
    if (roundTripTime.size() > hopId)
        return;

    roundTripTime.push_back(currentTime);
}

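// per cache-line variant of the profiling above: timestamps are keyed
// by line address, and an entry is only created when a line is first
// observed at hop 0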
void
GPUDynInst::profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
{
    if (lineAddressTime.count(addr)) {
        if (lineAddressTime[addr].size() > hopId) {
            return;
        }

        lineAddressTime[addr].push_back(currentTime);
    } else if (hopId == 0) {
        auto addressTimeVec = std::vector<Tick> { currentTime };
        lineAddressTime.insert(std::make_pair(addr, addressTimeVec));
    }
}

} // namespace gem5