gem5 [DEVELOP-FOR-25.1]
Loading...
Searching...
No Matches
gpu_static_inst.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __GPU_STATIC_INST_HH__
33#define __GPU_STATIC_INST_HH__
34
35/*
36 * @file gpu_static_inst.hh
37 *
38 * Defines the base class representing static instructions for the GPU. The
39 * instructions are "static" because they contain no dynamic instruction
40 * information. GPUStaticInst corresponds to the StaticInst class for the CPU
41 * models.
42 */
43
44#include <cstdint>
45#include <string>
46#include <vector>
47
48#include "enums/GPUStaticInstFlags.hh"
49#include "enums/StorageClassType.hh"
51#include "gpu-compute/misc.hh"
54
55namespace gem5
56{
57
58class BaseOperand;
59class BaseRegOperand;
60
61class GPUStaticInst : public GPUStaticInstFlags
62{
63 public:
64 GPUStaticInst(const std::string &opcode);
65 virtual ~GPUStaticInst() { }
66 void instAddr(int inst_addr) { _instAddr = inst_addr; }
67 int instAddr() const { return _instAddr; }
68 int nextInstAddr() const { return _instAddr + instSize(); }
69
70 void instNum(int num) { _instNum = num; }
71
72 int instNum() { return _instNum; }
73
74 void ipdInstNum(int num) { _ipdInstNum = num; }
75
76 int ipdInstNum() const { return _ipdInstNum; }
77
78 virtual TheGpuISA::ScalarRegU32 srcLiteral() const { return 0; }
79
81
82 virtual void initOperandInfo() = 0;
83 virtual void execute(GPUDynInstPtr gpuDynInst) = 0;
84 virtual void generateDisassembly() = 0;
85 const std::string& disassemble();
86 virtual int getNumOperands() = 0;
87 virtual bool isFlatScratchRegister(int opIdx) = 0;
88 virtual bool isExecMaskRegister(int opIdx) = 0;
89 virtual int getOperandSize(int operandIndex) = 0;
90
91 virtual int numDstRegOperands() = 0;
92 virtual int numSrcRegOperands() = 0;
93
96 int numSrcVecDWords();
97 int numDstVecDWords();
98
101 int numSrcScalarDWords();
102 int numDstScalarDWords();
103
104 int maxOperandSize();
105
106 virtual int coalescerTokenCount() const { return 0; }
107
108 bool isALU() const { return _flags[ALU]; }
109 bool isBranch() const { return _flags[Branch]; }
110 bool isCondBranch() const { return _flags[CondBranch]; }
111 bool isNop() const { return _flags[Nop]; }
112 bool isReturn() const { return _flags[Return]; }
113 bool isEndOfKernel() const { return _flags[EndOfKernel]; }
114 bool isKernelLaunch() const { return _flags[KernelLaunch]; }
115 bool isSDWAInst() const { return _flags[IsSDWA]; }
116 bool isDPPInst() const { return _flags[IsDPP]; }
117
118 bool
120 {
121 return _flags[UnconditionalJump];
122 }
123
124 bool isSpecialOp() const { return _flags[SpecialOp]; }
125 bool isWaitcnt() const { return _flags[Waitcnt]; }
126 bool isSleep() const { return _flags[Sleep]; }
127
128 bool isBarrier() const { return _flags[MemBarrier]; }
129 bool isMemSync() const { return _flags[MemSync]; }
130 bool isMemRef() const { return _flags[MemoryRef]; }
131 bool isFlat() const { return _flags[Flat]; }
132 bool isFlatGlobal() const { return _flags[FlatGlobal]; }
133 bool isFlatScratch() const { return _flags[FlatScratch]; }
134 bool isLoad() const { return _flags[Load]; }
135 bool isStore() const { return _flags[Store]; }
136
137 bool
138 isAtomic() const
139 {
140 return _flags[AtomicReturn] || _flags[AtomicNoReturn];
141 }
142
143 bool isAtomicNoRet() const { return _flags[AtomicNoReturn]; }
144 bool isAtomicRet() const { return _flags[AtomicReturn]; }
145
146 bool isScalar() const { return _flags[Scalar]; }
147 bool readsSCC() const { return _flags[ReadsSCC]; }
148 bool writesSCC() const { return _flags[WritesSCC]; }
149 bool readsVCC() const { return _flags[ReadsVCC]; }
150 bool writesVCC() const { return _flags[WritesVCC]; }
151 // Identify instructions that implicitly read the Execute mask
152 // as a source operand but not to dictate which threads execute.
153 bool readsEXEC() const { return _flags[ReadsEXEC]; }
154 bool writesEXEC() const { return _flags[WritesEXEC]; }
155 bool readsMode() const { return _flags[ReadsMode]; }
156 bool writesMode() const { return _flags[WritesMode]; }
157 bool ignoreExec() const { return _flags[IgnoreExec]; }
158
159 bool isAtomicAnd() const { return _flags[AtomicAnd]; }
160 bool isAtomicOr() const { return _flags[AtomicOr]; }
161 bool isAtomicXor() const { return _flags[AtomicXor]; }
162 bool isAtomicCAS() const { return _flags[AtomicCAS]; }
163 bool isAtomicExch() const { return _flags[AtomicExch]; }
164 bool isAtomicAdd() const { return _flags[AtomicAdd]; }
165 bool isAtomicSub() const { return _flags[AtomicSub]; }
166 bool isAtomicInc() const { return _flags[AtomicInc]; }
167 bool isAtomicDec() const { return _flags[AtomicDec]; }
168 bool isAtomicMax() const { return _flags[AtomicMax]; }
169 bool isAtomicMin() const { return _flags[AtomicMin]; }
170 bool isAtomicPkAddBF16() const { return _flags[AtomicPkAddBF16]; }
171
172 bool
173 isArgLoad() const
174 {
175 return (_flags[KernArgSegment] || _flags[ArgSegment]) && _flags[Load];
176 }
177
178 bool
180 {
181 return _flags[MemoryRef] && (_flags[GlobalSegment] ||
182 _flags[PrivateSegment] || _flags[ReadOnlySegment] ||
183 _flags[SpillSegment] || _flags[FlatGlobal] ||
184 _flags[FlatScratch]);
185 }
186
187 bool
189 {
190 return _flags[MemoryRef] && _flags[GroupSegment];
191 }
192
193 bool isArgSeg() const { return _flags[ArgSegment]; }
194 bool isGlobalSeg() const { return _flags[GlobalSegment]; }
195 bool isGroupSeg() const { return _flags[GroupSegment]; }
196 bool isKernArgSeg() const { return _flags[KernArgSegment]; }
197 bool isPrivateSeg() const { return _flags[PrivateSegment]; }
198 bool isReadOnlySeg() const { return _flags[ReadOnlySegment]; }
199 bool isSpillSeg() const { return _flags[SpillSegment]; }
200
211 bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
212 bool isSystemCoherent() const { return _flags[SystemCoherent]; }
213
214 // Floating-point instructions
215 bool isI8() const { return _flags[I8]; }
216 bool isF16() const { return _flags[F16]; }
217 bool isF32() const { return _flags[F32]; }
218 bool isF64() const { return _flags[F64]; }
219
220 // FMA, MAC, MAD instructions
221 bool isFMA() const { return _flags[FMA]; }
222 bool isMAC() const { return _flags[MAC]; }
223 bool isMAD() const { return _flags[MAD]; }
224 bool isMFMA() const { return _flags[MFMA]; }
225
226 bool hasNoAddr() const { return _flags[NoAddr]; }
227
228 virtual int instSize() const = 0;
229
230 // only used for memory instructions
231 virtual void
233 {
234 fatal("calling initiateAcc() on a non-memory instruction.\n");
235 }
236
237 // only used for memory instructions
238 virtual void
240 {
241 fatal("calling completeAcc() on a non-memory instruction.\n");
242 }
243
244 virtual uint32_t getTargetPc() { return 0; }
245
246 static uint64_t dynamic_id_count;
247
248 // For flat memory accesses
249 enums::StorageClassType executed_as;
250
251 void setFlag(Flags flag) {
252 _flags[flag] = true;
253
254 if (isGroupSeg()) {
255 executed_as = enums::SC_GROUP;
256 } else if (isGlobalSeg()) {
257 executed_as = enums::SC_GLOBAL;
258 } else if (isPrivateSeg()) {
259 executed_as = enums::SC_PRIVATE;
260 } else if (isSpillSeg()) {
261 executed_as = enums::SC_SPILL;
262 } else if (isReadOnlySeg()) {
263 executed_as = enums::SC_READONLY;
264 } else if (isKernArgSeg()) {
265 executed_as = enums::SC_KERNARG;
266 } else if (isArgSeg()) {
267 executed_as = enums::SC_ARG;
268 }
269 }
270 const std::string& opcode() const { return _opcode; }
271
272 const std::vector<OperandInfo>& srcOperands() const { return srcOps; }
273 const std::vector<OperandInfo>& dstOperands() const { return dstOps; }
274
277 {
278 return srcVecRegOps;
279 }
280
283 {
284 return dstVecRegOps;
285 }
286
289 {
290 return srcScalarRegOps;
291 }
292
295 {
296 return dstScalarRegOps;
297 }
298
299 // These next 2 lines are used in initDynOperandInfo to let the lambda
300 // function work
301 typedef int (RegisterManager::*MapRegFn)(Wavefront *, int);
303
304 protected:
305 const std::string _opcode;
306 std::string disassembly;
311
312 private:
318
323
328
329 std::bitset<Num_Flags> _flags;
330
332 std::vector<OperandInfo>& opVec, OpType opType);
333};
334
336{
337 public:
339 {
340 setFlag(Nop);
341 setFlag(KernelLaunch);
342 setFlag(MemSync);
344 setFlag(GlobalSegment);
345 }
346
347 void
348 execute(GPUDynInstPtr gpuDynInst) override
349 {
350 fatal("kernel launch instruction should not be executed\n");
351 }
352
353 void
355 {
357 }
358
359 void initOperandInfo() override { return; }
360 int getNumOperands() override { return 0; }
361 bool isFlatScratchRegister(int opIdx) override { return false; }
362 // return true if the Execute mask is explicitly used as a source
363 // register operand
364 bool isExecMaskRegister(int opIdx) override { return false; }
365 int getOperandSize(int operandIndex) override { return 0; }
366
367 int numDstRegOperands() override { return 0; }
368 int numSrcRegOperands() override { return 0; }
369 int instSize() const override { return 0; }
370};
371
372} // namespace gem5
373
374#endif // __GPU_STATIC_INST_HH__
Wrapper that groups a few flag bits under the same underlying container.
Definition flags.hh:45
virtual int numDstRegOperands()=0
std::bitset< Num_Flags > _flags
virtual TheGpuISA::ScalarRegU32 srcLiteral() const
bool isGloballyCoherent() const
Coherence domain of a memory instruction.
std::vector< OperandInfo > srcOps
const std::string & disassemble()
void setFlag(Flags flag)
bool isAtomicPkAddBF16() const
const std::vector< OperandInfo > & dstVecRegOperands() const
const std::vector< OperandInfo > & srcScalarRegOperands() const
std::vector< OperandInfo > dstScalarRegOps
enums::StorageClassType executed_as
const std::string & opcode() const
virtual void generateDisassembly()=0
virtual int getOperandSize(int operandIndex)=0
virtual bool isExecMaskRegister(int opIdx)=0
const std::vector< OperandInfo > & srcOperands() const
const std::vector< OperandInfo > & dstScalarRegOperands() const
void ipdInstNum(int num)
GPUStaticInst(const std::string &opcode)
std::vector< OperandInfo > dstVecRegOps
int _ipdInstNum
Identifier of the immediate post-dominator instruction.
std::vector< OperandInfo > dstOps
static uint64_t dynamic_id_count
virtual bool isFlatScratchRegister(int opIdx)=0
const std::vector< OperandInfo > & dstOperands() const
bool isKernelLaunch() const
bool isSystemCoherent() const
virtual void initOperandInfo()=0
virtual void execute(GPUDynInstPtr gpuDynInst)=0
const std::vector< OperandInfo > & srcVecRegOperands() const
virtual void completeAcc(GPUDynInstPtr gpuDynInst)
std::vector< OperandInfo > srcVecRegOps
virtual int getNumOperands()=0
void generateVirtToPhysMap(Wavefront *wf, ComputeUnit *cu, OperandInfo &op, std::vector< OperandInfo > &opVec, OpType opType)
virtual uint32_t getTargetPc()
virtual int instSize() const =0
std::vector< OperandInfo > srcScalarRegOps
const std::string _opcode
virtual int coalescerTokenCount() const
bool isUnconditionalJump() const
void instAddr(int inst_addr)
virtual void initiateAcc(GPUDynInstPtr gpuDynInst)
void initDynOperandInfo(Wavefront *wf, ComputeUnit *cu)
virtual int numSrcRegOperands()=0
int(RegisterManager::* MapRegFn)(Wavefront *, int)
bool isFlatScratchRegister(int opIdx) override
int getOperandSize(int operandIndex) override
void execute(GPUDynInstPtr gpuDynInst) override
int instSize() const override
bool isExecMaskRegister(int opIdx) override
Base class for branch operations.
Definition branch.hh:49
Nop class.
Definition nop.hh:49
This is a simple scalar statistic, like a counter.
STL vector class.
Definition stl.hh:37
#define fatal(...)
This implements a cprintf based fatal() function.
Definition logging.hh:232
Bitfield< 4 > op
Definition types.hh:83
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49

Generated on Mon Oct 27 2025 04:13:02 for gem5 by doxygen 1.14.0