gem5 [DEVELOP-FOR-25.0]
Loading...
Searching...
No Matches
wavefront.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __GPU_COMPUTE_WAVEFRONT_HH__
33#define __GPU_COMPUTE_WAVEFRONT_HH__
34
35#include <cassert>
36#include <deque>
37#include <list>
38#include <memory>
39#include <set>
40#include <unordered_map>
41#include <vector>
42
43#include "arch/gpu_isa.hh"
44#include "base/logging.hh"
45#include "base/statistics.hh"
46#include "base/stats/group.hh"
47#include "base/types.hh"
48#include "config/the_gpu_isa.hh"
54#include "gpu-compute/misc.hh"
55#include "params/Wavefront.hh"
56#include "sim/sim_object.hh"
57
58namespace gem5
59{
60
61class Wavefront : public SimObject
62{
63 public:
65 {
66 // wavefront is stalled
68 // wavefront is returning from a kernel
70 // wavefront is running normally
72 // wavefront is stalled
74
76
94 };
95
96 // gfx version wavefront is executing
97 GfxVersion gfxVersion;
98 // HW slot id where the WF is mapped to inside a SIMD unit
99 const int wfSlotId;
101 // SIMD unit where the WF has been scheduled
102 const int simdId;
103 // id of the execution unit (or pipeline) where the oldest instruction
104 // of the WF is scheduled
108 // pointer to parent CU
111
113
116 // last tick during which all WFs in the CU are not idle
118
119 // Execution unit resource ID's associated with this WF
120 // These are static mappings set at WF slot construction and
121 // based off of the simdId and wfSlotId.
122
123 // Index to scalarALUs resource vector in CU
125
126 // Indices into readyList/dispatchList of resources used by this
127 // wavefront
132
133 // number of VGPRs required by WF
134 uint32_t maxVgprs;
135 // number of SGPRs required by WF
136 uint32_t maxSgprs;
137 // first accumulation vgpr number
138 uint32_t accumOffset;
139 void freeResources();
141 void setStatus(status_e newStatus);
143 std::string statusToString(status_e status);
144 void resizeRegFiles(int num_vregs, int num_sregs);
147 bool isOldestInstWaitcnt();
148 bool isOldestInstSleep();
149 bool isOldestInstGMem();
150 bool isOldestInstLMem();
151 bool isOldestInstPrivMem();
152 bool isOldestInstFlatMem();
156 bool isOldestInstBarrier();
157
158 // used for passing spill address to DDInstGPU
162 /* kernel launch parameters */
163 uint32_t workGroupId[3];
164 uint32_t workGroupSz[3];
165 uint32_t gridSz[3];
166 uint32_t wgId;
167 uint32_t wgSz;
168 /* the actual WG size can differ from the maximum size */
169 uint32_t actualWgSz[3];
172 // wavefront id within a workgroup
173 uint32_t wfId;
174 uint32_t maxDynWaveId;
175 uint32_t dispatchId;
176 // vector and scalar memory requests pending in memory system
178 // outstanding global memory write requests
180 // outstanding local memory write requests
182 // outstanding global memory read requests
184 // outstanding local memory read requests
186 // outstanding scalar memory read requests
188 // outstanding scalar memory write requests
196
198 uint64_t lastTrace;
199 // number of virtual vector registers reserved by WF
201 // number of virtual scalar registers reserved by WF
203 // Index into the Vector Register File's namespace where the WF's registers
204 // will live while the WF is executed
206 // Index into the Scalar Register File's namespace where the WF's registers
207 // will live while the WF is executed
209
210 // Architected flat scratch address for MI300+
212
213 // Old value of destination gpr (for trace)
215 // Id of destination gpr (for trace)
216 uint32_t oldVgprId;
217 // Tick count of last old_vgpr copy
218 uint64_t oldVgprTcnt;
219
220 // Old value of destination gpr (for trace)
222 // Id of destination gpr (for trace)
223 uint32_t oldDgprId;
224 // Tick count of last old_dgpr copy
225 uint64_t oldDgprTcnt;
226
227 // Execution mask at wavefront start
229
230 // a pointer to the fraction of the LDS allocated
231 // to this workgroup (thus this wavefront)
233
234 // unique WF id over all WFs executed across all CUs
235 uint64_t wfDynId;
236
237 // dyn inst id (per SIMD) of last instruction exec from this wave
238 uint64_t lastInstExec;
239
240 // Map to track the dyn instruction id of each vector register value
241 // produced, indexed by physical vector register ID
242 std::unordered_map<int,uint64_t> rawDist;
243
244 // Counts the number of reads performed to each physical register
245 // - counts are reset to 0 for each dynamic wavefront launched
247
248 void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems);
249
250 // context for save/restore
251 uint8_t *context;
252
253 typedef WavefrontParams Params;
254 Wavefront(const Params &p);
255 ~Wavefront();
256 virtual void init();
257
258 void
260 {
261 computeUnit = cu;
262 }
263
265 void start(uint64_t _wfDynId, uint64_t _base_ptr);
266 void exec();
267 // called by SCH stage to reserve
269 bool stopFetch();
270
271 Addr pc() const;
272 void pc(Addr new_pc);
273
275 bool execMask(int lane) const;
276
277
278 void discardFetch();
279
280 bool waitCntsSatisfied();
281 void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt);
282 void clearWaitCnts();
283
284 void incVMemInstsIssued();
285 void incExpInstsIssued();
286 void incLGKMInstsIssued();
287 void decVMemInstsIssued();
288 void decExpInstsIssued();
289 void decLGKMInstsIssued();
290
291 std::set<InstSeqNum> vmemIssued;
292 std::set<InstSeqNum> lgkmIssued;
293 std::set<InstSeqNum> expIssued;
294 std::unordered_map<InstSeqNum, std::string> cntInsts;
295
296 void trackVMemInst(GPUDynInstPtr gpu_dyn_inst);
297 void trackLGKMInst(GPUDynInstPtr gpu_dyn_inst);
298 void trackExpInst(GPUDynInstPtr gpu_dyn_inst);
299 void trackInst(GPUDynInstPtr gpu_dyn_inst);
300
301 void untrackVMemInst(GPUDynInstPtr gpu_dyn_inst);
302 void untrackLGKMInst(GPUDynInstPtr gpu_dyn_inst);
303 void untrackExpInst(GPUDynInstPtr gpu_dyn_inst);
304 void untrackInst(InstSeqNum seqNum);
305
307 void freeRegisterFile();
308
309 bool sleepDone();
310 void setSleepTime(int sleep_time);
311
312 TheGpuISA::GPUISA&
314 {
315 return _gpuISA;
316 }
317
318 void barrierId(int bar_id);
319 int barrierId() const;
320 bool hasBarrier() const;
321 void releaseBarrier();
322
323 // For periodic progress prints
324 void printProgress();
325
326 // Tracking variables for periodic progress
328 std::string lastInstDisasm;
329 std::string lastInstRdyStatus;
331
332 private:
333 TheGpuISA::GPUISA _gpuISA;
334
337
364 int barId;
365
366 public:
368 {
370
371 // Number of instructions executed by this wavefront slot across all
372 // dynamic wavefronts
374
375 // Number of cycles this WF spends in SCH stage
377
378 // Number of stall cycles encountered by this WF in SCH stage
380
381 // The following stats sum to the value of schStalls, and record, per
382 // WF slot, what the cause of each stall was at a coarse granularity.
383
384 // Cycles WF is selected by scheduler, but RFs cannot support
385 // instruction
387 // Cycles spent waiting for execution resources
389 // cycles spent waiting for RF reads to complete in SCH stage
391 // LDS arbitration stall cycles. WF attempts to execute LM instruction,
392 // but another wave is executing FLAT, which requires LM and GM and
393 // forces this WF to stall.
395
396 // number of times an instruction of a WF is blocked from being issued
397 // due to WAR and WAW dependencies
399 // number of times an instruction of a WF is blocked from being issued
400 // due to RAW dependencies
402
403 // Distribution to track the distance between producer and consumer
404 // for vector register values
406
407 // Distribution to track the number of times every vector register
408 // value produced is consumed.
411};
412
413} // namespace gem5
414
415#endif // __GPU_COMPUTE_WAVEFRONT_HH__
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition lds_state.hh:58
uint32_t maxSgprs
Definition wavefront.hh:136
status_e status
Definition wavefront.hh:361
bool isOldestInstWaitcnt()
Definition wavefront.cc:683
Addr pc() const
bool hasBarrier() const
VectorMask _execMask
Definition wavefront.hh:363
uint32_t actualWgSzTotal
Definition wavefront.hh:170
InstSeqNum lastInstSeqNum
Definition wavefront.hh:327
void reserveGmResource(GPUDynInstPtr ii)
Definition wavefront.cc:841
uint64_t oldVgprTcnt
Definition wavefront.hh:218
std::vector< Addr > lastAddr
Definition wavefront.hh:159
uint32_t oldDgprId
Definition wavefront.hh:223
std::set< InstSeqNum > expIssued
Definition wavefront.hh:293
void setStatus(status_e newStatus)
Definition wavefront.cc:599
void untrackInst(InstSeqNum seqNum)
bool waitCntsSatisfied()
uint8_t * context
Definition wavefront.hh:251
void validateRequestCounters()
Definition wavefront.cc:827
void trackInst(GPUDynInstPtr gpu_dyn_inst)
void trackVMemInst(GPUDynInstPtr gpu_dyn_inst)
const int simdId
Definition wavefront.hh:102
bool isOldestInstLMem()
Definition wavefront.cc:769
bool isOldestInstPrivMem()
Definition wavefront.cc:782
bool isOldestInstScalarMem()
Definition wavefront.cc:756
uint32_t maxDynWaveId
Definition wavefront.hh:174
uint64_t oldDgprTcnt
Definition wavefront.hh:225
Wavefront(const Params &p)
Definition wavefront.cc:50
bool isOldestInstBarrier()
Definition wavefront.cc:730
void resizeRegFiles(int num_vregs, int num_sregs)
Definition wavefront.cc:588
TheGpuISA::GPUISA & gpuISA()
Definition wavefront.hh:313
int scalarOutstandingReqsWrGm
Definition wavefront.hh:189
uint32_t gridSz[3]
Definition wavefront.hh:165
void decExpInstsIssued()
std::set< InstSeqNum > lgkmIssued
Definition wavefront.hh:292
std::vector< uint32_t > oldVgpr
Definition wavefront.hh:214
void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
Definition wavefront.cc:122
void setSleepTime(int sleep_time)
ComputeUnit * computeUnit
Definition wavefront.hh:109
std::vector< uint32_t > workItemFlatId
Definition wavefront.hh:161
int vmWaitCnt
the following are used for waitcnt instructions vmWaitCnt: once set, we wait for the outstanding numbe...
Definition wavefront.hh:354
std::vector< int > vecReads
Definition wavefront.hh:246
std::deque< GPUDynInstPtr > instructionBuffer
Definition wavefront.hh:112
bool isOldestInstSleep()
Definition wavefront.cc:669
uint32_t accumOffset
Definition wavefront.hh:138
bool isLmInstruction(GPUDynInstPtr ii)
Definition wavefront.cc:658
GPUDynInstPtr nextInstr()
uint64_t lastTrace
Definition wavefront.hh:198
std::vector< uint32_t > workItemId[3]
Definition wavefront.hh:160
std::vector< uint64_t > oldDgpr
Definition wavefront.hh:221
bool isOldestInstScalarALU()
Definition wavefront.cc:700
void untrackExpInst(GPUDynInstPtr gpu_dyn_inst)
void releaseBarrier()
bool isOldestInstFlatMem()
Definition wavefront.cc:795
uint32_t dispatchId
Definition wavefront.hh:175
status_e getStatus()
Definition wavefront.hh:142
VectorMask initMask
Definition wavefront.hh:228
WavefrontParams Params
Definition wavefront.hh:253
uint32_t maxVgprs
Definition wavefront.hh:134
void decVMemInstsIssued()
void computeActualWgSz(HSAQueueEntry *task)
std::string lastInstDisasm
Definition wavefront.hh:328
uint32_t workGroupId[3]
Definition wavefront.hh:163
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
const int wfSlotId
Definition wavefront.hh:99
void setParent(ComputeUnit *cu)
Definition wavefront.hh:259
std::unordered_map< int, uint64_t > rawDist
Definition wavefront.hh:242
void incExpInstsIssued()
void untrackLGKMInst(GPUDynInstPtr gpu_dyn_inst)
std::vector< int > reserveResources()
Definition wavefront.cc:889
uint32_t startSgprIndex
Definition wavefront.hh:208
GfxVersion gfxVersion
Definition wavefront.hh:97
void decLGKMInstsIssued()
void incLGKMInstsIssued()
int barrierId() const
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition wavefront.cc:107
uint32_t workGroupSz[3]
Definition wavefront.hh:164
uint32_t oldVgprId
Definition wavefront.hh:216
void untrackVMemInst(GPUDynInstPtr gpu_dyn_inst)
void trackExpInst(GPUDynInstPtr gpu_dyn_inst)
bool isOldestInstVectorALU()
Definition wavefront.cc:715
uint64_t lastInstExec
Definition wavefront.hh:238
LdsChunk * ldsChunk
Definition wavefront.hh:232
std::unordered_map< InstSeqNum, std::string > cntInsts
Definition wavefront.hh:294
uint32_t actualWgSz[3]
Definition wavefront.hh:169
Addr archFlatScratchAddr
Definition wavefront.hh:211
std::set< InstSeqNum > vmemIssued
Definition wavefront.hh:291
void trackLGKMInst(GPUDynInstPtr gpu_dyn_inst)
int scalarOutstandingReqsRdGm
Definition wavefront.hh:187
void freeResources()
Definition wavefront.cc:822
void incVMemInstsIssued()
std::string statusToString(status_e status)
void reserveLmResource(GPUDynInstPtr ii)
Definition wavefront.cc:871
std::string lastInstRdyStatus
Definition wavefront.hh:329
@ S_BARRIER
WF is stalled at a barrier.
Definition wavefront.hh:93
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition wavefront.hh:89
bool isOldestInstGMem()
Definition wavefront.cc:743
gem5::Wavefront::WavefrontStats stats
VectorMask & execMask()
uint64_t wfDynId
Definition wavefront.hh:235
void freeRegisterFile()
Freeing VRF space.
bool isGmInstruction(GPUDynInstPtr ii)
Definition wavefront.cc:647
uint32_t startVgprIndex
Definition wavefront.hh:205
void start(uint64_t _wfDynId, uint64_t _base_ptr)
Definition wavefront.cc:636
TheGpuISA::GPUISA _gpuISA
Definition wavefront.hh:333
A simple distribution stat.
Statistics container.
Definition group.hh:93
This is a simple scalar statistic, like a counter.
STL deque class.
Definition stl.hh:44
STL vector class.
Definition stl.hh:37
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
SimObject(const Params &p)
Definition sim_object.cc:58
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 5, 0 > status
Bitfield< 0 > p
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition misc.hh:48
uint64_t InstSeqNum
Definition inst_seq.hh:40
Declaration of Statistics objects.
statistics::Scalar numTimesBlockedDueRAWDependencies
Definition wavefront.hh:401
statistics::Scalar schResourceStalls
Definition wavefront.hh:388
WavefrontStats(statistics::Group *parent)
statistics::Distribution vecRawDistance
Definition wavefront.hh:405
statistics::Distribution readsPerWrite
Definition wavefront.hh:409
statistics::Scalar schCycles
Definition wavefront.hh:376
statistics::Scalar numTimesBlockedDueWAXDependencies
Definition wavefront.hh:398
statistics::Scalar schRfAccessStalls
Definition wavefront.hh:386
statistics::Scalar schOpdNrdyStalls
Definition wavefront.hh:390
statistics::Scalar numInstrExecuted
Definition wavefront.hh:373
statistics::Scalar schStalls
Definition wavefront.hh:379
statistics::Scalar schLdsArbStalls
Definition wavefront.hh:394

Generated on Mon May 26 2025 09:19:11 for gem5 by doxygen 1.13.2