gem5 v24.0.0.0
Loading...
Searching...
No Matches
wavefront.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __GPU_COMPUTE_WAVEFRONT_HH__
33#define __GPU_COMPUTE_WAVEFRONT_HH__
34
35#include <cassert>
36#include <deque>
37#include <list>
38#include <memory>
39#include <unordered_map>
40#include <vector>
41
42#include "arch/gpu_isa.hh"
43#include "base/logging.hh"
44#include "base/statistics.hh"
45#include "base/stats/group.hh"
46#include "base/types.hh"
47#include "config/the_gpu_isa.hh"
53#include "gpu-compute/misc.hh"
54#include "params/Wavefront.hh"
55#include "sim/sim_object.hh"
56
57namespace gem5
58{
59
60class Wavefront : public SimObject
61{
62 public:
64 {
65 // wavefront is stalled
67 // wavefront is returning from a kernel
69 // wavefront is running normally
71 // wavefront is stalled
73
75
93 };
94
95 // gfx version wavefront is executing
96 GfxVersion gfxVersion;
97 // HW slot id where the WF is mapped to inside a SIMD unit
98 const int wfSlotId;
99 int kernId;
100 // SIMD unit where the WF has been scheduled
101 const int simdId;
102 // id of the execution unit (or pipeline) where the oldest instruction
103 // of the WF is scheduled
107 // pointer to parent CU
110
112
115 // last tick during which all WFs in the CU are not idle
117
118 // Execution unit resource IDs associated with this WF
119 // These are static mappings set at WF slot construction and
120 // based off of the simdId and wfSlotId.
121
122 // Index to scalarALUs resource vector in CU
124
125 // Indices into readyList/dispatchList of resources used by this
126 // wavefront
131
132 // number of VGPRs required by WF
133 uint32_t maxVgprs;
134 // number of SGPRs required by WF
135 uint32_t maxSgprs;
136 // first accumulation vgpr number
137 uint32_t accumOffset;
138 void freeResources();
140 void setStatus(status_e newStatus);
142 void resizeRegFiles(int num_vregs, int num_sregs);
145 bool isOldestInstWaitcnt();
146 bool isOldestInstSleep();
147 bool isOldestInstGMem();
148 bool isOldestInstLMem();
149 bool isOldestInstPrivMem();
150 bool isOldestInstFlatMem();
154 bool isOldestInstBarrier();
155
156 // used for passing spill address to DDInstGPU
160 /* kernel launch parameters */
161 uint32_t workGroupId[3];
162 uint32_t workGroupSz[3];
163 uint32_t gridSz[3];
164 uint32_t wgId;
165 uint32_t wgSz;
166 /* the actual WG size can differ from the maximum size */
167 uint32_t actualWgSz[3];
170 // wavefront id within a workgroup
171 uint32_t wfId;
172 uint32_t maxDynWaveId;
173 uint32_t dispatchId;
174 // vector and scalar memory requests pending in memory system
176 // outstanding global memory write requests
178 // outstanding local memory write requests
180 // outstanding global memory read requests
182 // outstanding local memory read requests
184 // outstanding scalar memory read requests
186 // outstanding scalar memory write requests
194
196 uint64_t lastTrace;
197 // number of virtual vector registers reserved by WF
199 // number of virtual scalar registers reserved by WF
201 // Index into the Vector Register File's namespace where the WF's registers
202 // will live while the WF is executed
204 // Index into the Scalar Register File's namespace where the WF's registers
205 // will live while the WF is executed
207
208 // Architected flat scratch address for MI300+
210
211 // Old value of destination gpr (for trace)
213 // Id of destination gpr (for trace)
214 uint32_t oldVgprId;
215 // Tick count of last old_vgpr copy
216 uint64_t oldVgprTcnt;
217
218 // Old value of destination gpr (for trace)
220 // Id of destination gpr (for trace)
221 uint32_t oldDgprId;
222 // Tick count of last old_dgpr copy
223 uint64_t oldDgprTcnt;
224
225 // Execution mask at wavefront start
227
228 // a pointer to the fraction of the LDS allocated
229 // to this workgroup (thus this wavefront)
231
232 // unique WF id over all WFs executed across all CUs
233 uint64_t wfDynId;
234
235 // dyn inst id (per SIMD) of last instruction exec from this wave
236 uint64_t lastInstExec;
237
238 // Map to track the dyn instruction id of each vector register value
239 // produced, indexed by physical vector register ID
240 std::unordered_map<int,uint64_t> rawDist;
241
242 // Counts the number of reads performed to each physical register
243 // - counts are reset to 0 for each dynamic wavefront launched
245
246 void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems);
247
248 // context for save/restore
249 uint8_t *context;
250
251 typedef WavefrontParams Params;
252 Wavefront(const Params &p);
253 ~Wavefront();
254 virtual void init();
255
256 void
258 {
259 computeUnit = cu;
260 }
261
263 void start(uint64_t _wfDynId, uint64_t _base_ptr);
264 void exec();
265 // called by SCH stage to reserve
267 bool stopFetch();
268
269 Addr pc() const;
270 void pc(Addr new_pc);
271
273 bool execMask(int lane) const;
274
275
276 void discardFetch();
277
278 bool waitCntsSatisfied();
279 void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt);
280 void clearWaitCnts();
281
282 void incVMemInstsIssued();
283 void incExpInstsIssued();
284 void incLGKMInstsIssued();
285 void decVMemInstsIssued();
286 void decExpInstsIssued();
287 void decLGKMInstsIssued();
288
290 void freeRegisterFile();
291
292 bool sleepDone();
293 void setSleepTime(int sleep_time);
294
295 TheGpuISA::GPUISA&
297 {
298 return _gpuISA;
299 }
300
301 void barrierId(int bar_id);
302 int barrierId() const;
303 bool hasBarrier() const;
304 void releaseBarrier();
305
306 private:
307 TheGpuISA::GPUISA _gpuISA;
308
311
338 int barId;
339
340 public:
342 {
344
345 // Number of instructions executed by this wavefront slot across all
346 // dynamic wavefronts
348
349 // Number of cycles this WF spends in SCH stage
351
352 // Number of stall cycles encountered by this WF in SCH stage
354
355 // The following stats sum to the value of schStalls, and record, per
356 // WF slot, what the cause of each stall was at a coarse granularity.
357
358 // Cycles WF is selected by scheduler, but RFs cannot support
359 // instruction
361 // Cycles spent waiting for execution resources
363 // cycles spent waiting for RF reads to complete in SCH stage
365 // LDS arbitration stall cycles. WF attempts to execute LM instruction,
366 // but another wave is executing FLAT, which requires LM and GM and
367 // forces this WF to stall.
369
370 // number of times an instruction of a WF is blocked from being issued
371 // due to WAR and WAW dependencies
373 // number of times an instruction of a WF is blocked from being issued
374 // due to RAW dependencies
376
377 // Distribution to track the distance between producer and consumer
378 // for vector register values
380
381 // Distribution to track the number of times every vector register
382 // value produced is consumed.
385};
386
387} // namespace gem5
388
389#endif // __GPU_COMPUTE_WAVEFRONT_HH__
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition lds_state.hh:58
Abstract superclass for simulation objects.
uint32_t maxSgprs
Definition wavefront.hh:135
status_e status
Definition wavefront.hh:335
bool isOldestInstWaitcnt()
Definition wavefront.cc:657
Addr pc() const
bool hasBarrier() const
VectorMask _execMask
Definition wavefront.hh:337
uint32_t actualWgSzTotal
Definition wavefront.hh:168
void reserveGmResource(GPUDynInstPtr ii)
Definition wavefront.cc:815
uint64_t oldVgprTcnt
Definition wavefront.hh:216
std::vector< Addr > lastAddr
Definition wavefront.hh:157
uint32_t oldDgprId
Definition wavefront.hh:221
void setStatus(status_e newStatus)
Definition wavefront.cc:573
bool waitCntsSatisfied()
uint8_t * context
Definition wavefront.hh:249
void validateRequestCounters()
Definition wavefront.cc:801
const int simdId
Definition wavefront.hh:101
bool isOldestInstLMem()
Definition wavefront.cc:743
bool isOldestInstPrivMem()
Definition wavefront.cc:756
bool isOldestInstScalarMem()
Definition wavefront.cc:730
uint32_t maxDynWaveId
Definition wavefront.hh:172
uint64_t oldDgprTcnt
Definition wavefront.hh:223
Wavefront(const Params &p)
Definition wavefront.cc:49
bool isOldestInstBarrier()
Definition wavefront.cc:704
void resizeRegFiles(int num_vregs, int num_sregs)
Definition wavefront.cc:562
TheGpuISA::GPUISA & gpuISA()
Definition wavefront.hh:296
int scalarOutstandingReqsWrGm
Definition wavefront.hh:187
uint32_t gridSz[3]
Definition wavefront.hh:163
void decExpInstsIssued()
std::vector< uint32_t > oldVgpr
Definition wavefront.hh:212
void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
Definition wavefront.cc:118
void setSleepTime(int sleep_time)
ComputeUnit * computeUnit
Definition wavefront.hh:108
std::vector< uint32_t > workItemFlatId
Definition wavefront.hh:159
int vmWaitCnt
the following are used for waitcnt instructions vmWaitCnt: once set, we wait for the outstanding numbe...
Definition wavefront.hh:328
std::vector< int > vecReads
Definition wavefront.hh:244
std::deque< GPUDynInstPtr > instructionBuffer
Definition wavefront.hh:111
bool isOldestInstSleep()
Definition wavefront.cc:643
uint32_t accumOffset
Definition wavefront.hh:137
bool isLmInstruction(GPUDynInstPtr ii)
Definition wavefront.cc:632
GPUDynInstPtr nextInstr()
uint64_t lastTrace
Definition wavefront.hh:196
std::vector< uint32_t > workItemId[3]
Definition wavefront.hh:158
std::vector< uint64_t > oldDgpr
Definition wavefront.hh:219
bool isOldestInstScalarALU()
Definition wavefront.cc:674
void releaseBarrier()
bool isOldestInstFlatMem()
Definition wavefront.cc:769
uint32_t dispatchId
Definition wavefront.hh:173
status_e getStatus()
Definition wavefront.hh:141
VectorMask initMask
Definition wavefront.hh:226
WavefrontParams Params
Definition wavefront.hh:251
uint32_t maxVgprs
Definition wavefront.hh:133
void decVMemInstsIssued()
void computeActualWgSz(HSAQueueEntry *task)
uint32_t workGroupId[3]
Definition wavefront.hh:161
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
const int wfSlotId
Definition wavefront.hh:98
void setParent(ComputeUnit *cu)
Definition wavefront.hh:257
std::unordered_map< int, uint64_t > rawDist
Definition wavefront.hh:240
void incExpInstsIssued()
std::vector< int > reserveResources()
Definition wavefront.cc:863
uint32_t startSgprIndex
Definition wavefront.hh:206
GfxVersion gfxVersion
Definition wavefront.hh:96
void decLGKMInstsIssued()
void incLGKMInstsIssued()
int barrierId() const
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition wavefront.cc:103
uint32_t workGroupSz[3]
Definition wavefront.hh:162
uint32_t oldVgprId
Definition wavefront.hh:214
bool isOldestInstVectorALU()
Definition wavefront.cc:689
uint64_t lastInstExec
Definition wavefront.hh:236
LdsChunk * ldsChunk
Definition wavefront.hh:230
uint32_t actualWgSz[3]
Definition wavefront.hh:167
Addr archFlatScratchAddr
Definition wavefront.hh:209
int scalarOutstandingReqsRdGm
Definition wavefront.hh:185
void freeResources()
Definition wavefront.cc:796
void incVMemInstsIssued()
void reserveLmResource(GPUDynInstPtr ii)
Definition wavefront.cc:845
@ S_BARRIER
WF is stalled at a barrier.
Definition wavefront.hh:92
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition wavefront.hh:88
bool isOldestInstGMem()
Definition wavefront.cc:717
gem5::Wavefront::WavefrontStats stats
VectorMask & execMask()
uint64_t wfDynId
Definition wavefront.hh:233
void freeRegisterFile()
Freeing VRF space.
bool isGmInstruction(GPUDynInstPtr ii)
Definition wavefront.cc:621
uint32_t startVgprIndex
Definition wavefront.hh:203
void start(uint64_t _wfDynId, uint64_t _base_ptr)
Definition wavefront.cc:610
TheGpuISA::GPUISA _gpuISA
Definition wavefront.hh:307
A simple distribution stat.
Statistics container.
Definition group.hh:93
This is a simple scalar statistic, like a counter.
STL deque class.
Definition stl.hh:44
STL vector class.
Definition stl.hh:37
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 0 > p
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition misc.hh:48
Declaration of Statistics objects.
statistics::Scalar numTimesBlockedDueRAWDependencies
Definition wavefront.hh:375
statistics::Scalar schResourceStalls
Definition wavefront.hh:362
WavefrontStats(statistics::Group *parent)
statistics::Distribution vecRawDistance
Definition wavefront.hh:379
statistics::Distribution readsPerWrite
Definition wavefront.hh:383
statistics::Scalar schCycles
Definition wavefront.hh:350
statistics::Scalar numTimesBlockedDueWAXDependencies
Definition wavefront.hh:372
statistics::Scalar schRfAccessStalls
Definition wavefront.hh:360
statistics::Scalar schOpdNrdyStalls
Definition wavefront.hh:364
statistics::Scalar numInstrExecuted
Definition wavefront.hh:347
statistics::Scalar schStalls
Definition wavefront.hh:353
statistics::Scalar schLdsArbStalls
Definition wavefront.hh:368

Generated on Tue Jun 18 2024 16:24:04 for gem5 by doxygen 1.11.0