gem5 v23.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
wavefront.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __GPU_COMPUTE_WAVEFRONT_HH__
33#define __GPU_COMPUTE_WAVEFRONT_HH__
34
35#include <cassert>
36#include <deque>
37#include <list>
38#include <memory>
39#include <unordered_map>
40#include <vector>
41
42#include "arch/gpu_isa.hh"
43#include "base/logging.hh"
44#include "base/statistics.hh"
45#include "base/stats/group.hh"
46#include "base/types.hh"
47#include "config/the_gpu_isa.hh"
53#include "gpu-compute/misc.hh"
54#include "params/Wavefront.hh"
55#include "sim/sim_object.hh"
56
57namespace gem5
58{
59
60class Wavefront : public SimObject
61{
62 public:
64 {
65 // wavefront is stalled
67 // wavefront is returning from a kernel
69 // wavefront is running normally
71 // wavefront is stalled
73
75
93 };
94
95 // HW slot id where the WF is mapped to inside a SIMD unit
96 const int wfSlotId;
97 int kernId;
98 // SIMD unit where the WF has been scheduled
99 const int simdId;
100 // id of the execution unit (or pipeline) where the oldest instruction
101 // of the WF is scheduled
105 // pointer to parent CU
108
110
113 // last tick during which all WFs in the CU are not idle
115
116 // Execution unit resource ID's associated with this WF
117 // These are static mappings set at WF slot construction and
118 // based off of the simdId and wfSlotId.
119
120 // Index to scalarALUs resource vector in CU
122
123 // Indices into readyList/dispatchList of resources used by this
124 // wavefront
129
130 // number of VGPRs required by WF
131 uint32_t maxVgprs;
132 // number of SGPRs required by WF
133 uint32_t maxSgprs;
134 void freeResources();
136 void setStatus(status_e newStatus);
138 void resizeRegFiles(int num_vregs, int num_sregs);
141 bool isOldestInstWaitcnt();
142 bool isOldestInstSleep();
143 bool isOldestInstGMem();
144 bool isOldestInstLMem();
145 bool isOldestInstPrivMem();
146 bool isOldestInstFlatMem();
150 bool isOldestInstBarrier();
151
152 // used for passing spill address to DDInstGPU
156 /* kernel launch parameters */
157 uint32_t workGroupId[3];
158 uint32_t workGroupSz[3];
159 uint32_t gridSz[3];
160 uint32_t wgId;
161 uint32_t wgSz;
162 /* the actual WG size can differ from the maximum size */
163 uint32_t actualWgSz[3];
166 // wavefront id within a workgroup
167 uint32_t wfId;
168 uint32_t maxDynWaveId;
169 uint32_t dispatchId;
170 // vector and scalar memory requests pending in memory system
172 // outstanding global memory write requests
174 // outstanding local memory write requests
176 // outstanding global memory read requests
178 // outstanding local memory read requests
180 // outstanding scalar memory read requests
182 // outstanding scalar memory write requests
190
192 uint64_t lastTrace;
193 // number of virtual vector registers reserved by WF
195 // number of virtual scalar registers reserved by WF
197 // Index into the Vector Register File's namespace where the WF's registers
198 // will live while the WF is executed
200 // Index into the Scalar Register File's namespace where the WF's registers
201 // will live while the WF is executed
203
204 // Old value of destination gpr (for trace)
206 // Id of destination gpr (for trace)
207 uint32_t oldVgprId;
208 // Tick count of last old_vgpr copy
209 uint64_t oldVgprTcnt;
210
211 // Old value of destination gpr (for trace)
213 // Id of destination gpr (for trace)
214 uint32_t oldDgprId;
215 // Tick count of last old_dgpr copy
216 uint64_t oldDgprTcnt;
217
218 // Execution mask at wavefront start
220
221 // a pointer to the fraction of the LDS allocated
222 // to this workgroup (thus this wavefront)
224
225 // unique WF id over all WFs executed across all CUs
226 uint64_t wfDynId;
227
228 // dyn inst id (per SIMD) of last instruction exec from this wave
229 uint64_t lastInstExec;
230
231 // Map to track the dyn instruction id of each vector register value
232 // produced, indexed by physical vector register ID
233 std::unordered_map<int,uint64_t> rawDist;
234
235 // Counts the number of reads performed to each physical register
236 // - counts are reset to 0 for each dynamic wavefront launched
238
239 void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems);
240
241 // context for save/restore
242 uint8_t *context;
243
244 typedef WavefrontParams Params;
245 Wavefront(const Params &p);
246 ~Wavefront();
247 virtual void init();
248
249 void
251 {
252 computeUnit = cu;
253 }
254
256 void start(uint64_t _wfDynId, uint64_t _base_ptr);
257 void exec();
258 // called by SCH stage to reserve
260 bool stopFetch();
261
262 Addr pc() const;
263 void pc(Addr new_pc);
264
266 bool execMask(int lane) const;
267
268
269 void discardFetch();
270
271 bool waitCntsSatisfied();
272 void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt);
273 void clearWaitCnts();
274
275 void incVMemInstsIssued();
276 void incExpInstsIssued();
277 void incLGKMInstsIssued();
278 void decVMemInstsIssued();
279 void decExpInstsIssued();
280 void decLGKMInstsIssued();
281
283 void freeRegisterFile();
284
285 bool sleepDone();
286 void setSleepTime(int sleep_time);
287
288 TheGpuISA::GPUISA&
290 {
291 return _gpuISA;
292 }
293
294 void barrierId(int bar_id);
295 int barrierId() const;
296 bool hasBarrier() const;
297 void releaseBarrier();
298
299 private:
300 TheGpuISA::GPUISA _gpuISA;
301
304
331 int barId;
332
333 public:
335 {
337
338 // Number of instructions executed by this wavefront slot across all
339 // dynamic wavefronts
341
342 // Number of cycles this WF spends in SCH stage
344
345 // Number of stall cycles encountered by this WF in SCH stage
347
348 // The following stats sum to the value of schStalls, and record, per
349 // WF slot, what the cause of each stall was at a coarse granularity.
350
351 // Cycles WF is selected by scheduler, but RFs cannot support
352 // instruction
354 // Cycles spent waiting for execution resources
356 // cycles spent waiting for RF reads to complete in SCH stage
358 // LDS arbitration stall cycles. WF attempts to execute LM instruction,
359 // but another wave is executing FLAT, which requires LM and GM and
360 // forces this WF to stall.
362
363 // number of times an instruction of a WF is blocked from being issued
364 // due to WAR and WAW dependencies
366 // number of times an instruction of a WF is blocked from being issued
367 // due to RAW dependencies
369
370 // Distribution to track the distance between producer and consumer
371 // for vector register values
373
374 // Distribution to track the number of times every vector register
375 // value produced is consumed.
378};
379
380} // namespace gem5
381
382#endif // __GPU_COMPUTE_WAVEFRONT_HH__
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition lds_state.hh:57
Abstract superclass for simulation objects.
uint32_t maxSgprs
Definition wavefront.hh:133
status_e status
Definition wavefront.hh:328
bool isOldestInstWaitcnt()
Definition wavefront.cc:634
Addr pc() const
bool hasBarrier() const
VectorMask _execMask
Definition wavefront.hh:330
uint32_t actualWgSzTotal
Definition wavefront.hh:164
void reserveGmResource(GPUDynInstPtr ii)
Definition wavefront.cc:792
uint64_t oldVgprTcnt
Definition wavefront.hh:209
std::vector< Addr > lastAddr
Definition wavefront.hh:153
uint32_t oldDgprId
Definition wavefront.hh:214
void setStatus(status_e newStatus)
Definition wavefront.cc:550
bool waitCntsSatisfied()
uint8_t * context
Definition wavefront.hh:242
void validateRequestCounters()
Definition wavefront.cc:778
const int simdId
Definition wavefront.hh:99
bool isOldestInstLMem()
Definition wavefront.cc:720
bool isOldestInstPrivMem()
Definition wavefront.cc:733
bool isOldestInstScalarMem()
Definition wavefront.cc:707
uint32_t maxDynWaveId
Definition wavefront.hh:168
uint64_t oldDgprTcnt
Definition wavefront.hh:216
bool isOldestInstBarrier()
Definition wavefront.cc:681
void resizeRegFiles(int num_vregs, int num_sregs)
Definition wavefront.cc:539
TheGpuISA::GPUISA & gpuISA()
Definition wavefront.hh:289
int scalarOutstandingReqsWrGm
Definition wavefront.hh:183
uint32_t gridSz[3]
Definition wavefront.hh:159
void decExpInstsIssued()
std::vector< uint32_t > oldVgpr
Definition wavefront.hh:205
void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
Definition wavefront.cc:117
void setSleepTime(int sleep_time)
ComputeUnit * computeUnit
Definition wavefront.hh:106
std::vector< uint32_t > workItemFlatId
Definition wavefront.hh:155
int vmWaitCnt
the following are used for waitcnt instructions. vmWaitCnt: once set, we wait for the outstanding numbe...
Definition wavefront.hh:321
std::vector< int > vecReads
Definition wavefront.hh:237
std::deque< GPUDynInstPtr > instructionBuffer
Definition wavefront.hh:109
bool isOldestInstSleep()
Definition wavefront.cc:620
bool isLmInstruction(GPUDynInstPtr ii)
Definition wavefront.cc:609
GPUDynInstPtr nextInstr()
uint64_t lastTrace
Definition wavefront.hh:192
std::vector< uint32_t > workItemId[3]
Definition wavefront.hh:154
std::vector< uint64_t > oldDgpr
Definition wavefront.hh:212
bool isOldestInstScalarALU()
Definition wavefront.cc:651
void releaseBarrier()
bool isOldestInstFlatMem()
Definition wavefront.cc:746
uint32_t dispatchId
Definition wavefront.hh:169
status_e getStatus()
Definition wavefront.hh:137
VectorMask initMask
Definition wavefront.hh:219
WavefrontParams Params
Definition wavefront.hh:244
uint32_t maxVgprs
Definition wavefront.hh:131
void decVMemInstsIssued()
void computeActualWgSz(HSAQueueEntry *task)
uint32_t workGroupId[3]
Definition wavefront.hh:157
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
const int wfSlotId
Definition wavefront.hh:96
void setParent(ComputeUnit *cu)
Definition wavefront.hh:250
std::unordered_map< int, uint64_t > rawDist
Definition wavefront.hh:233
void incExpInstsIssued()
std::vector< int > reserveResources()
Definition wavefront.cc:840
uint32_t startSgprIndex
Definition wavefront.hh:202
void decLGKMInstsIssued()
void incLGKMInstsIssued()
int barrierId() const
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition wavefront.cc:102
uint32_t workGroupSz[3]
Definition wavefront.hh:158
uint32_t oldVgprId
Definition wavefront.hh:207
bool isOldestInstVectorALU()
Definition wavefront.cc:666
uint64_t lastInstExec
Definition wavefront.hh:229
LdsChunk * ldsChunk
Definition wavefront.hh:223
uint32_t actualWgSz[3]
Definition wavefront.hh:163
int scalarOutstandingReqsRdGm
Definition wavefront.hh:181
void freeResources()
Definition wavefront.cc:773
void incVMemInstsIssued()
void reserveLmResource(GPUDynInstPtr ii)
Definition wavefront.cc:822
@ S_BARRIER
WF is stalled at a barrier.
Definition wavefront.hh:92
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition wavefront.hh:88
bool isOldestInstGMem()
Definition wavefront.cc:694
gem5::Wavefront::WavefrontStats stats
VectorMask & execMask()
uint64_t wfDynId
Definition wavefront.hh:226
void freeRegisterFile()
Freeing VRF space.
bool isGmInstruction(GPUDynInstPtr ii)
Definition wavefront.cc:598
uint32_t startVgprIndex
Definition wavefront.hh:199
void start(uint64_t _wfDynId, uint64_t _base_ptr)
Definition wavefront.cc:587
TheGpuISA::GPUISA _gpuISA
Definition wavefront.hh:300
A simple distribution stat.
Statistics container.
Definition group.hh:93
This is a simple scalar statistic, like a counter.
STL deque class.
Definition stl.hh:44
STL vector class.
Definition stl.hh:37
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 0 > p
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition misc.hh:48
Declaration of Statistics objects.
statistics::Scalar numTimesBlockedDueRAWDependencies
Definition wavefront.hh:368
statistics::Scalar schResourceStalls
Definition wavefront.hh:355
statistics::Distribution vecRawDistance
Definition wavefront.hh:372
statistics::Distribution readsPerWrite
Definition wavefront.hh:376
statistics::Scalar schCycles
Definition wavefront.hh:343
statistics::Scalar numTimesBlockedDueWAXDependencies
Definition wavefront.hh:365
statistics::Scalar schRfAccessStalls
Definition wavefront.hh:353
statistics::Scalar schOpdNrdyStalls
Definition wavefront.hh:357
statistics::Scalar numInstrExecuted
Definition wavefront.hh:340
statistics::Scalar schStalls
Definition wavefront.hh:346
statistics::Scalar schLdsArbStalls
Definition wavefront.hh:361

Generated on Mon Jul 10 2023 14:24:31 for gem5 by doxygen 1.9.7