gem5  v22.1.0.0
wavefront.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __GPU_COMPUTE_WAVEFRONT_HH__
33 #define __GPU_COMPUTE_WAVEFRONT_HH__
34 
35 #include <cassert>
36 #include <deque>
37 #include <list>
38 #include <memory>
39 #include <unordered_map>
40 #include <vector>
41 
42 #include "arch/gpu_isa.hh"
43 #include "base/logging.hh"
44 #include "base/statistics.hh"
45 #include "base/stats/group.hh"
46 #include "base/types.hh"
47 #include "config/the_gpu_isa.hh"
52 #include "gpu-compute/lds_state.hh"
53 #include "gpu-compute/misc.hh"
54 #include "params/Wavefront.hh"
55 #include "sim/sim_object.hh"
56 
57 namespace gem5
58 {
59 
60 class Wavefront : public SimObject
61 {
62  public:
63  enum status_e
64  {
65  // wavefront is stalled
67  // wavefront is returning from a kernel
69  // wavefront is running normally
71  // wavefront is stalled
73 
75 
92  S_BARRIER
93  };
94 
95  // HW slot id where the WF is mapped to inside a SIMD unit
96  const int wfSlotId;
97  int kernId;
98  // SIMD unit where the WF has been scheduled
99  const int simdId;
100  // id of the execution unit (or pipeline) where the oldest instruction
101  // of the WF is scheduled
105  // pointer to parent CU
108 
110 
112  bool dropFetch;
113  // last tick during which all WFs in the CU are not idle
115 
116  // Execution unit resource IDs associated with this WF
117  // These are static mappings set at WF slot construction and
118  // based off of the simdId and wfSlotId.
119 
120  // Index to scalarALUs resource vector in CU
122 
123  // Indices into readyList/dispatchList of resources used by this
124  // wavefront
127  int localMem;
129 
130  // number of VGPRs required by WF
131  uint32_t maxVgprs;
132  // number of SGPRs required by WF
133  uint32_t maxSgprs;
134  void freeResources();
136  void setStatus(status_e newStatus);
137  status_e getStatus() { return status; }
138  void resizeRegFiles(int num_vregs, int num_sregs);
141  bool isOldestInstWaitcnt();
142  bool isOldestInstSleep();
143  bool isOldestInstGMem();
144  bool isOldestInstLMem();
145  bool isOldestInstPrivMem();
146  bool isOldestInstFlatMem();
147  bool isOldestInstVectorALU();
148  bool isOldestInstScalarALU();
149  bool isOldestInstScalarMem();
150  bool isOldestInstBarrier();
151 
152  // used for passing spill address to DDInstGPU
156  /* kernel launch parameters */
157  uint32_t workGroupId[3];
158  uint32_t workGroupSz[3];
159  uint32_t gridSz[3];
160  uint32_t wgId;
161  uint32_t wgSz;
162  /* the actual WG size can differ from the maximum size */
163  uint32_t actualWgSz[3];
164  uint32_t actualWgSzTotal;
165  void computeActualWgSz(HSAQueueEntry *task);
166  // wavefront id within a workgroup
167  uint32_t wfId;
168  uint32_t maxDynWaveId;
169  uint32_t dispatchId;
170  // vector and scalar memory requests pending in memory system
172  // outstanding global memory write requests
174  // outstanding local memory write requests
176  // outstanding global memory read requests
178  // outstanding local memory read requests
180  // outstanding scalar memory read requests
182  // outstanding scalar memory write requests
190 
192  uint64_t lastTrace;
193  // number of virtual vector registers reserved by WF
195  // number of virtual scalar registers reserved by WF
197  // Index into the Vector Register File's namespace where the WF's registers
198  // will live while the WF is executed
199  uint32_t startVgprIndex;
200  // Index into the Scalar Register File's namespace where the WF's registers
201  // will live while the WF is executed
202  uint32_t startSgprIndex;
203 
204  // Old value of destination gpr (for trace)
206  // Id of destination gpr (for trace)
207  uint32_t oldVgprId;
208  // Tick count of last old_vgpr copy
209  uint64_t oldVgprTcnt;
210 
211  // Old value of destination gpr (for trace)
213  // Id of destination gpr (for trace)
214  uint32_t oldDgprId;
215  // Tick count of last old_dgpr copy
216  uint64_t oldDgprTcnt;
217 
218  // Execution mask at wavefront start
220 
221  // a pointer to the fraction of the LDS allocated
222  // to this workgroup (thus this wavefront)
224 
225  // unique WF id over all WFs executed across all CUs
226  uint64_t wfDynId;
227 
228  // dyn inst id (per SIMD) of last instruction exec from this wave
229  uint64_t lastInstExec;
230 
231  // Map to track the dyn instruction id of each vector register value
232  // produced, indexed by physical vector register ID
233  std::unordered_map<int,uint64_t> rawDist;
234 
235  // Counts the number of reads performed to each physical register
236  // - counts are reset to 0 for each dynamic wavefront launched
238 
239  void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems);
240 
241  // context for save/restore
242  uint8_t *context;
243 
244  typedef WavefrontParams Params;
245  Wavefront(const Params &p);
246  ~Wavefront();
247  virtual void init();
248 
249  void
251  {
252  computeUnit = cu;
253  }
254 
256  void start(uint64_t _wfDynId, uint64_t _base_ptr);
257  void exec();
258  // called by SCH stage to reserve
260  bool stopFetch();
261 
262  Addr pc() const;
263  void pc(Addr new_pc);
264 
265  VectorMask& execMask();
266  bool execMask(int lane) const;
267 
268 
269  void discardFetch();
270 
271  bool waitCntsSatisfied();
272  void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt);
273  void clearWaitCnts();
274 
275  void incVMemInstsIssued();
276  void incExpInstsIssued();
277  void incLGKMInstsIssued();
278  void decVMemInstsIssued();
279  void decExpInstsIssued();
280  void decLGKMInstsIssued();
281 
283  void freeRegisterFile();
284 
285  bool sleepDone();
286  void setSleepTime(int sleep_time);
287 
288  TheGpuISA::GPUISA&
290  {
291  return _gpuISA;
292  }
293 
294  void barrierId(int bar_id);
295  int barrierId() const;
296  bool hasBarrier() const;
297  void releaseBarrier();
298 
299  private:
300  TheGpuISA::GPUISA _gpuISA;
301 
304 
327  int sleepCnt;
331  int barId;
332 
333  public:
335  {
337 
338  // Number of instructions executed by this wavefront slot across all
339  // dynamic wavefronts
341 
342  // Number of cycles this WF spends in SCH stage
344 
345  // Number of stall cycles encountered by this WF in SCH stage
347 
348  // The following stats sum to the value of schStalls, and record, per
349  // WF slot, what the cause of each stall was at a coarse granularity.
350 
351  // Cycles WF is selected by scheduler, but RFs cannot support
352  // instruction
354  // Cycles spent waiting for execution resources
356  // cycles spent waiting for RF reads to complete in SCH stage
358  // LDS arbitration stall cycles. WF attempts to execute LM instruction,
359  // but another wave is executing FLAT, which requires LM and GM and
360  // forces this WF to stall.
362 
363  // number of times an instruction of a WF is blocked from being issued
364  // due to WAR and WAW dependencies
366  // number of times an instruction of a WF is blocked from being issued
367  // due to RAW dependencies
369 
370  // Distribution to track the distance between producer and consumer
371  // for vector register values
373 
374  // Distribution to track the number of times every vector register
375  // value produced is consumed.
377  } stats;
378 };
379 
380 } // namespace gem5
381 
382 #endif // __GPU_COMPUTE_WAVEFRONT_HH__
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
this represents a slice of the overall LDS, intended to be associated with an individual workgroup
Definition: lds_state.hh:57
Abstract superclass for simulation objects.
Definition: sim_object.hh:148
uint32_t maxSgprs
Definition: wavefront.hh:133
TheGpuISA::GPUISA & gpuISA()
Definition: wavefront.hh:289
status_e status
Definition: wavefront.hh:328
int scalarAluGlobalIdx
Definition: wavefront.hh:125
bool isOldestInstWaitcnt()
Definition: wavefront.cc:626
Addr pc() const
Definition: wavefront.cc:1387
bool hasBarrier() const
Definition: wavefront.cc:1452
VectorMask _execMask
Definition: wavefront.hh:330
uint32_t actualWgSzTotal
Definition: wavefront.hh:164
void reserveGmResource(GPUDynInstPtr ii)
Definition: wavefront.cc:784
uint64_t oldVgprTcnt
Definition: wavefront.hh:209
std::vector< Addr > lastAddr
Definition: wavefront.hh:153
uint32_t oldDgprId
Definition: wavefront.hh:214
void discardFetch()
Definition: wavefront.cc:1210
uint32_t wfId
Definition: wavefront.hh:167
void setStatus(status_e newStatus)
Definition: wavefront.cc:542
bool waitCntsSatisfied()
Definition: wavefront.cc:1223
uint8_t * context
Definition: wavefront.hh:242
uint32_t wgId
Definition: wavefront.hh:160
void validateRequestCounters()
Definition: wavefront.cc:770
int reservedScalarRegs
Definition: wavefront.hh:196
const int simdId
Definition: wavefront.hh:99
int outstandingReqsWrGm
Definition: wavefront.hh:173
bool isOldestInstLMem()
Definition: wavefront.cc:712
bool isOldestInstPrivMem()
Definition: wavefront.cc:725
bool isOldestInstScalarMem()
Definition: wavefront.cc:699
uint32_t maxDynWaveId
Definition: wavefront.hh:168
uint64_t oldDgprTcnt
Definition: wavefront.hh:216
int scalarWrGmReqsInPipe
Definition: wavefront.hh:189
Wavefront(const Params &p)
Definition: wavefront.cc:48
bool isOldestInstBarrier()
Definition: wavefront.cc:673
Tick lastNonIdleTick
Definition: wavefront.hh:114
void resizeRegFiles(int num_vregs, int num_sregs)
Definition: wavefront.cc:531
int scalarOutstandingReqsWrGm
Definition: wavefront.hh:183
uint32_t gridSz[3]
Definition: wavefront.hh:159
void decExpInstsIssued()
Definition: wavefront.cc:1375
std::vector< uint32_t > oldVgpr
Definition: wavefront.hh:205
void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
Definition: wavefront.cc:117
void setSleepTime(int sleep_time)
Definition: wavefront.cc:1285
ComputeUnit * computeUnit
Definition: wavefront.hh:106
std::vector< uint32_t > workItemFlatId
Definition: wavefront.hh:155
int vmWaitCnt
the following are used for waitcnt instructions vmWaitCnt: once set, we wait for the outstanding numbe...
Definition: wavefront.hh:321
std::vector< int > vecReads
Definition: wavefront.hh:237
std::deque< GPUDynInstPtr > instructionBuffer
Definition: wavefront.hh:109
bool isOldestInstSleep()
Definition: wavefront.cc:612
int outstandingReqsRdLm
Definition: wavefront.hh:179
bool isLmInstruction(GPUDynInstPtr ii)
Definition: wavefront.cc:601
GPUDynInstPtr nextInstr()
Definition: wavefront.cc:1191
uint64_t lastTrace
Definition: wavefront.hh:192
std::vector< uint32_t > workItemId[3]
Definition: wavefront.hh:154
std::vector< uint64_t > oldDgpr
Definition: wavefront.hh:212
bool isOldestInstScalarALU()
Definition: wavefront.cc:643
int reservedVectorRegs
Definition: wavefront.hh:194
void releaseBarrier()
Definition: wavefront.cc:1458
bool isOldestInstFlatMem()
Definition: wavefront.cc:738
uint32_t dispatchId
Definition: wavefront.hh:169
status_e getStatus()
Definition: wavefront.hh:137
VectorMask initMask
Definition: wavefront.hh:219
WavefrontParams Params
Definition: wavefront.hh:244
uint32_t maxVgprs
Definition: wavefront.hh:131
void decVMemInstsIssued()
Definition: wavefront.cc:1369
void computeActualWgSz(HSAQueueEntry *task)
Definition: wavefront.cc:1427
bool stopFetch()
Definition: wavefront.cc:751
uint32_t workGroupId[3]
Definition: wavefront.hh:157
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
Definition: wavefront.cc:1292
const int wfSlotId
Definition: wavefront.hh:96
void setParent(ComputeUnit *cu)
Definition: wavefront.hh:250
std::unordered_map< int, uint64_t > rawDist
Definition: wavefront.hh:233
void incExpInstsIssued()
Definition: wavefront.cc:1357
std::vector< int > reserveResources()
Definition: wavefront.cc:832
uint32_t startSgprIndex
Definition: wavefront.hh:202
void decLGKMInstsIssued()
Definition: wavefront.cc:1381
int outstandingReqsWrLm
Definition: wavefront.hh:175
void incLGKMInstsIssued()
Definition: wavefront.cc:1363
int barrierId() const
Definition: wavefront.cc:1446
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: wavefront.cc:102
uint32_t workGroupSz[3]
Definition: wavefront.hh:158
uint32_t oldVgprId
Definition: wavefront.hh:207
uint32_t wgSz
Definition: wavefront.hh:161
bool isOldestInstVectorALU()
Definition: wavefront.cc:658
uint64_t lastInstExec
Definition: wavefront.hh:229
LdsChunk * ldsChunk
Definition: wavefront.hh:223
uint32_t actualWgSz[3]
Definition: wavefront.hh:163
int scalarOutstandingReqsRdGm
Definition: wavefront.hh:181
int scalarRdGmReqsInPipe
Definition: wavefront.hh:188
void freeResources()
Definition: wavefront.cc:765
void incVMemInstsIssued()
Definition: wavefront.cc:1351
void reserveLmResource(GPUDynInstPtr ii)
Definition: wavefront.cc:814
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:92
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:88
int outstandingReqsRdGm
Definition: wavefront.hh:177
bool isOldestInstGMem()
Definition: wavefront.cc:686
gem5::Wavefront::WavefrontStats stats
VectorMask & execMask()
Definition: wavefront.cc:1399
uint64_t wfDynId
Definition: wavefront.hh:226
void freeRegisterFile()
Freeing VRF space.
Definition: wavefront.cc:1411
bool isGmInstruction(GPUDynInstPtr ii)
Definition: wavefront.cc:590
void clearWaitCnts()
Definition: wavefront.cc:1337
uint32_t startVgprIndex
Definition: wavefront.hh:199
void start(uint64_t _wfDynId, uint64_t _base_ptr)
Definition: wavefront.cc:579
TheGpuISA::GPUISA _gpuISA
Definition: wavefront.hh:300
A simple distribution stat.
Definition: statistics.hh:2085
Statistics container.
Definition: group.hh:94
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1931
The GPUDispatcher is the component of the shader that is responsible for creating and dispatching WGs...
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Bitfield< 54 > p
Definition: pagetable.hh:70
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
uint64_t Tick
Tick count type.
Definition: types.hh:58
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:45
Declaration of Statistics objects.
statistics::Scalar numTimesBlockedDueRAWDependencies
Definition: wavefront.hh:368
statistics::Scalar schResourceStalls
Definition: wavefront.hh:355
WavefrontStats(statistics::Group *parent)
Definition: wavefront.cc:1463
statistics::Distribution vecRawDistance
Definition: wavefront.hh:372
statistics::Distribution readsPerWrite
Definition: wavefront.hh:376
statistics::Scalar schCycles
Definition: wavefront.hh:343
statistics::Scalar numTimesBlockedDueWAXDependencies
Definition: wavefront.hh:365
statistics::Scalar schRfAccessStalls
Definition: wavefront.hh:353
statistics::Scalar schOpdNrdyStalls
Definition: wavefront.hh:357
statistics::Scalar numInstrExecuted
Definition: wavefront.hh:340
statistics::Scalar schStalls
Definition: wavefront.hh:346
statistics::Scalar schLdsArbStalls
Definition: wavefront.hh:361

Generated on Wed Dec 21 2022 10:22:35 for gem5 by doxygen 1.9.1