gem5 v24.0.0.0
Loading...
Searching...
No Matches
shader.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __SHADER_HH__
33#define __SHADER_HH__
34
35#include <functional>
36#include <string>
37
38#include "arch/gpu_isa.hh"
39#include "base/statistics.hh"
40#include "base/stats/group.hh"
41#include "base/types.hh"
42#include "cpu/simple/atomic.hh"
43#include "cpu/simple/timing.hh"
44#include "cpu/simple_thread.hh"
45#include "cpu/thread_context.hh"
46#include "cpu/thread_state.hh"
52#include "mem/page_table.hh"
53#include "mem/port.hh"
54#include "mem/request.hh"
55#include "params/Shader.hh"
56#include "sim/faults.hh"
57#include "sim/process.hh"
58#include "sim/sim_object.hh"
59
60namespace gem5
61{
62
63class BaseTLB;
64class GPUCommandProcessor;
65class GPUDispatcher;
66
67static const int LDS_SIZE = 65536;
68
69// aperture (APE) registers define the base/limit
70// pair for the ATC mapped memory space. currently
71// the only APEs we consider are for GPUVM/LDS/scratch.
72// the APEs are registered with unique values
73// on a per-device basis
79
80// Class Shader: This describes a single shader instance. Most
81// configurations will only have a single shader.
82
83class Shader : public ClockedObject
84{
85 private:
90
91 // Hardware regs accessed by getreg/setreg instructions, set by queues
92 std::unordered_map<int, uint32_t> hwRegs;
93
94 // Number of active CUs attached to this shader
96
97 // Last tick that all CUs attached to this shader were inactive
99
100 // If a kernel-based exit event was requested, wait for all CUs in the
101 // shader to complete before actually exiting so that stats are updated.
103
104 // Set to true by the dispatcher if the current kernel is a blit kernel
105 bool blitKernel = false;
106
107 // Number of pending non-instruction invalidates outstanding. The shader
108 // should wait for these to be done to ensure correctness.
111
112 public:
113 typedef ShaderParams Params;
115
117 void sampleLoad(const Tick accessTime);
118 void sampleStore(const Tick accessTime);
119 void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
120 void sampleLineRoundTrip(const std::map<Addr,
121 std::vector<Tick>> &roundTripTime);
122
126
127 void
128 setHwReg(int regIdx, uint32_t val)
129 {
130 hwRegs[regIdx] = val;
131 }
132
133 uint32_t
134 getHwReg(int regIdx)
135 {
136 return hwRegs[regIdx];
137 }
138
139 const ApertureRegister&
140 gpuVmApe() const
141 {
142 return _gpuVmApe;
143 }
144
145 const ApertureRegister&
146 ldsApe() const
147 {
148 return _ldsApe;
149 }
150
151 void
157
158 const ApertureRegister&
160 {
161 return _scratchApe;
162 }
163
164 void
170
171 bool
173 {
174 bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
175
176 return is_gpu_vm;
177 }
178
179 bool
181 {
182 bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
183
184 return is_lds;
185 }
186
187 bool
189 {
190 bool is_scratch
192
193 return is_scratch;
194 }
195
196 Addr
198 {
199 return _scratchApe.base;
200 }
201
202 Addr
207
208 void
210 {
211 Addr sh_hidden_base_new = queueBase - offset;
212
213 // We are initializing sh_hidden_private_base_vmid from the
214 // AMD queue descriptor of the first queue.
215 // The sh_hidden_private_base_vmid is supposed to be the same for
216 // all the queues from the same process.
217 if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
218 // Do not panic if shHiddenPrivateBaseVmid == 0,
219 // that is if it is uninitialized. Panic only
220 // if the value is initialized and we get
221 // a different base later.
223 "Currently we support only single process\n");
224 }
225 shHiddenPrivateBaseVmid = sh_hidden_base_new;
226 }
227
229
231
232 // is this simulation going to be timing mode in the memory?
235
236 // If set, issue acq packet @ kernel launch
238 // If set, issue rel packet @ kernel end
240 // If set, fetch returns may be coissued with instructions
242 // If set, always dump all 64 gprs to trace
244 // Number of cu units in the shader
245 int n_cu;
246 // Number of wavefront slots per SIMD per CU
247 int n_wf;
248 // Number of cu units per sqc in the shader
250
251 // The size of global memory
253
254 // Tracks CU that rr dispatcher should attempt scheduling
256
257 // Size of scheduled add queue
258 uint32_t sa_n;
259
260 // Pointer to value to be incremented
262 // When to do the increment
264 // Amount to increment by
266
267 // List of Compute Units (CU's)
269
273
276
277 Shader(const Params &p);
278 ~Shader();
279 virtual void init();
280
281 // Run shader scheduled adds
282 void execScheduledAdds();
283
284 // Schedule a 32-bit value to be incremented some time in the future
285 void ScheduleAdd(int *val, Tick when, int x);
287
288 void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
289 MemCmd cmd, bool suppress_func_errors);
290
291 void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
292
293 void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
294 bool suppress_func_errors);
295
296 void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
297
298 void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
299 bool suppress_func_errors);
300
301 void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
302 bool suppress_func_errors, int cu_id);
303
304 void
305 registerCU(int cu_id, ComputeUnit *compute_unit)
306 {
307 cuList[cu_id] = compute_unit;
308 }
309
311 void prepareFlush(GPUDynInstPtr gpuDynInst);
312
314 Addr mmap(int length);
315 void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode);
316 void updateContext(int cid);
317 void notifyCuSleep();
318
319 void
320 incVectorInstSrcOperand(int num_operands)
321 {
322 stats.vectorInstSrcOperand[num_operands]++;
323 }
324
325 void
326 incVectorInstDstOperand(int num_operands)
327 {
328 stats.vectorInstDstOperand[num_operands]++;
329 }
330
331 void
332 requestKernelExitEvent(bool is_blit_kernel)
333 {
334 kernelExitRequested = true;
335 blitKernel = is_blit_kernel;
336 }
337
341
342 void addDeferredDispatch(void *raw_pkt, uint32_t queue_id,
343 Addr host_pkt_addr);
344
345 protected:
347 {
348 ShaderStats(statistics::Group *parent, int wf_size);
349
350 // some stats for measuring latency
354
355 // average ticks from vmem inst initiateAcc to coalescer issue,
357
358 // average ticks from coalescer issue to coalescer hit callback,
360
361 // average ticks from coalescer hit callback to GM pipe enqueue,
363
364 // average ticks spent in GM pipe's ordered resp buffer.
366
367 // average number of cache blocks requested by vmem inst
369
370 // average ticks for cache blocks to main memory for the Nth
371 // cache block generated by a vmem inst.
373
378};
379
380} // namespace gem5
381
382#endif // __SHADER_HH__
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
const char data[]
This class handles reads from the system/host memory space from the shader.
Definition system_hub.hh:51
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
bool kernelExitRequested
Definition shader.hh:102
bool timingSim
Definition shader.hh:233
Addr mmap(int length)
Definition shader.cc:117
void prepareInvalidate(HSAQueueEntry *task)
Definition shader.cc:203
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition shader.cc:395
bool blitKernel
Definition shader.hh:105
void notifyCuSleep()
Definition shader.cc:536
void setLdsApe(Addr base, Addr limit)
Definition shader.hh:152
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition shader.cc:303
void execScheduledAdds()
Definition shader.cc:173
int64_t total_valu_insts
Definition shader.hh:275
ApertureRegister _scratchApe
Definition shader.hh:88
void setScratchApe(Addr base, Addr limit)
Definition shader.hh:165
hsail_mode_e hsail_mode
Definition shader.hh:234
EventFunctionWrapper tickEvent
Definition shader.hh:230
std::unordered_map< int, uint32_t > hwRegs
Definition shader.hh:92
std::vector< ComputeUnit * > cuList
Definition shader.hh:268
ApertureRegister _ldsApe
Definition shader.hh:87
ApertureRegister _gpuVmApe
Definition shader.hh:86
const ApertureRegister & scratchApe() const
Definition shader.hh:159
void addDeferredDispatch(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
Definition shader.cc:571
int nextSchedCu
Definition shader.hh:255
void incNumOutstandingInvL2s()
Definition shader.hh:339
void registerCU(int cu_id, ComputeUnit *compute_unit)
Definition shader.hh:305
void ScheduleAdd(int *val, Tick when, int x)
Definition shader.cc:376
GPUDispatcher & _dispatcher
Definition shader.hh:271
uint32_t sa_n
Definition shader.hh:258
int trace_vgpr_all
Definition shader.hh:243
ShaderParams Params
Definition shader.hh:113
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick > > &roundTripTime)
Definition shader.cc:507
std::vector< uint64_t > sa_when
Definition shader.hh:263
bool processTimingPacket(PacketPtr pkt)
int getNumOutstandingInvL2s() const
Definition shader.hh:340
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition shader.cc:150
bool isScratchApe(Addr addr) const
Definition shader.hh:188
int coissue_return
Definition shader.hh:241
std::vector< int32_t > sa_x
Definition shader.hh:265
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition shader.cc:413
gem5::Shader::ShaderStats stats
const ApertureRegister & ldsApe() const
Definition shader.hh:146
std::vector< std::tuple< void *, uint32_t, Addr > > deferred_dispatches
Definition shader.hh:110
bool isLdsApe(Addr addr) const
Definition shader.hh:180
ThreadContext * gpuTc
Definition shader.hh:124
bool isGpuVmApe(Addr addr) const
Definition shader.hh:172
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition shader.cc:254
GPUDispatcher & dispatcher()
Definition shader.cc:111
Addr getScratchBase()
Definition shader.hh:197
Shader(const Params &p)
Definition shader.cc:57
void decNumOutstandingInvL2s()
Definition shader.cc:556
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition shader.hh:209
int impl_kern_launch_acq
Definition shader.hh:237
void incVectorInstDstOperand(int num_operands)
Definition shader.hh:326
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from CP.
Definition shader.cc:582
AMDGPUSystemHub * systemHub
Definition shader.hh:272
void setHwReg(int regIdx, uint32_t val)
Definition shader.hh:128
SimpleThread * cpuThread
Definition shader.hh:123
void updateContext(int cid)
Definition shader.cc:165
int n_cu_per_sqc
Definition shader.hh:249
int64_t max_valu_insts
Definition shader.hh:274
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition shader.cc:427
int globalMemSize
Definition shader.hh:252
void requestKernelExitEvent(bool is_blit_kernel)
Definition shader.hh:332
int impl_kern_end_rel
Definition shader.hh:239
uint32_t getHwReg(int regIdx)
Definition shader.hh:134
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition shader.cc:241
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition shader.cc:487
void sampleLoad(const Tick accessTime)
Definition shader.cc:480
const ApertureRegister & gpuVmApe() const
Definition shader.hh:140
void incVectorInstSrcOperand(int num_operands)
Definition shader.hh:320
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
Definition shader.cc:446
int num_outstanding_invl2s
Definition shader.hh:109
void sampleStore(const Tick accessTime)
Definition shader.cc:470
BaseCPU * cpuPointer
Definition shader.hh:125
GPUCommandProcessor & gpuCmdProc
Definition shader.hh:270
Addr shHiddenPrivateBaseVmid
Definition shader.hh:89
Tick _lastInactiveTick
Definition shader.hh:98
std::vector< int * > sa_val
Definition shader.hh:261
Addr getHiddenPrivateBase()
Definition shader.hh:203
int _activeCus
Definition shader.hh:95
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
ThreadContext is the external interface to all thread state for anything outside of the CPU.
A simple distribution stat.
Statistics container.
Definition group.hh:93
This is a simple scalar statistic, like a counter.
A vector of scalar stats.
STL vector class.
Definition stl.hh:37
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Port Object Declaration.
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 0 > p
Bitfield< 3 > x
Definition pagetable.hh:73
Bitfield< 51, 12 > base
Definition pagetable.hh:141
Bitfield< 63 > val
Definition misc.hh:804
BitfieldType< SegDescriptorLimit > limit
Definition misc.hh:959
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
uint16_t RequestorID
Definition request.hh:95
static const int LDS_SIZE
Definition shader.hh:67
Declarations of a non-full system Page Table.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
Declaration of Statistics objects.
statistics::Vector vectorInstSrcOperand
Definition shader.hh:375
statistics::Distribution storeLatencyDist
Definition shader.hh:353
statistics::Distribution initToCoalesceLatency
Definition shader.hh:356
statistics::Scalar shaderActiveTicks
Definition shader.hh:374
statistics::Distribution loadLatencyDist
Definition shader.hh:352
statistics::Distribution allLatencyDist
Definition shader.hh:351
statistics::Distribution gmToCompleteLatency
Definition shader.hh:365
ShaderStats(statistics::Group *parent, int wf_size)
Definition shader.cc:587
statistics::Distribution coalsrLineAddresses
Definition shader.hh:368
statistics::Vector vectorInstDstOperand
Definition shader.hh:376
statistics::Distribution rubyNetworkLatency
Definition shader.hh:359
statistics::Distribution * cacheBlockRoundTrip
Definition shader.hh:372
statistics::Distribution gmEnqueueLatency
Definition shader.hh:362

Generated on Tue Jun 18 2024 16:24:04 for gem5 by doxygen 1.11.0