gem5 v23.0.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
shader.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __SHADER_HH__
33#define __SHADER_HH__
34
35#include <functional>
36#include <string>
37
38#include "arch/gpu_isa.hh"
39#include "base/statistics.hh"
40#include "base/stats/group.hh"
41#include "base/types.hh"
42#include "cpu/simple/atomic.hh"
43#include "cpu/simple/timing.hh"
44#include "cpu/simple_thread.hh"
45#include "cpu/thread_context.hh"
46#include "cpu/thread_state.hh"
52#include "mem/page_table.hh"
53#include "mem/port.hh"
54#include "mem/request.hh"
55#include "params/Shader.hh"
56#include "sim/faults.hh"
57#include "sim/process.hh"
58#include "sim/sim_object.hh"
59
60namespace gem5
61{
62
63class BaseTLB;
64class GPUCommandProcessor;
65class GPUDispatcher;
66
67static const int LDS_SIZE = 65536;
68
69// Aperture (APE) registers define the base/limit
70// pair for the ATC mapped memory space. Currently
71// the only APEs we consider are for GPUVM/LDS/scratch.
72// The APEs are registered with unique values on a
73// per-device basis.
75{
78};
79
80// Class Shader: This describes a single shader instance. Most
81// configurations will only have a single shader.
82
83class Shader : public ClockedObject
84{
85 private:
90
91 // Hardware regs accessed by getreg/setreg instructions, set by queues
92 std::unordered_map<int, uint32_t> hwRegs;
93
94 // Number of active CUs attached to this shader
96
97 // Last tick that all CUs attached to this shader were inactive
99
100 public:
101 typedef ShaderParams Params;
103
105 void sampleLoad(const Tick accessTime);
106 void sampleStore(const Tick accessTime);
107 void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
108 void sampleLineRoundTrip(const std::map<Addr,
109 std::vector<Tick>> &roundTripTime);
110
114
115 void
116 setHwReg(int regIdx, uint32_t val)
117 {
118 hwRegs[regIdx] = val;
119 }
120
121 uint32_t
122 getHwReg(int regIdx)
123 {
124 return hwRegs[regIdx];
125 }
126
127 const ApertureRegister&
128 gpuVmApe() const
129 {
130 return _gpuVmApe;
131 }
132
133 const ApertureRegister&
134 ldsApe() const
135 {
136 return _ldsApe;
137 }
138
139 void
141 {
142 _ldsApe.base = base;
144 }
145
146 const ApertureRegister&
148 {
149 return _scratchApe;
150 }
151
152 void
154 {
157 }
158
159 bool
161 {
162 bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
163
164 return is_gpu_vm;
165 }
166
167 bool
169 {
170 bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
171
172 return is_lds;
173 }
174
175 bool
177 {
178 bool is_scratch
180
181 return is_scratch;
182 }
183
184 Addr
186 {
187 return _scratchApe.base;
188 }
189
190 Addr
192 {
194 }
195
196 void
198 {
199 Addr sh_hidden_base_new = queueBase - offset;
200
201 // We are initializing sh_hidden_private_base_vmid from the
202 // amd queue descriptor from the first queue.
203 // The sh_hidden_private_base_vmid is supposed to be same for
204 // all the queues from the same process
205 if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
206 // Do not panic if shHiddenPrivateBaseVmid == 0,
207 // that is if it is uninitialized. Panic only
208 // if the value is initialized and we get
209 // a different base later.
211 "Currently we support only single process\n");
212 }
213 shHiddenPrivateBaseVmid = sh_hidden_base_new;
214 }
215
217
219
220 // is this simulation going to be timing mode in the memory?
223
224 // If set, issue acq packet @ kernel launch
226 // If set, issue rel packet @ kernel end
228 // If set, fetch returns may be coissued with instructions
230 // If set, always dump all 64 gprs to trace
232 // Number of cu units in the shader
233 int n_cu;
234 // Number of wavefront slots per SIMD per CU
235 int n_wf;
236
237 // The size of global memory
239
240 // Tracks CU that rr dispatcher should attempt scheduling
242
243 // Size of scheduled add queue
244 uint32_t sa_n;
245
246 // Pointer to value to be incremented
248 // When to do the increment
250 // Amount to increment by
252
253 // List of Compute Units (CU's)
255
259
262
263 Shader(const Params &p);
264 ~Shader();
265 virtual void init();
266
267 // Run shader scheduled adds
268 void execScheduledAdds();
269
270 // Schedule a 32-bit value to be incremented some time in the future
271 void ScheduleAdd(int *val, Tick when, int x);
273
274 void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
275 MemCmd cmd, bool suppress_func_errors);
276
277 void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
278
279 void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
280 bool suppress_func_errors);
281
282 void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
283
284 void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
285 bool suppress_func_errors);
286
287 void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
288 bool suppress_func_errors, int cu_id);
289
290 void
291 registerCU(int cu_id, ComputeUnit *compute_unit)
292 {
293 cuList[cu_id] = compute_unit;
294 }
295
297 void prepareFlush(GPUDynInstPtr gpuDynInst);
298
300 Addr mmap(int length);
301 void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode);
302 void updateContext(int cid);
303 void notifyCuSleep();
304
305 void
306 incVectorInstSrcOperand(int num_operands)
307 {
308 stats.vectorInstSrcOperand[num_operands]++;
309 }
310
311 void
312 incVectorInstDstOperand(int num_operands)
313 {
314 stats.vectorInstDstOperand[num_operands]++;
315 }
316
317 protected:
319 {
320 ShaderStats(statistics::Group *parent, int wf_size);
321
322 // some stats for measuring latency
326
327 // average ticks from vmem inst initiateAcc to coalescer issue,
329
330 // average ticks from coalescer issue to coalescer hit callback,
332
333 // average ticks from coalescer hit callback to GM pipe enqueue,
335
336 // average ticks spent in GM pipe's ordered resp buffer.
338
339 // average number of cache blocks requested by vmem inst
341
342 // average ticks for cache blocks to main memory for the Nth
343 // cache block generated by a vmem inst.
345
350};
351
352} // namespace gem5
353
354#endif // __SHADER_HH__
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
const char data[]
This class handles reads from the system/host memory space from the shader.
Definition system_hub.hh:51
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
bool timingSim
Definition shader.hh:221
Addr mmap(int length)
Definition shader.cc:105
void prepareInvalidate(HSAQueueEntry *task)
Definition shader.cc:191
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition shader.cc:376
void notifyCuSleep()
Definition shader.cc:517
void setLdsApe(Addr base, Addr limit)
Definition shader.hh:140
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition shader.cc:284
void execScheduledAdds()
Definition shader.cc:161
int64_t total_valu_insts
Definition shader.hh:261
ApertureRegister _scratchApe
Definition shader.hh:88
void setScratchApe(Addr base, Addr limit)
Definition shader.hh:153
hsail_mode_e hsail_mode
Definition shader.hh:222
EventFunctionWrapper tickEvent
Definition shader.hh:218
std::unordered_map< int, uint32_t > hwRegs
Definition shader.hh:92
std::vector< ComputeUnit * > cuList
Definition shader.hh:254
ApertureRegister _ldsApe
Definition shader.hh:87
ApertureRegister _gpuVmApe
Definition shader.hh:86
const ApertureRegister & scratchApe() const
Definition shader.hh:147
int nextSchedCu
Definition shader.hh:241
void registerCU(int cu_id, ComputeUnit *compute_unit)
Definition shader.hh:291
void ScheduleAdd(int *val, Tick when, int x)
Definition shader.cc:357
GPUDispatcher & _dispatcher
Definition shader.hh:257
uint32_t sa_n
Definition shader.hh:244
int trace_vgpr_all
Definition shader.hh:231
ShaderParams Params
Definition shader.hh:101
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick > > &roundTripTime)
Definition shader.cc:488
std::vector< uint64_t > sa_when
Definition shader.hh:249
bool processTimingPacket(PacketPtr pkt)
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition shader.cc:138
bool isScratchApe(Addr addr) const
Definition shader.hh:176
int coissue_return
Definition shader.hh:229
std::vector< int32_t > sa_x
Definition shader.hh:251
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition shader.cc:394
gem5::Shader::ShaderStats stats
const ApertureRegister & ldsApe() const
Definition shader.hh:134
bool isLdsApe(Addr addr) const
Definition shader.hh:168
ThreadContext * gpuTc
Definition shader.hh:112
bool isGpuVmApe(Addr addr) const
Definition shader.hh:160
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition shader.cc:235
GPUDispatcher & dispatcher()
Definition shader.cc:99
Addr getScratchBase()
Definition shader.hh:185
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition shader.hh:197
int impl_kern_launch_acq
Definition shader.hh:225
void incVectorInstDstOperand(int num_operands)
Definition shader.hh:312
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from CP.
Definition shader.cc:530
AMDGPUSystemHub * systemHub
Definition shader.hh:258
void setHwReg(int regIdx, uint32_t val)
Definition shader.hh:116
SimpleThread * cpuThread
Definition shader.hh:111
void updateContext(int cid)
Definition shader.cc:153
int64_t max_valu_insts
Definition shader.hh:260
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition shader.cc:408
int globalMemSize
Definition shader.hh:238
int impl_kern_end_rel
Definition shader.hh:227
uint32_t getHwReg(int regIdx)
Definition shader.hh:122
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition shader.cc:222
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition shader.cc:468
void sampleLoad(const Tick accessTime)
Definition shader.cc:461
const ApertureRegister & gpuVmApe() const
Definition shader.hh:128
void incVectorInstSrcOperand(int num_operands)
Definition shader.hh:306
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
Definition shader.cc:427
void sampleStore(const Tick accessTime)
Definition shader.cc:451
BaseCPU * cpuPointer
Definition shader.hh:113
GPUCommandProcessor & gpuCmdProc
Definition shader.hh:256
Addr shHiddenPrivateBaseVmid
Definition shader.hh:89
Tick _lastInactiveTick
Definition shader.hh:98
std::vector< int * > sa_val
Definition shader.hh:247
Addr getHiddenPrivateBase()
Definition shader.hh:191
int _activeCus
Definition shader.hh:95
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
ThreadContext is the external interface to all thread state for anything outside of the CPU.
A simple distribution stat.
Statistics container.
Definition group.hh:93
This is a simple scalar statistic, like a counter.
A vector of scalar stats.
STL vector class.
Definition stl.hh:37
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
HSAQueueEntry is the simulator's internal representation of an AQL queue entry (task).
Port Object Declaration.
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 0 > p
Bitfield< 3 > x
Definition pagetable.hh:73
Bitfield< 51, 12 > base
Definition pagetable.hh:141
Bitfield< 63 > val
Definition misc.hh:776
BitfieldType< SegDescriptorLimit > limit
Definition misc.hh:931
Bitfield< 3 > addr
Definition types.hh:84
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
uint64_t Tick
Tick count type.
Definition types.hh:58
uint16_t RequestorID
Definition request.hh:95
static const int LDS_SIZE
Definition shader.hh:67
Declarations of a non-full system Page Table.
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
Declaration of Statistics objects.
statistics::Vector vectorInstSrcOperand
Definition shader.hh:347
statistics::Distribution storeLatencyDist
Definition shader.hh:325
statistics::Distribution initToCoalesceLatency
Definition shader.hh:328
statistics::Scalar shaderActiveTicks
Definition shader.hh:346
statistics::Distribution loadLatencyDist
Definition shader.hh:324
statistics::Distribution allLatencyDist
Definition shader.hh:323
statistics::Distribution gmToCompleteLatency
Definition shader.hh:337
statistics::Distribution coalsrLineAddresses
Definition shader.hh:340
statistics::Vector vectorInstDstOperand
Definition shader.hh:348
statistics::Distribution rubyNetworkLatency
Definition shader.hh:331
statistics::Distribution * cacheBlockRoundTrip
Definition shader.hh:344
statistics::Distribution gmEnqueueLatency
Definition shader.hh:334

Generated on Mon Jul 10 2023 15:32:03 for gem5 by doxygen 1.9.7