gem5  v21.0.1.0
shader.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __SHADER_HH__
35 #define __SHADER_HH__
36 
37 #include <functional>
38 #include <string>
39 
40 #include "arch/isa.hh"
41 #include "base/statistics.hh"
42 #include "base/stats/group.hh"
43 #include "base/types.hh"
44 #include "cpu/simple/atomic.hh"
45 #include "cpu/simple/timing.hh"
46 #include "cpu/simple_thread.hh"
47 #include "cpu/thread_context.hh"
48 #include "cpu/thread_state.hh"
51 #include "gpu-compute/gpu_tlb.hh"
53 #include "gpu-compute/lds_state.hh"
54 #include "mem/page_table.hh"
55 #include "mem/port.hh"
56 #include "mem/request.hh"
57 #include "params/Shader.hh"
58 #include "sim/faults.hh"
59 #include "sim/process.hh"
60 #include "sim/sim_object.hh"
61 
62 class BaseTLB;
64 class GPUDispatcher;
65 
66 namespace TheISA
67 {
68  class GpuTLB;
69 }
70 
71 static const int LDS_SIZE = 65536;
72 
73 // Aperture (APE) registers define the base/limit
74 // pair for the ATC mapped memory space. Currently
75 // the only APEs we consider are for GPUVM/LDS/scratch.
76 // The APEs are registered with unique values
77 // on a per-device basis.
79 {
82 };
83 
84 // Class Shader: This describes a single shader instance. Most
85 // configurations will only have a single shader.
86 
87 class Shader : public ClockedObject
88 {
89  private:
94 
95  // Number of active CUs attached to this shader
97 
98  // Last tick that all CUs attached to this shader were inactive
100 
101  public:
102  typedef ShaderParams Params;
104 
106  void sampleLoad(const Tick accessTime);
107  void sampleStore(const Tick accessTime);
108  void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
109  void sampleLineRoundTrip(const std::map<Addr,
110  std::vector<Tick>> &roundTripTime);
111 
115 
116  const ApertureRegister&
117  gpuVmApe() const
118  {
119  return _gpuVmApe;
120  }
121 
122  const ApertureRegister&
123  ldsApe() const
124  {
125  return _ldsApe;
126  }
127 
128  const ApertureRegister&
129  scratchApe() const
130  {
131  return _scratchApe;
132  }
133 
134  bool
136  {
137  bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
138 
139  return is_gpu_vm;
140  }
141 
142  bool
144  {
145  bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
146 
147  return is_lds;
148  }
149 
150  bool
152  {
153  bool is_scratch
155 
156  return is_scratch;
157  }
158 
159  Addr
161  {
162  return _scratchApe.base;
163  }
164 
165  Addr
167  {
169  }
170 
171  void
172  initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
173  {
174  Addr sh_hidden_base_new = queueBase - offset;
175 
176  // We are initializing sh_hidden_private_base_vmid from the
177  // amd queue descriptor from the first queue.
178  // The sh_hidden_private_base_vmid is supposed to be the same for
179  // all the queues from the same process
180  if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
181  // Do not panic if shHiddenPrivateBaseVmid == 0,
182  // that is if it is uninitialized. Panic only
183  // if the value is initialized and we get
184  // a different base later.
186  "Currently we support only single process\n");
187  }
188  shHiddenPrivateBaseVmid = sh_hidden_base_new;
189  }
190 
192 
193  // is this simulation going to be timing mode in the memory?
194  bool timingSim;
196 
197  // If set, issue acq packet @ kernel launch
199  // If set, issue rel packet @ kernel end
201  // If set, fetch returns may be coissued with instructions
203  // If set, always dump all 64 gprs to trace
205  // Number of cu units in the shader
206  int n_cu;
207  // Number of wavefront slots per SIMD per CU
208  int n_wf;
209 
210  // The size of global memory
212 
213  // Tracks CU that rr dispatcher should attempt scheduling
215 
216  // Size of scheduled add queue
217  uint32_t sa_n;
218 
219  // Pointer to value to be incremented
221  // When to do the increment
223  // Amount to increment by
225 
226  // List of Compute Units (CU's)
228 
231 
232  int64_t max_valu_insts;
234 
235  Shader(const Params &p);
236  ~Shader();
237  virtual void init();
238 
239  // Run shader scheduled adds
240  void execScheduledAdds();
241 
242  // Schedule a 32-bit value to be incremented some time in the future
243  void ScheduleAdd(int *val, Tick when, int x);
244  bool processTimingPacket(PacketPtr pkt);
245 
246  void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
247  MemCmd cmd, bool suppress_func_errors);
248 
249  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
250 
251  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
252  bool suppress_func_errors);
253 
254  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
255 
256  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
257  bool suppress_func_errors);
258 
259  void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
260  bool suppress_func_errors, int cu_id);
261 
262  void
263  registerCU(int cu_id, ComputeUnit *compute_unit)
264  {
265  cuList[cu_id] = compute_unit;
266  }
267 
268  void prepareInvalidate(HSAQueueEntry *task);
269  void prepareFlush(GPUDynInstPtr gpuDynInst);
270 
271  bool dispatchWorkgroups(HSAQueueEntry *task);
272  Addr mmap(int length);
273  void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode);
274  void updateContext(int cid);
275  void notifyCuSleep();
276 
277  void
278  incVectorInstSrcOperand(int num_operands)
279  {
280  stats.vectorInstSrcOperand[num_operands]++;
281  }
282 
283  void
284  incVectorInstDstOperand(int num_operands)
285  {
286  stats.vectorInstDstOperand[num_operands]++;
287  }
288 
289  protected:
290  struct ShaderStats : public Stats::Group
291  {
292  ShaderStats(Stats::Group *parent, int wf_size);
293 
294  // some stats for measuring latency
298 
299  // average ticks from vmem inst initiateAcc to coalescer issue,
301 
302  // average ticks from coalescer issue to coalescer hit callback,
304 
305  // average ticks from coalescer hit callback to GM pipe enqueue,
307 
308  // average ticks spent in GM pipe's ordered resp buffer.
310 
311  // average number of cache blocks requested by vmem inst
313 
314  // average ticks for cache blocks to main memory for the Nth
315  // cache block generated by a vmem inst.
317 
321  } stats;
322 };
323 
324 #endif // __SHADER_HH__
Shader::isGpuVmApe
bool isGpuVmApe(Addr addr) const
Definition: shader.hh:135
hsa_queue_entry.hh
thread_state.hh
Shader::sa_x
std::vector< int32_t > sa_x
Definition: shader.hh:224
Shader::stats
Shader::ShaderStats stats
Shader::impl_kern_end_rel
int impl_kern_end_rel
Definition: shader.hh:200
GPUCommandProcessor
Definition: gpu_command_processor.hh:57
Shader::functionalTLBAccess
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode)
Definition: shader.cc:426
Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:152
Shader
Definition: shader.hh:87
data
const char data[]
Definition: circlebuf.test.cc:47
Shader::ShaderStats::initToCoalesceLatency
Stats::Distribution initToCoalesceLatency
Definition: shader.hh:300
group.hh
timing.hh
Shader::tickEvent
EventFunctionWrapper tickEvent
Definition: shader.hh:191
atomic.hh
Shader::mmap
Addr mmap(int length)
Definition: shader.cc:104
Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:172
Shader::registerCU
void registerCU(int cu_id, ComputeUnit *compute_unit)
Definition: shader.hh:263
Shader::timingSim
bool timingSim
Definition: shader.hh:194
Shader::ShaderStats::vectorInstDstOperand
Stats::Vector vectorInstDstOperand
Definition: shader.hh:320
Shader::scratchApe
const ApertureRegister & scratchApe() const
Definition: shader.hh:129
Shader::ShaderStats::rubyNetworkLatency
Stats::Distribution rubyNetworkLatency
Definition: shader.hh:303
Shader::_lastInactiveTick
Tick _lastInactiveTick
Definition: shader.hh:99
compute_unit.hh
Shader::prepareFlush
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition: shader.cc:221
TheISA
Definition: thread_context.hh:52
Shader::globalMemSize
int globalMemSize
Definition: shader.hh:211
BaseTLB::Mode
Mode
Definition: tlb.hh:57
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:59
Shader::n_wf
int n_wf
Definition: shader.hh:208
Shader::WriteMem
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:407
Shader::nextSchedCu
int nextSchedCu
Definition: shader.hh:214
Shader::sampleLineRoundTrip
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
Definition: shader.cc:487
Shader::shHiddenPrivateBaseVmid
Addr shHiddenPrivateBaseVmid
Definition: shader.hh:93
ApertureRegister::limit
Addr limit
Definition: shader.hh:81
RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:86
std::vector< Tick >
Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:190
Shader::ShaderStats::loadLatencyDist
Stats::Distribution loadLatencyDist
Definition: shader.hh:296
Shader::ShaderStats::vectorInstSrcOperand
Stats::Vector vectorInstSrcOperand
Definition: shader.hh:319
Shader::coissue_return
int coissue_return
Definition: shader.hh:202
faults.hh
Stats::Vector
A vector of scalar stats.
Definition: statistics.hh:2007
HSAQueueEntry
Definition: hsa_queue_entry.hh:58
Shader::ReadMem
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:393
request.hh
BaseTLB
Definition: tlb.hh:50
Shader::doFunctionalAccess
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition: shader.cc:283
Shader::execScheduledAdds
void execScheduledAdds()
Definition: shader.cc:160
GPUDispatcher
Definition: dispatcher.hh:61
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
SimpleThread
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
Definition: simple_thread.hh:90
Shader::gpuTc
ThreadContext * gpuTc
Definition: shader.hh:113
Shader::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: shader.cc:137
EventFunctionWrapper
Definition: eventq.hh:1112
Shader::~Shader
~Shader()
Definition: shader.cc:145
Shader::ShaderStats::shaderActiveTicks
Stats::Scalar shaderActiveTicks
Definition: shader.hh:318
Stats::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1933
ComputeUnit
Definition: compute_unit.hh:200
Shader::dispatcher
GPUDispatcher & dispatcher()
Definition: shader.cc:98
Shader::gpuCmdProc
GPUCommandProcessor & gpuCmdProc
Definition: shader.hh:229
ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:88
Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:195
sim_object.hh
Shader::AccessMem
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition: shader.cc:375
Shader::_ldsApe
ApertureRegister _ldsApe
Definition: shader.hh:91
MemCmd
Definition: packet.hh:72
Shader::ShaderStats::coalsrLineAddresses
Stats::Distribution coalsrLineAddresses
Definition: shader.hh:312
statistics.hh
Shader::_gpuVmApe
ApertureRegister _gpuVmApe
Definition: shader.hh:90
Shader::_activeCus
int _activeCus
Definition: shader.hh:96
Shader::sa_n
uint32_t sa_n
Definition: shader.hh:217
process.hh
Shader::ldsApe
const ApertureRegister & ldsApe() const
Definition: shader.hh:123
ArmISA::mode
Bitfield< 4, 0 > mode
Definition: miscregs_types.hh:70
Shader::hsail_mode_e
hsail_mode_e
Definition: shader.hh:103
port.hh
ApertureRegister
Definition: shader.hh:78
Shader::getHiddenPrivateBase
Addr getHiddenPrivateBase()
Definition: shader.hh:166
gpu_dyn_inst.hh
Shader::sa_val
std::vector< int * > sa_val
Definition: shader.hh:220
Shader::ShaderStats::allLatencyDist
Stats::Distribution allLatencyDist
Definition: shader.hh:295
Shader::Params
ShaderParams Params
Definition: shader.hh:102
Shader::trace_vgpr_all
int trace_vgpr_all
Definition: shader.hh:204
Shader::ShaderStats::cacheBlockRoundTrip
Stats::Distribution * cacheBlockRoundTrip
Definition: shader.hh:316
RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:70
Shader::VECTOR_SCALAR
@ VECTOR_SCALAR
Definition: shader.hh:103
gpu_tlb.hh
X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:769
Shader::sampleInstRoundTrip
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition: shader.cc:467
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:148
Stats::Distribution
A simple distribution stat.
Definition: statistics.hh:2084
Shader::incVectorInstDstOperand
void incVectorInstDstOperand(int num_operands)
Definition: shader.hh:284
X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:80
BaseCPU
Definition: base.hh:104
Shader::getScratchBase
Addr getScratchBase()
Definition: shader.hh:160
Shader::max_valu_insts
int64_t max_valu_insts
Definition: shader.hh:232
Shader::total_valu_insts
int64_t total_valu_insts
Definition: shader.hh:233
LDS_SIZE
static const int LDS_SIZE
Definition: shader.hh:71
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:197
simple_thread.hh
Shader::sa_when
std::vector< uint64_t > sa_when
Definition: shader.hh:222
Shader::ShaderStats
Definition: shader.hh:290
types.hh
Shader::n_cu
int n_cu
Definition: shader.hh:206
Shader::ScheduleAdd
void ScheduleAdd(int *val, Tick when, int x)
Definition: shader.cc:356
Shader::ShaderStats::ShaderStats
ShaderStats(Stats::Group *parent, int wf_size)
Definition: shader.cc:525
Shader::incVectorInstSrcOperand
void incVectorInstSrcOperand(int num_operands)
Definition: shader.hh:278
Shader::SIMT
@ SIMT
Definition: shader.hh:103
Shader::cpuPointer
BaseCPU * cpuPointer
Definition: shader.hh:114
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:258
Stats::Group
Statistics container.
Definition: group.hh:87
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
Shader::isScratchApe
bool isScratchApe(Addr addr) const
Definition: shader.hh:151
Shader::isLdsApe
bool isLdsApe(Addr addr) const
Definition: shader.hh:143
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
Shader::cpuThread
SimpleThread * cpuThread
Definition: shader.hh:112
Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:198
Shader::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
page_table.hh
Shader::ShaderStats::gmEnqueueLatency
Stats::Distribution gmEnqueueLatency
Definition: shader.hh:306
Shader::sampleLoad
void sampleLoad(const Tick accessTime)
Definition: shader.cc:460
Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:234
ApertureRegister::base
Addr base
Definition: shader.hh:80
Shader::ShaderStats::gmToCompleteLatency
Stats::Distribution gmToCompleteLatency
Definition: shader.hh:309
Shader::_dispatcher
GPUDispatcher & _dispatcher
Definition: shader.hh:230
thread_context.hh
Shader::sampleStore
void sampleStore(const Tick accessTime)
Definition: shader.cc:450
Shader::notifyCuSleep
void notifyCuSleep()
Definition: shader.cc:516
Shader::_scratchApe
ApertureRegister _scratchApe
Definition: shader.hh:92
Shader::cuList
std::vector< ComputeUnit * > cuList
Definition: shader.hh:227
Shader::Shader
Shader(const Params &p)
Definition: shader.cc:55
Shader::ShaderStats::storeLatencyDist
Stats::Distribution storeLatencyDist
Definition: shader.hh:297
ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:153
Shader::gpuVmApe
const ApertureRegister & gpuVmApe() const
Definition: shader.hh:117
lds_state.hh

Generated on Tue Jun 22 2021 15:28:29 for gem5 by doxygen 1.8.17