gem5  v20.1.0.0
shader.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Steve Reinhardt
34  */
35 
36 #ifndef __SHADER_HH__
37 #define __SHADER_HH__
38 
39 #include <functional>
40 #include <string>
41 
42 #include "arch/isa.hh"
43 #include "base/types.hh"
44 #include "cpu/simple/atomic.hh"
45 #include "cpu/simple/timing.hh"
46 #include "cpu/simple_thread.hh"
47 #include "cpu/thread_context.hh"
48 #include "cpu/thread_state.hh"
51 #include "gpu-compute/gpu_tlb.hh"
53 #include "gpu-compute/lds_state.hh"
54 #include "mem/page_table.hh"
55 #include "mem/port.hh"
56 #include "mem/request.hh"
57 #include "params/Shader.hh"
58 #include "sim/faults.hh"
59 #include "sim/process.hh"
60 #include "sim/sim_object.hh"
61 
62 class BaseTLB;
64 class GPUDispatcher;
65 
66 namespace TheISA
67 {
68  class GpuTLB;
69 }
70 
71 static const int LDS_SIZE = 65536;
72 
73 // Aperture (APE) registers define the base/limit
74 // pair for the ATC-mapped memory space. Currently,
75 // the only APEs we consider are for GPUVM/LDS/scratch.
76 // The APEs are registered with unique values
77 // on a per-device basis.
79 {
82 };
83 
84 // Class Shader: This describes a single shader instance. Most
85 // configurations will only have a single shader.
86 
87 class Shader : public ClockedObject
88 {
89  private:
94 
95  // Number of active CUs attached to this shader
97 
98  // Last tick that all CUs attached to this shader were inactive
100 
101  // some stats for measuring latency
105 
106  // average ticks from vmem inst initiateAcc to coalescer issue,
107  // average ticks from coalescer issue to coalescer hit callback,
108  // average ticks from coalescer hit callback to GM pipe enqueue,
109  // and average ticks spent in GM pipe's ordered resp buffer.
114 
115  // average number of cache blocks requested by vmem inst, and
116  // average ticks for cache blocks to main memory for the Nth
117  // cache block generated by a vmem inst.
120 
121  public:
122  typedef ShaderParams Params;
124 
126  void sampleLoad(const Tick accessTime);
127  void sampleStore(const Tick accessTime);
128  void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
129  void sampleLineRoundTrip(const std::map<Addr,
130  std::vector<Tick>> &roundTripTime);
131 
135 
136  const ApertureRegister&
137  gpuVmApe() const
138  {
139  return _gpuVmApe;
140  }
141 
142  const ApertureRegister&
143  ldsApe() const
144  {
145  return _ldsApe;
146  }
147 
148  const ApertureRegister&
149  scratchApe() const
150  {
151  return _scratchApe;
152  }
153 
154  bool
156  {
157  bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
158 
159  return is_gpu_vm;
160  }
161 
162  bool
164  {
165  bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
166 
167  return is_lds;
168  }
169 
170  bool
172  {
173  bool is_scratch
175 
176  return is_scratch;
177  }
178 
179  Addr
181  {
182  return _scratchApe.base;
183  }
184 
185  Addr
187  {
189  }
190 
191  void
192  initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
193  {
194  Addr sh_hidden_base_new = queueBase - offset;
195 
196  // We are initializing sh_hidden_private_base_vmid from the
197  // AMD queue descriptor of the first queue.
198  // The sh_hidden_private_base_vmid is supposed to be the same
199  // for all the queues from the same process.
200  if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
201  // Do not panic if shHiddenPrivateBaseVmid == 0,
202  // that is, if it is uninitialized. Panic only
203  // if the value is initialized and we get
204  // a different base later.
206  "Currently we support only single process\n");
207  }
208  shHiddenPrivateBaseVmid = sh_hidden_base_new;
209  }
210 
212 
213  // is this simulation going to be timing mode in the memory?
214  bool timingSim;
216 
217  // If set, issue acq packet @ kernel launch
219  // If set, issue rel packet @ kernel end
221  // If set, fetch returns may be coissued with instructions
223  // If set, always dump all 64 gprs to trace
225  // Number of compute units (CUs) in the shader
226  int n_cu;
227  // Number of wavefront slots per SIMD per CU
228  int n_wf;
229 
230  // The size of global memory
232 
233  // Tracks CU that rr dispatcher should attempt scheduling
235 
236  // Size of scheduled add queue
237  uint32_t sa_n;
238 
239  // Pointers to the values to be incremented
241  // When to do the increment
243  // Amount to increment by
245 
246  // List of Compute Units (CUs)
248 
251 
258  void regStats();
259 
260  int64_t max_valu_insts;
262 
263  Shader(const Params *p);
264  ~Shader();
265  virtual void init();
266 
267  // Run shader scheduled adds
268  void execScheduledAdds();
269 
270  // Schedule a 32-bit value to be incremented some time in the future
271  void ScheduleAdd(int *val, Tick when, int x);
272  bool processTimingPacket(PacketPtr pkt);
273 
274  void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
275  MemCmd cmd, bool suppress_func_errors);
276 
277  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
278 
279  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
280  bool suppress_func_errors);
281 
282  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
283 
284  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
285  bool suppress_func_errors);
286 
287  void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
288  bool suppress_func_errors, int cu_id);
289 
290  void
291  registerCU(int cu_id, ComputeUnit *compute_unit)
292  {
293  cuList[cu_id] = compute_unit;
294  }
295 
296  void prepareInvalidate(HSAQueueEntry *task);
297  void prepareFlush(GPUDynInstPtr gpuDynInst);
298 
299  bool dispatchWorkgroups(HSAQueueEntry *task);
300  Addr mmap(int length);
301  void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode);
302  void updateContext(int cid);
303  void notifyCuSleep();
304 };
305 
306 #endif // __SHADER_HH__
Shader::isGpuVmApe
bool isGpuVmApe(Addr addr) const
Definition: shader.hh:155
Shader::vectorInstDstOperand
Stats::Vector vectorInstDstOperand
Definition: shader.hh:257
hsa_queue_entry.hh
thread_state.hh
Shader::sa_x
std::vector< int32_t > sa_x
Definition: shader.hh:244
Shader::impl_kern_end_rel
int impl_kern_end_rel
Definition: shader.hh:220
length
uint8_t length
Definition: inet.hh:422
GPUCommandProcessor
Definition: gpu_command_processor.hh:57
Shader::functionalTLBAccess
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode)
Definition: shader.cc:501
Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:150
Shader
Definition: shader.hh:87
data
const char data[]
Definition: circlebuf.test.cc:42
Shader::gmToCompleteLatency
Stats::Distribution gmToCompleteLatency
Definition: shader.hh:113
timing.hh
Shader::tickEvent
EventFunctionWrapper tickEvent
Definition: shader.hh:211
atomic.hh
Shader::mmap
Addr mmap(int length)
Definition: shader.cc:102
Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:192
Shader::registerCU
void registerCU(int cu_id, ComputeUnit *compute_unit)
Definition: shader.hh:291
Shader::timingSim
bool timingSim
Definition: shader.hh:214
Shader::scratchApe
const ApertureRegister & scratchApe() const
Definition: shader.hh:149
Shader::_lastInactiveTick
Tick _lastInactiveTick
Definition: shader.hh:99
compute_unit.hh
Shader::prepareFlush
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition: shader.cc:222
TheISA
Definition: decode_cache.hh:37
Shader::globalMemSize
int globalMemSize
Definition: shader.hh:231
BaseTLB::Mode
Mode
Definition: tlb.hh:57
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
Shader::n_wf
int n_wf
Definition: shader.hh:228
Shader::allLatencyDist
Stats::Distribution allLatencyDist
Definition: shader.hh:102
Shader::WriteMem
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:482
Shader::nextSchedCu
int nextSchedCu
Definition: shader.hh:234
Shader::sampleLineRoundTrip
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
Definition: shader.cc:562
Shader::shHiddenPrivateBaseVmid
Addr shHiddenPrivateBaseVmid
Definition: shader.hh:93
ApertureRegister::limit
Addr limit
Definition: shader.hh:81
RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:82
Shader::gmEnqueueLatency
Stats::Distribution gmEnqueueLatency
Definition: shader.hh:112
std::vector< Tick >
Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:194
Shader::coissue_return
int coissue_return
Definition: shader.hh:222
faults.hh
Stats::Vector
A vector of scalar stats.
Definition: statistics.hh:2575
HSAQueueEntry
Definition: hsa_queue_entry.hh:60
Shader::ReadMem
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:468
request.hh
BaseTLB
Definition: tlb.hh:50
Shader::doFunctionalAccess
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition: shader.cc:358
Shader::execScheduledAdds
void execScheduledAdds()
Definition: shader.cc:164
GPUDispatcher
Definition: dispatcher.hh:60
ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:231
Shader::storeLatencyDist
Stats::Distribution storeLatencyDist
Definition: shader.hh:104
SimpleThread
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
Definition: simple_thread.hh:89
Shader::gpuTc
ThreadContext * gpuTc
Definition: shader.hh:133
Shader::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: shader.cc:135
EventFunctionWrapper
Definition: eventq.hh:1101
Shader::~Shader
~Shader()
Definition: shader.cc:143
Stats::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2533
ComputeUnit
Definition: compute_unit.hh:198
Shader::dispatcher
GPUDispatcher & dispatcher()
Definition: shader.cc:96
Shader::initToCoalesceLatency
Stats::Distribution initToCoalesceLatency
Definition: shader.hh:110
Shader::regStats
void regStats()
Callback to set stat parameters.
Definition: shader.cc:278
Shader::gpuCmdProc
GPUCommandProcessor & gpuCmdProc
Definition: shader.hh:249
ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:88
Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:215
Shader::vectorInstSrcOperand
Stats::Vector vectorInstSrcOperand
Definition: shader.hh:256
Shader::Shader
Shader(const Params *p)
Definition: shader.cc:54
sim_object.hh
Shader::AccessMem
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition: shader.cc:450
Shader::_ldsApe
ApertureRegister _ldsApe
Definition: shader.hh:91
MemCmd
Definition: packet.hh:71
Shader::_gpuVmApe
ApertureRegister _gpuVmApe
Definition: shader.hh:90
Shader::_activeCus
int _activeCus
Definition: shader.hh:96
Shader::sa_n
uint32_t sa_n
Definition: shader.hh:237
process.hh
Shader::ldsApe
const ApertureRegister & ldsApe() const
Definition: shader.hh:143
ArmISA::mode
Bitfield< 4, 0 > mode
Definition: miscregs_types.hh:70
Shader::hsail_mode_e
hsail_mode_e
Definition: shader.hh:123
port.hh
ApertureRegister
Definition: shader.hh:78
Shader::getHiddenPrivateBase
Addr getHiddenPrivateBase()
Definition: shader.hh:186
gpu_dyn_inst.hh
Shader::sa_val
std::vector< int * > sa_val
Definition: shader.hh:240
Shader::shaderActiveTicks
Stats::Scalar shaderActiveTicks
Statistics.
Definition: shader.hh:255
Shader::Params
ShaderParams Params
Definition: shader.hh:122
Shader::trace_vgpr_all
int trace_vgpr_all
Definition: shader.hh:224
RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:69
Shader::VECTOR_SCALAR
@ VECTOR_SCALAR
Definition: shader.hh:123
gpu_tlb.hh
X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:769
Shader::sampleInstRoundTrip
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition: shader.cc:542
Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:142
Stats::Distribution
A simple distribution stat.
Definition: statistics.hh:2617
BaseCPU
Definition: cpu_dummy.hh:43
Shader::cacheBlockRoundTrip
Stats::Distribution * cacheBlockRoundTrip
Definition: shader.hh:119
Shader::getScratchBase
Addr getScratchBase()
Definition: shader.hh:180
Shader::max_valu_insts
int64_t max_valu_insts
Definition: shader.hh:260
Shader::total_valu_insts
int64_t total_valu_insts
Definition: shader.hh:261
LDS_SIZE
static const int LDS_SIZE
Definition: shader.hh:71
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:197
simple_thread.hh
Shader::sa_when
std::vector< uint64_t > sa_when
Definition: shader.hh:242
types.hh
Shader::n_cu
int n_cu
Definition: shader.hh:226
Shader::ScheduleAdd
void ScheduleAdd(int *val, Tick when, int x)
Definition: shader.cc:431
Shader::SIMT
@ SIMT
Definition: shader.hh:123
Shader::cpuPointer
BaseCPU * cpuPointer
Definition: shader.hh:134
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:257
Shader::loadLatencyDist
Stats::Distribution loadLatencyDist
Definition: shader.hh:103
addr
ip6_addr_t addr
Definition: inet.hh:423
GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
Shader::coalsrLineAddresses
Stats::Distribution coalsrLineAddresses
Definition: shader.hh:118
Shader::isScratchApe
bool isScratchApe(Addr addr) const
Definition: shader.hh:171
Shader::isLdsApe
bool isLdsApe(Addr addr) const
Definition: shader.hh:163
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
Shader::cpuThread
SimpleThread * cpuThread
Definition: shader.hh:132
Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:218
Shader::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
page_table.hh
Shader::rubyNetworkLatency
Stats::Distribution rubyNetworkLatency
Definition: shader.hh:111
Shader::sampleLoad
void sampleLoad(const Tick accessTime)
Definition: shader.cc:535
Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:235
ApertureRegister::base
Addr base
Definition: shader.hh:80
Shader::_dispatcher
GPUDispatcher & _dispatcher
Definition: shader.hh:250
thread_context.hh
Shader::sampleStore
void sampleStore(const Tick accessTime)
Definition: shader.cc:525
Shader::notifyCuSleep
void notifyCuSleep()
Definition: shader.cc:591
Shader::_scratchApe
ApertureRegister _scratchApe
Definition: shader.hh:92
Shader::cuList
std::vector< ComputeUnit * > cuList
Definition: shader.hh:247
ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:153
Shader::gpuVmApe
const ApertureRegister & gpuVmApe() const
Definition: shader.hh:137
lds_state.hh

Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17