gem5  v21.2.1.1
shader.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __SHADER_HH__
33 #define __SHADER_HH__
34 
35 #include <functional>
36 #include <string>
37 
38 #include "arch/gpu_isa.hh"
39 #include "base/statistics.hh"
40 #include "base/stats/group.hh"
41 #include "base/types.hh"
42 #include "cpu/simple/atomic.hh"
43 #include "cpu/simple/timing.hh"
44 #include "cpu/simple_thread.hh"
45 #include "cpu/thread_context.hh"
46 #include "cpu/thread_state.hh"
50 #include "gpu-compute/lds_state.hh"
51 #include "mem/page_table.hh"
52 #include "mem/port.hh"
53 #include "mem/request.hh"
54 #include "params/Shader.hh"
55 #include "sim/faults.hh"
56 #include "sim/process.hh"
57 #include "sim/sim_object.hh"
58 
59 namespace gem5
60 {
61 
62 class BaseTLB;
63 class GPUCommandProcessor;
64 class GPUDispatcher;
65 
66 static const int LDS_SIZE = 65536;
67 
68 // aperture (APE) registers define the base/limit
69 // pair for the ATC mapped memory space. currently
70 // the only APEs we consider are for GPUVM/LDS/scratch.
71 // the APEs are registered with unique values based
72 // on a per-device basis
74 {
77 };
78 
79 // Class Shader: This describes a single shader instance. Most
80 // configurations will only have a single shader.
81 
82 class Shader : public ClockedObject
83 {
84  private:
89 
90  // Number of active CUs attached to this shader
92 
93  // Last tick that all CUs attached to this shader were inactive
95 
96  public:
97  typedef ShaderParams Params;
99 
101  void sampleLoad(const Tick accessTime);
102  void sampleStore(const Tick accessTime);
103  void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
104  void sampleLineRoundTrip(const std::map<Addr,
105  std::vector<Tick>> &roundTripTime);
106 
109  BaseCPU *cpuPointer;
110 
111  const ApertureRegister&
112  gpuVmApe() const
113  {
114  return _gpuVmApe;
115  }
116 
117  const ApertureRegister&
118  ldsApe() const
119  {
120  return _ldsApe;
121  }
122 
123  const ApertureRegister&
124  scratchApe() const
125  {
126  return _scratchApe;
127  }
128 
129  bool
131  {
132  bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
133 
134  return is_gpu_vm;
135  }
136 
137  bool
139  {
140  bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
141 
142  return is_lds;
143  }
144 
145  bool
147  {
148  bool is_scratch
150 
151  return is_scratch;
152  }
153 
154  Addr
156  {
157  return _scratchApe.base;
158  }
159 
160  Addr
162  {
164  }
165 
166  void
167  initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
168  {
169  Addr sh_hidden_base_new = queueBase - offset;
170 
171  // We are initializing sh_hidden_private_base_vmid from the
172  // amd queue descriptor from the first queue.
173  // The sh_hidden_private_base_vmid is supposed to be same for
174  // all the queues from the same process
175  if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
176  // Do not panic if shHiddenPrivateBaseVmid == 0,
177  // that is if it is uninitialized. Panic only
178  // if the value is initialized and we get
179  // a different base later.
181  "Currently we support only single process\n");
182  }
183  shHiddenPrivateBaseVmid = sh_hidden_base_new;
184  }
185 
187 
188  // is this simulation going to be timing mode in the memory?
189  bool timingSim;
191 
192  // If set, issue acq packet @ kernel launch
194  // If set, issue rel packet @ kernel end
196  // If set, fetch returns may be coissued with instructions
198  // If set, always dump all 64 gprs to trace
200  // Number of compute units (CUs) in the shader
201  int n_cu;
202  // Number of wavefront slots per SIMD per CU
203  int n_wf;
204 
205  // The size of global memory
207 
208  // Tracks CU that rr dispatcher should attempt scheduling
210 
211  // Size of scheduled add queue
212  uint32_t sa_n;
213 
214  // Pointer to value to be incremented
216  // When to do the increment
218  // Amount to increment by
220 
221  // List of Compute Units (CUs)
223 
226 
227  int64_t max_valu_insts;
229 
230  Shader(const Params &p);
231  ~Shader();
232  virtual void init();
233 
234  // Run shader scheduled adds
235  void execScheduledAdds();
236 
237  // Schedule a 32-bit value to be incremented some time in the future
238  void ScheduleAdd(int *val, Tick when, int x);
239  bool processTimingPacket(PacketPtr pkt);
240 
241  void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
242  MemCmd cmd, bool suppress_func_errors);
243 
244  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
245 
246  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
247  bool suppress_func_errors);
248 
249  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
250 
251  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
252  bool suppress_func_errors);
253 
254  void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
255  bool suppress_func_errors, int cu_id);
256 
257  void
258  registerCU(int cu_id, ComputeUnit *compute_unit)
259  {
260  cuList[cu_id] = compute_unit;
261  }
262 
263  void prepareInvalidate(HSAQueueEntry *task);
264  void prepareFlush(GPUDynInstPtr gpuDynInst);
265 
266  bool dispatchWorkgroups(HSAQueueEntry *task);
267  Addr mmap(int length);
268  void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode);
269  void updateContext(int cid);
270  void notifyCuSleep();
271 
272  void
273  incVectorInstSrcOperand(int num_operands)
274  {
275  stats.vectorInstSrcOperand[num_operands]++;
276  }
277 
278  void
279  incVectorInstDstOperand(int num_operands)
280  {
281  stats.vectorInstDstOperand[num_operands]++;
282  }
283 
284  protected:
286  {
287  ShaderStats(statistics::Group *parent, int wf_size);
288 
289  // some stats for measuring latency
293 
294  // average ticks from vmem inst initiateAcc to coalescer issue,
296 
297  // average ticks from coalescer issue to coalescer hit callback,
299 
300  // average ticks from coalescer hit callback to GM pipe enqueue,
302 
303  // average ticks spent in GM pipe's ordered resp buffer.
305 
306  // average number of cache blocks requested by vmem inst
308 
309  // average ticks for cache blocks to main memory for the Nth
310  // cache block generated by a vmem inst.
312 
316  } stats;
317 };
318 
319 } // namespace gem5
320 
321 #endif // __SHADER_HH__
gem5::statistics::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1930
gem5::Shader::prepareFlush
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition: shader.cc:221
gem5::Shader::registerCU
void registerCU(int cu_id, ComputeUnit *compute_unit)
Definition: shader.hh:258
gem5::Shader::ShaderStats::ShaderStats
ShaderStats(statistics::Group *parent, int wf_size)
Definition: shader.cc:525
gem5::Shader::sa_when
std::vector< uint64_t > sa_when
Definition: shader.hh:217
hsa_queue_entry.hh
gem5::Shader::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: shader.cc:137
gem5::LDS_SIZE
static const int LDS_SIZE
Definition: shader.hh:66
gem5::Shader::~Shader
~Shader()
Definition: shader.cc:145
gem5::Shader::WriteMem
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:407
thread_state.hh
gem5::Shader::gpuTc
ThreadContext * gpuTc
Definition: shader.hh:108
gem5::Shader::sa_n
uint32_t sa_n
Definition: shader.hh:212
gem5::Shader::sa_x
std::vector< int32_t > sa_x
Definition: shader.hh:219
gem5::statistics::Distribution
A simple distribution stat.
Definition: statistics.hh:2084
gem5::Shader::cpuThread
SimpleThread * cpuThread
Definition: shader.hh:107
gem5::Shader::sampleLoad
void sampleLoad(const Tick accessTime)
Definition: shader.cc:460
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::Shader::ShaderStats::vectorInstDstOperand
statistics::Vector vectorInstDstOperand
Definition: shader.hh:315
gem5::Shader::_dispatcher
GPUDispatcher & _dispatcher
Definition: shader.hh:225
gem5::Shader::mmap
Addr mmap(int length)
Definition: shader.cc:104
group.hh
gem5::Shader::ShaderStats::gmEnqueueLatency
statistics::Distribution gmEnqueueLatency
Definition: shader.hh:301
gem5::Shader::incVectorInstDstOperand
void incVectorInstDstOperand(int num_operands)
Definition: shader.hh:279
timing.hh
gem5::Shader::scratchApe
const ApertureRegister & scratchApe() const
Definition: shader.hh:124
gem5::Shader::sa_val
std::vector< int * > sa_val
Definition: shader.hh:215
atomic.hh
gem5::Shader::globalMemSize
int globalMemSize
Definition: shader.hh:206
gem5::BaseMMU::Mode
Mode
Definition: mmu.hh:56
gem5::Shader::getScratchBase
Addr getScratchBase()
Definition: shader.hh:155
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:59
compute_unit.hh
gem5::Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:193
gem5::Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:152
gem5::Shader::isScratchApe
bool isScratchApe(Addr addr) const
Definition: shader.hh:146
gem5::ApertureRegister::limit
Addr limit
Definition: shader.hh:76
gem5::Shader::cuList
std::vector< ComputeUnit * > cuList
Definition: shader.hh:222
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:775
gem5::Shader::functionalTLBAccess
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
Definition: shader.cc:426
gem5::Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:167
gem5::Shader::notifyCuSleep
void notifyCuSleep()
Definition: shader.cc:516
gem5::statistics::Vector
A vector of scalar stats.
Definition: statistics.hh:2006
gem5::Shader::execScheduledAdds
void execScheduledAdds()
Definition: shader.cc:160
std::vector< Tick >
gem5::Shader::sampleInstRoundTrip
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition: shader.cc:467
gem5::Shader::ShaderStats::vectorInstSrcOperand
statistics::Vector vectorInstSrcOperand
Definition: shader.hh:314
gem5::Shader::Shader
Shader(const Params &p)
Definition: shader.cc:55
gem5::Shader::tickEvent
EventFunctionWrapper tickEvent
Definition: shader.hh:186
gem5::Shader::ShaderStats::cacheBlockRoundTrip
statistics::Distribution * cacheBlockRoundTrip
Definition: shader.hh:311
faults.hh
gem5::SimpleThread
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
Definition: simple_thread.hh:93
request.hh
gem5::Shader::hsail_mode_e
hsail_mode_e
Definition: shader.hh:98
gem5::Shader::ShaderStats::shaderActiveTicks
statistics::Scalar shaderActiveTicks
Definition: shader.hh:313
gem5::Shader::doFunctionalAccess
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition: shader.cc:283
gem5::Shader::SIMT
@ SIMT
Definition: shader.hh:98
gem5::Shader::_lastInactiveTick
Tick _lastInactiveTick
Definition: shader.hh:94
gem5::Shader::VECTOR_SCALAR
@ VECTOR_SCALAR
Definition: shader.hh:98
gem5::MemCmd
Definition: packet.hh:75
gem5::Shader::_ldsApe
ApertureRegister _ldsApe
Definition: shader.hh:86
gem5::Shader::ShaderStats::rubyNetworkLatency
statistics::Distribution rubyNetworkLatency
Definition: shader.hh:298
gem5::ComputeUnit
Definition: compute_unit.hh:201
gem5::Shader::ReadMem
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:393
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:94
gem5::GPUCommandProcessor
Definition: gpu_command_processor.hh:69
sim_object.hh
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::Shader::isGpuVmApe
bool isGpuVmApe(Addr addr) const
Definition: shader.hh:130
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
statistics.hh
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::Shader::stats
gem5::Shader::ShaderStats stats
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
process.hh
port.hh
gpu_dyn_inst.hh
gem5::Shader::ShaderStats
Definition: shader.hh:285
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::Shader::cpuPointer
BaseCPU * cpuPointer
Definition: shader.hh:109
gem5::Shader::ldsApe
const ApertureRegister & ldsApe() const
Definition: shader.hh:118
gem5::Shader::ShaderStats::allLatencyDist
statistics::Distribution allLatencyDist
Definition: shader.hh:290
gem5::Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:234
gem5::Shader::ShaderStats::initToCoalesceLatency
statistics::Distribution initToCoalesceLatency
Definition: shader.hh:295
gem5::Shader::_scratchApe
ApertureRegister _scratchApe
Definition: shader.hh:87
gem5::Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:190
gem5::Shader::ShaderStats::storeLatencyDist
statistics::Distribution storeLatencyDist
Definition: shader.hh:292
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::Shader::isLdsApe
bool isLdsApe(Addr addr) const
Definition: shader.hh:138
gem5::Shader::gpuVmApe
const ApertureRegister & gpuVmApe() const
Definition: shader.hh:112
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::Shader::timingSim
bool timingSim
Definition: shader.hh:189
gem5::EventFunctionWrapper
Definition: eventq.hh:1115
gem5::Shader::AccessMem
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition: shader.cc:375
gem5::ApertureRegister::base
Addr base
Definition: shader.hh:75
gem5::Shader::coissue_return
int coissue_return
Definition: shader.hh:197
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
simple_thread.hh
gem5::RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:73
gem5::Shader::_gpuVmApe
ApertureRegister _gpuVmApe
Definition: shader.hh:85
gem5::Shader::ScheduleAdd
void ScheduleAdd(int *val, Tick when, int x)
Definition: shader.cc:356
gem5::Shader::dispatcher
GPUDispatcher & dispatcher()
Definition: shader.cc:98
types.hh
gem5::ApertureRegister
Definition: shader.hh:73
gem5::Shader::sampleLineRoundTrip
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
Definition: shader.cc:487
gem5::Shader::gpuCmdProc
GPUCommandProcessor & gpuCmdProc
Definition: shader.hh:224
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:203
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::Shader::Params
ShaderParams Params
Definition: shader.hh:97
gem5::Shader::trace_vgpr_all
int trace_vgpr_all
Definition: shader.hh:199
gem5::Shader::_activeCus
int _activeCus
Definition: shader.hh:91
gem5::Shader::ShaderStats::loadLatencyDist
statistics::Distribution loadLatencyDist
Definition: shader.hh:291
gem5::Shader::n_cu
int n_cu
Definition: shader.hh:201
gem5::Shader::max_valu_insts
int64_t max_valu_insts
Definition: shader.hh:227
gem5::Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:190
gem5::GPUDispatcher
Definition: dispatcher.hh:62
gem5::Shader::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
page_table.hh
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: tlb.cc:60
gem5::Shader::shHiddenPrivateBaseVmid
Addr shHiddenPrivateBaseVmid
Definition: shader.hh:88
gem5::Shader::total_valu_insts
int64_t total_valu_insts
Definition: shader.hh:228
gem5::Shader::sampleStore
void sampleStore(const Tick accessTime)
Definition: shader.cc:450
gem5::Shader::incVectorInstSrcOperand
void incVectorInstSrcOperand(int num_operands)
Definition: shader.hh:273
thread_context.hh
gem5::Shader::ShaderStats::gmToCompleteLatency
statistics::Distribution gmToCompleteLatency
Definition: shader.hh:304
gem5::Shader::impl_kern_end_rel
int impl_kern_end_rel
Definition: shader.hh:195
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:74
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::Shader
Definition: shader.hh:82
gem5::Shader::getHiddenPrivateBase
Addr getHiddenPrivateBase()
Definition: shader.hh:161
gem5::Shader::nextSchedCu
int nextSchedCu
Definition: shader.hh:209
gem5::Shader::ShaderStats::coalsrLineAddresses
statistics::Distribution coalsrLineAddresses
Definition: shader.hh:307
lds_state.hh

Generated on Wed May 4 2022 12:13:58 for gem5 by doxygen 1.8.17