gem5  v21.1.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
shader.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __SHADER_HH__
35 #define __SHADER_HH__
36 
37 #include <functional>
38 #include <string>
39 
40 #include "base/statistics.hh"
41 #include "base/stats/group.hh"
42 #include "base/types.hh"
43 #include "cpu/simple/atomic.hh"
44 #include "cpu/simple/timing.hh"
45 #include "cpu/simple_thread.hh"
46 #include "cpu/thread_context.hh"
47 #include "cpu/thread_state.hh"
50 #include "gpu-compute/gpu_tlb.hh"
52 #include "gpu-compute/lds_state.hh"
53 #include "mem/page_table.hh"
54 #include "mem/port.hh"
55 #include "mem/request.hh"
56 #include "params/Shader.hh"
57 #include "sim/faults.hh"
58 #include "sim/process.hh"
59 #include "sim/sim_object.hh"
60 
61 namespace gem5
62 {
63 
64 class BaseTLB;
65 class GPUCommandProcessor;
66 class GPUDispatcher;
67 
68 static const int LDS_SIZE = 65536;
69 
70 // aperture (APE) registers define the base/limit
71 // pair for the ATC mapped memory space. currently
72 // the only APEs we consider are for GPUVM/LDS/scratch.
73 // the APEs are registered with unique values based
74 // on a per-device basis
76 {
79 };
80 
81 // Class Shader: This describes a single shader instance. Most
82 // configurations will only have a single shader.
83 
84 class Shader : public ClockedObject
85 {
86  private:
91 
92  // Number of active CUs attached to this shader
94 
95  // Last tick that all CUs attached to this shader were inactive
97 
98  public:
99  typedef ShaderParams Params;
101 
103  void sampleLoad(const Tick accessTime);
104  void sampleStore(const Tick accessTime);
105  void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
106  void sampleLineRoundTrip(const std::map<Addr,
107  std::vector<Tick>> &roundTripTime);
108 
112 
113  const ApertureRegister&
114  gpuVmApe() const
115  {
116  return _gpuVmApe;
117  }
118 
119  const ApertureRegister&
120  ldsApe() const
121  {
122  return _ldsApe;
123  }
124 
125  const ApertureRegister&
126  scratchApe() const
127  {
128  return _scratchApe;
129  }
130 
131  bool
133  {
134  bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
135 
136  return is_gpu_vm;
137  }
138 
139  bool
141  {
142  bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
143 
144  return is_lds;
145  }
146 
147  bool
149  {
150  bool is_scratch
152 
153  return is_scratch;
154  }
155 
156  Addr
158  {
159  return _scratchApe.base;
160  }
161 
162  Addr
164  {
166  }
167 
168  void
169  initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
170  {
171  Addr sh_hidden_base_new = queueBase - offset;
172 
173  // We are initializing sh_hidden_private_base_vmid from the
174  // AMD queue descriptor of the first queue.
175  // The sh_hidden_private_base_vmid is supposed to be the same for
176  // all the queues from the same process.
177  if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
178  // Do not panic if shHiddenPrivateBaseVmid == 0,
179  // that is if it is uninitialized. Panic only
180  // if the value is initialized and we get
181  // a different base later.
183  "Currently we support only single process\n");
184  }
185  shHiddenPrivateBaseVmid = sh_hidden_base_new;
186  }
187 
189 
190  // is this simulation going to be timing mode in the memory?
191  bool timingSim;
193 
194  // If set, issue acq packet @ kernel launch
196  // If set, issue rel packet @ kernel end
198  // If set, fetch returns may be coissued with instructions
200  // If set, always dump all 64 gprs to trace
202  // Number of cu units in the shader
203  int n_cu;
204  // Number of wavefront slots per SIMD per CU
205  int n_wf;
206 
207  // The size of global memory
209 
210  // Tracks CU that rr dispatcher should attempt scheduling
212 
213  // Size of scheduled add queue
214  uint32_t sa_n;
215 
216  // Pointer to value to be incremented
218  // When to do the increment
220  // Amount to increment by
222 
223  // List of Compute Units (CU's)
225 
228 
229  int64_t max_valu_insts;
231 
232  Shader(const Params &p);
233  ~Shader();
234  virtual void init();
235 
236  // Run shader scheduled adds
237  void execScheduledAdds();
238 
239  // Schedule a 32-bit value to be incremented some time in the future
240  void ScheduleAdd(int *val, Tick when, int x);
241  bool processTimingPacket(PacketPtr pkt);
242 
243  void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
244  MemCmd cmd, bool suppress_func_errors);
245 
246  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
247 
248  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
249  bool suppress_func_errors);
250 
251  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
252 
253  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
254  bool suppress_func_errors);
255 
256  void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
257  bool suppress_func_errors, int cu_id);
258 
259  void
260  registerCU(int cu_id, ComputeUnit *compute_unit)
261  {
262  cuList[cu_id] = compute_unit;
263  }
264 
265  void prepareInvalidate(HSAQueueEntry *task);
266  void prepareFlush(GPUDynInstPtr gpuDynInst);
267 
268  bool dispatchWorkgroups(HSAQueueEntry *task);
269  Addr mmap(int length);
270  void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode);
271  void updateContext(int cid);
272  void notifyCuSleep();
273 
274  void
275  incVectorInstSrcOperand(int num_operands)
276  {
277  stats.vectorInstSrcOperand[num_operands]++;
278  }
279 
280  void
281  incVectorInstDstOperand(int num_operands)
282  {
283  stats.vectorInstDstOperand[num_operands]++;
284  }
285 
286  protected:
288  {
289  ShaderStats(statistics::Group *parent, int wf_size);
290 
291  // some stats for measuring latency
295 
296  // average ticks from vmem inst initiateAcc to coalescer issue,
298 
299  // average ticks from coalescer issue to coalescer hit callback,
301 
302  // average ticks from coalescer hit callback to GM pipe enqueue,
304 
305  // average ticks spent in GM pipe's ordered resp buffer.
307 
308  // average number of cache blocks requested by vmem inst
310 
311  // average ticks for cache blocks to main memory for the Nth
312  // cache block generated by a vmem inst.
314 
318  } stats;
319 };
320 
321 } // namespace gem5
322 
323 #endif // __SHADER_HH__
gem5::statistics::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1918
gem5::Shader::prepareFlush
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition: shader.cc:224
gem5::Shader::registerCU
void registerCU(int cu_id, ComputeUnit *compute_unit)
Definition: shader.hh:260
gem5::Shader::ShaderStats::ShaderStats
ShaderStats(statistics::Group *parent, int wf_size)
Definition: shader.cc:528
gem5::Shader::sa_when
std::vector< uint64_t > sa_when
Definition: shader.hh:219
hsa_queue_entry.hh
gem5::Shader::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: shader.cc:140
gem5::LDS_SIZE
static const int LDS_SIZE
Definition: shader.hh:68
gem5::Shader::~Shader
~Shader()
Definition: shader.cc:148
gem5::Shader::WriteMem
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:410
thread_state.hh
gem5::Shader::gpuTc
ThreadContext * gpuTc
Definition: shader.hh:110
gem5::Shader::sa_n
uint32_t sa_n
Definition: shader.hh:214
gem5::Shader::sa_x
std::vector< int32_t > sa_x
Definition: shader.hh:221
gem5::statistics::Distribution
A simple distribution stat.
Definition: statistics.hh:2072
gem5::Shader::cpuThread
SimpleThread * cpuThread
Definition: shader.hh:109
gem5::Shader::sampleLoad
void sampleLoad(const Tick accessTime)
Definition: shader.cc:463
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::Shader::ShaderStats::vectorInstDstOperand
statistics::Vector vectorInstDstOperand
Definition: shader.hh:317
gem5::Shader::_dispatcher
GPUDispatcher & _dispatcher
Definition: shader.hh:227
gem5::Shader::mmap
Addr mmap(int length)
Definition: shader.cc:107
group.hh
gem5::Shader::ShaderStats::gmEnqueueLatency
statistics::Distribution gmEnqueueLatency
Definition: shader.hh:303
gem5::Shader::incVectorInstDstOperand
void incVectorInstDstOperand(int num_operands)
Definition: shader.hh:281
timing.hh
gem5::Shader::scratchApe
const ApertureRegister & scratchApe() const
Definition: shader.hh:126
gem5::Shader::sa_val
std::vector< int * > sa_val
Definition: shader.hh:217
atomic.hh
gem5::Shader::globalMemSize
int globalMemSize
Definition: shader.hh:208
gem5::BaseMMU::Mode
Mode
Definition: mmu.hh:53
gem5::Shader::getScratchBase
Addr getScratchBase()
Definition: shader.hh:157
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:61
compute_unit.hh
gem5::Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:195
gem5::Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:155
gem5::Shader::isScratchApe
bool isScratchApe(Addr addr) const
Definition: shader.hh:148
gem5::ApertureRegister::limit
Addr limit
Definition: shader.hh:78
gem5::Shader::cuList
std::vector< ComputeUnit * > cuList
Definition: shader.hh:224
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:775
gem5::Shader::functionalTLBAccess
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
Definition: shader.cc:429
gem5::Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:169
gem5::Shader::notifyCuSleep
void notifyCuSleep()
Definition: shader.cc:519
gem5::statistics::Vector
A vector of scalar stats.
Definition: statistics.hh:1994
gem5::Shader::execScheduledAdds
void execScheduledAdds()
Definition: shader.cc:163
std::vector< Tick >
gem5::Shader::sampleInstRoundTrip
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition: shader.cc:470
gem5::Shader::ShaderStats::vectorInstSrcOperand
statistics::Vector vectorInstSrcOperand
Definition: shader.hh:316
gem5::Shader::Shader
Shader(const Params &p)
Definition: shader.cc:58
gem5::Shader::tickEvent
EventFunctionWrapper tickEvent
Definition: shader.hh:188
gem5::Shader::ShaderStats::cacheBlockRoundTrip
statistics::Distribution * cacheBlockRoundTrip
Definition: shader.hh:313
faults.hh
gem5::SimpleThread
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
Definition: simple_thread.hh:94
request.hh
gem5::Shader::hsail_mode_e
hsail_mode_e
Definition: shader.hh:100
gem5::Shader::ShaderStats::shaderActiveTicks
statistics::Scalar shaderActiveTicks
Definition: shader.hh:315
gem5::Shader::doFunctionalAccess
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition: shader.cc:286
gem5::Shader::SIMT
@ SIMT
Definition: shader.hh:100
gem5::Shader::_lastInactiveTick
Tick _lastInactiveTick
Definition: shader.hh:96
gem5::Shader::VECTOR_SCALAR
@ VECTOR_SCALAR
Definition: shader.hh:100
gem5::MemCmd
Definition: packet.hh:75
gem5::Shader::_ldsApe
ApertureRegister _ldsApe
Definition: shader.hh:88
gem5::Shader::ShaderStats::rubyNetworkLatency
statistics::Distribution rubyNetworkLatency
Definition: shader.hh:300
gem5::ComputeUnit
Definition: compute_unit.hh:203
gem5::Shader::ReadMem
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:396
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:93
gem5::GPUCommandProcessor
Definition: gpu_command_processor.hh:71
sim_object.hh
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::Shader::isGpuVmApe
bool isGpuVmApe(Addr addr) const
Definition: shader.hh:132
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
statistics.hh
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::Shader::stats
gem5::Shader::ShaderStats stats
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
process.hh
port.hh
gpu_dyn_inst.hh
gem5::Shader::ShaderStats
Definition: shader.hh:287
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::Shader::cpuPointer
BaseCPU * cpuPointer
Definition: shader.hh:111
gem5::BaseCPU
Definition: base.hh:107
gem5::Shader::ldsApe
const ApertureRegister & ldsApe() const
Definition: shader.hh:120
gem5::Shader::ShaderStats::allLatencyDist
statistics::Distribution allLatencyDist
Definition: shader.hh:292
gem5::Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:237
gem5::Shader::ShaderStats::initToCoalesceLatency
statistics::Distribution initToCoalesceLatency
Definition: shader.hh:297
gpu_tlb.hh
gem5::Shader::_scratchApe
ApertureRegister _scratchApe
Definition: shader.hh:89
gem5::Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:193
gem5::Shader::ShaderStats::storeLatencyDist
statistics::Distribution storeLatencyDist
Definition: shader.hh:294
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::Shader::isLdsApe
bool isLdsApe(Addr addr) const
Definition: shader.hh:140
gem5::Shader::gpuVmApe
const ApertureRegister & gpuVmApe() const
Definition: shader.hh:114
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:51
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::Shader::timingSim
bool timingSim
Definition: shader.hh:191
gem5::EventFunctionWrapper
Definition: eventq.hh:1115
gem5::Shader::AccessMem
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition: shader.cc:378
gem5::ApertureRegister::base
Addr base
Definition: shader.hh:77
gem5::Shader::coissue_return
int coissue_return
Definition: shader.hh:199
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:203
simple_thread.hh
gem5::RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:73
gem5::Shader::_gpuVmApe
ApertureRegister _gpuVmApe
Definition: shader.hh:87
gem5::Shader::ScheduleAdd
void ScheduleAdd(int *val, Tick when, int x)
Definition: shader.cc:359
gem5::Shader::dispatcher
GPUDispatcher & dispatcher()
Definition: shader.cc:101
types.hh
gem5::ApertureRegister
Definition: shader.hh:75
gem5::Shader::sampleLineRoundTrip
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
Definition: shader.cc:490
gem5::Shader::gpuCmdProc
GPUCommandProcessor & gpuCmdProc
Definition: shader.hh:226
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:205
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::Shader::Params
ShaderParams Params
Definition: shader.hh:99
gem5::Shader::trace_vgpr_all
int trace_vgpr_all
Definition: shader.hh:201
gem5::Shader::_activeCus
int _activeCus
Definition: shader.hh:93
gem5::Shader::ShaderStats::loadLatencyDist
statistics::Distribution loadLatencyDist
Definition: shader.hh:293
gem5::Shader::n_cu
int n_cu
Definition: shader.hh:203
gem5::Shader::max_valu_insts
int64_t max_valu_insts
Definition: shader.hh:229
gem5::Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:192
gem5::GPUDispatcher
Definition: dispatcher.hh:64
gem5::Shader::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
page_table.hh
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::Shader::shHiddenPrivateBaseVmid
Addr shHiddenPrivateBaseVmid
Definition: shader.hh:90
gem5::Shader::total_valu_insts
int64_t total_valu_insts
Definition: shader.hh:230
gem5::Shader::sampleStore
void sampleStore(const Tick accessTime)
Definition: shader.cc:453
gem5::Shader::incVectorInstSrcOperand
void incVectorInstSrcOperand(int num_operands)
Definition: shader.hh:275
thread_context.hh
gem5::Shader::ShaderStats::gmToCompleteLatency
statistics::Distribution gmToCompleteLatency
Definition: shader.hh:306
gem5::Shader::impl_kern_end_rel
int impl_kern_end_rel
Definition: shader.hh:197
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:73
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::Shader
Definition: shader.hh:84
gem5::Shader::getHiddenPrivateBase
Addr getHiddenPrivateBase()
Definition: shader.hh:163
gem5::Shader::nextSchedCu
int nextSchedCu
Definition: shader.hh:211
gem5::Shader::ShaderStats::coalsrLineAddresses
statistics::Distribution coalsrLineAddresses
Definition: shader.hh:309
lds_state.hh

Generated on Tue Sep 7 2021 14:53:47 for gem5 by doxygen 1.8.17