gem5  v22.0.0.2
shader.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __SHADER_HH__
33 #define __SHADER_HH__
34 
35 #include <functional>
36 #include <string>
37 
38 #include "arch/gpu_isa.hh"
39 #include "base/statistics.hh"
40 #include "base/stats/group.hh"
41 #include "base/types.hh"
42 #include "cpu/simple/atomic.hh"
43 #include "cpu/simple/timing.hh"
44 #include "cpu/simple_thread.hh"
45 #include "cpu/thread_context.hh"
46 #include "cpu/thread_state.hh"
47 #include "dev/amdgpu/system_hub.hh"
51 #include "gpu-compute/lds_state.hh"
52 #include "mem/page_table.hh"
53 #include "mem/port.hh"
54 #include "mem/request.hh"
55 #include "params/Shader.hh"
56 #include "sim/faults.hh"
57 #include "sim/process.hh"
58 #include "sim/sim_object.hh"
59 
60 namespace gem5
61 {
62 
63 class BaseTLB;
64 class GPUCommandProcessor;
65 class GPUDispatcher;
66 
67 static const int LDS_SIZE = 65536;
68 
69 // Aperture (APE) registers define the base/limit
70 // pair for the ATC mapped memory space. Currently
71 // the only APEs we consider are for GPUVM/LDS/scratch.
72 // The APEs are registered with unique values
73 // on a per-device basis.
75 {
78 };
79 
80 // Class Shader: This describes a single shader instance. Most
81 // configurations will only have a single shader.
82 
83 class Shader : public ClockedObject
84 {
85  private:
90 
91  // Number of active CUs attached to this shader
93 
94  // Last tick that all CUs attached to this shader were inactive
96 
97  public:
98  typedef ShaderParams Params;
100 
102  void sampleLoad(const Tick accessTime);
103  void sampleStore(const Tick accessTime);
104  void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
105  void sampleLineRoundTrip(const std::map<Addr,
106  std::vector<Tick>> &roundTripTime);
107 
110  BaseCPU *cpuPointer;
111 
// Read-only accessor for the GPUVM aperture: returns a const reference
// to this shader's _gpuVmApe register (a base/limit pair).
112  const ApertureRegister&
113  gpuVmApe() const
114  {
115  return _gpuVmApe;
116  }
117 
// Read-only accessor for the LDS aperture: returns a const reference
// to this shader's _ldsApe register (a base/limit pair).
118  const ApertureRegister&
119  ldsApe() const
120  {
121  return _ldsApe;
122  }
123 
// Read-only accessor for the scratch aperture: returns a const reference
// to this shader's _scratchApe register (a base/limit pair).
124  const ApertureRegister&
125  scratchApe() const
126  {
127  return _scratchApe;
128  }
129 
130  bool
132  {
133  bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
134 
135  return is_gpu_vm;
136  }
137 
138  bool
140  {
141  bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
142 
143  return is_lds;
144  }
145 
146  bool
148  {
149  bool is_scratch
151 
152  return is_scratch;
153  }
154 
155  Addr
157  {
158  return _scratchApe.base;
159  }
160 
161  Addr
163  {
165  }
166 
167  void
168  initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
169  {
170  Addr sh_hidden_base_new = queueBase - offset;
171 
172  // We are initializing sh_hidden_private_base_vmid from the
173  // AMD queue descriptor of the first queue.
174  // The sh_hidden_private_base_vmid is supposed to be the same for
175  // all the queues from the same process.
176  if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
177  // Do not panic if shHiddenPrivateBaseVmid == 0,
178  // that is, if it is uninitialized. Panic only
179  // if the value is initialized and we get
180  // a different base later.
182  "Currently we support only single process\n");
183  }
184  shHiddenPrivateBaseVmid = sh_hidden_base_new;
185  }
186 
188 
190 
191  // is this simulation going to be timing mode in the memory?
192  bool timingSim;
194 
195  // If set, issue acq packet @ kernel launch
197  // If set, issue rel packet @ kernel end
199  // If set, fetch returns may be coissued with instructions
201  // If set, always dump all 64 gprs to trace
203  // Number of compute units (CUs) in the shader
204  int n_cu;
205  // Number of wavefront slots per SIMD per CU
206  int n_wf;
207 
208  // The size of global memory
210 
211  // Tracks CU that rr dispatcher should attempt scheduling
213 
214  // Size of scheduled add queue
215  uint32_t sa_n;
216 
217  // Pointer to value to be incremented
219  // When to do the increment
221  // Amount to increment by
223 
224  // List of Compute Units (CU's)
226 
230 
231  int64_t max_valu_insts;
233 
234  Shader(const Params &p);
235  ~Shader();
236  virtual void init();
237 
238  // Run shader scheduled adds
239  void execScheduledAdds();
240 
241  // Schedule a 32-bit value to be incremented some time in the future
242  void ScheduleAdd(int *val, Tick when, int x);
243  bool processTimingPacket(PacketPtr pkt);
244 
245  void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
246  MemCmd cmd, bool suppress_func_errors);
247 
248  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
249 
250  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
251  bool suppress_func_errors);
252 
253  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
254 
255  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
256  bool suppress_func_errors);
257 
258  void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
259  bool suppress_func_errors, int cu_id);
260 
// Store compute_unit in cuList at index cu_id, replacing whatever
// pointer was in that slot.
// NOTE(review): the slot is written with plain indexing, so nothing is
// bounds-checked here -- presumably cuList is already sized to cover
// cu_id before this is called; confirm against the constructor.
261  void
262  registerCU(int cu_id, ComputeUnit *compute_unit)
263  {
264  cuList[cu_id] = compute_unit;
265  }
266 
267  void prepareInvalidate(HSAQueueEntry *task);
268  void prepareFlush(GPUDynInstPtr gpuDynInst);
269 
270  bool dispatchWorkgroups(HSAQueueEntry *task);
271  Addr mmap(int length);
272  void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode);
273  void updateContext(int cid);
274  void notifyCuSleep();
275 
// Bump by one the stats.vectorInstSrcOperand entry selected by
// num_operands.
// NOTE(review): num_operands is used directly as the index --
// presumably callers keep it within the stat vector's size; confirm.
276  void
277  incVectorInstSrcOperand(int num_operands)
278  {
279  stats.vectorInstSrcOperand[num_operands]++;
280  }
281 
// Bump by one the stats.vectorInstDstOperand entry selected by
// num_operands.
// NOTE(review): num_operands is used directly as the index --
// presumably callers keep it within the stat vector's size; confirm.
282  void
283  incVectorInstDstOperand(int num_operands)
284  {
285  stats.vectorInstDstOperand[num_operands]++;
286  }
287 
288  protected:
290  {
291  ShaderStats(statistics::Group *parent, int wf_size);
292 
293  // some stats for measuring latency
297 
298  // average ticks from vmem inst initiateAcc to coalescer issue,
300 
301  // average ticks from coalescer issue to coalescer hit callback,
303 
304  // average ticks from coalescer hit callback to GM pipe enqueue,
306 
307  // average ticks spent in GM pipe's ordered resp buffer.
309 
310  // average number of cache blocks requested by vmem inst
312 
313  // average ticks for cache blocks to main memory for the Nth
314  // cache block generated by a vmem inst.
316 
320  } stats;
321 };
322 
323 } // namespace gem5
324 
325 #endif // __SHADER_HH__
gem5::statistics::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1930
gem5::Shader::prepareFlush
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition: shader.cc:222
gem5::Shader::registerCU
void registerCU(int cu_id, ComputeUnit *compute_unit)
Definition: shader.hh:262
gem5::Shader::ShaderStats::ShaderStats
ShaderStats(statistics::Group *parent, int wf_size)
Definition: shader.cc:535
gem5::Shader::sa_when
std::vector< uint64_t > sa_when
Definition: shader.hh:220
hsa_queue_entry.hh
gem5::Shader::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: shader.cc:138
gem5::LDS_SIZE
static const int LDS_SIZE
Definition: shader.hh:67
gem5::Shader::~Shader
~Shader()
Definition: shader.cc:146
gem5::Shader::WriteMem
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:408
thread_state.hh
gem5::Shader::gpuTc
ThreadContext * gpuTc
Definition: shader.hh:109
gem5::Shader::sa_n
uint32_t sa_n
Definition: shader.hh:215
gem5::Shader::sa_x
std::vector< int32_t > sa_x
Definition: shader.hh:222
gem5::statistics::Distribution
A simple distribution stat.
Definition: statistics.hh:2084
gem5::Shader::cpuThread
SimpleThread * cpuThread
Definition: shader.hh:108
gem5::Shader::sampleLoad
void sampleLoad(const Tick accessTime)
Definition: shader.cc:461
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::Shader::ShaderStats::vectorInstDstOperand
statistics::Vector vectorInstDstOperand
Definition: shader.hh:319
gem5::Shader::_dispatcher
GPUDispatcher & _dispatcher
Definition: shader.hh:228
gem5::Shader::mmap
Addr mmap(int length)
Definition: shader.cc:105
group.hh
gem5::Shader::ShaderStats::gmEnqueueLatency
statistics::Distribution gmEnqueueLatency
Definition: shader.hh:305
gem5::Shader::incVectorInstDstOperand
void incVectorInstDstOperand(int num_operands)
Definition: shader.hh:283
timing.hh
gem5::Shader::scratchApe
const ApertureRegister & scratchApe() const
Definition: shader.hh:125
gem5::Shader::sa_val
std::vector< int * > sa_val
Definition: shader.hh:218
atomic.hh
gem5::Shader::globalMemSize
int globalMemSize
Definition: shader.hh:209
gem5::BaseMMU::Mode
Mode
Definition: mmu.hh:56
gem5::Shader::getScratchBase
Addr getScratchBase()
Definition: shader.hh:156
gem5::Shader::vramRequestorId
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from CP.
Definition: shader.cc:530
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:59
compute_unit.hh
gem5::Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:196
gem5::Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:153
gem5::Shader::isScratchApe
bool isScratchApe(Addr addr) const
Definition: shader.hh:147
gem5::ApertureRegister::limit
Addr limit
Definition: shader.hh:77
gem5::Shader::cuList
std::vector< ComputeUnit * > cuList
Definition: shader.hh:225
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:769
gem5::Shader::functionalTLBAccess
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
Definition: shader.cc:427
gem5::Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:168
gem5::Shader::notifyCuSleep
void notifyCuSleep()
Definition: shader.cc:517
gem5::statistics::Vector
A vector of scalar stats.
Definition: statistics.hh:2006
gem5::Shader::execScheduledAdds
void execScheduledAdds()
Definition: shader.cc:161
std::vector< Tick >
gem5::Shader::sampleInstRoundTrip
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition: shader.cc:468
gem5::Shader::ShaderStats::vectorInstSrcOperand
statistics::Vector vectorInstSrcOperand
Definition: shader.hh:318
gem5::Shader::Shader
Shader(const Params &p)
Definition: shader.cc:56
gem5::Shader::tickEvent
EventFunctionWrapper tickEvent
Definition: shader.hh:189
gem5::Shader::ShaderStats::cacheBlockRoundTrip
statistics::Distribution * cacheBlockRoundTrip
Definition: shader.hh:315
faults.hh
gem5::SimpleThread
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
Definition: simple_thread.hh:95
request.hh
gem5::Shader::hsail_mode_e
hsail_mode_e
Definition: shader.hh:99
gem5::Shader::ShaderStats::shaderActiveTicks
statistics::Scalar shaderActiveTicks
Definition: shader.hh:317
gem5::AMDGPUSystemHub
This class handles reads from the system/host memory space from the shader.
Definition: system_hub.hh:50
gem5::Shader::doFunctionalAccess
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition: shader.cc:284
gem5::Shader::SIMT
@ SIMT
Definition: shader.hh:99
gem5::Shader::_lastInactiveTick
Tick _lastInactiveTick
Definition: shader.hh:95
gem5::Shader::VECTOR_SCALAR
@ VECTOR_SCALAR
Definition: shader.hh:99
gem5::MemCmd
Definition: packet.hh:75
gem5::Shader::systemHub
AMDGPUSystemHub * systemHub
Definition: shader.hh:229
gem5::Shader::_ldsApe
ApertureRegister _ldsApe
Definition: shader.hh:87
gem5::Shader::ShaderStats::rubyNetworkLatency
statistics::Distribution rubyNetworkLatency
Definition: shader.hh:302
gem5::ComputeUnit
Definition: compute_unit.hh:201
gem5::Shader::ReadMem
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:394
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:94
gem5::GPUCommandProcessor
Definition: gpu_command_processor.hh:70
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
sim_object.hh
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:291
gem5::Shader::isGpuVmApe
bool isGpuVmApe(Addr addr) const
Definition: shader.hh:131
system_hub.hh
statistics.hh
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::VegaISA::x
Bitfield< 4 > x
Definition: pagetable.hh:61
gem5::Shader::stats
gem5::Shader::ShaderStats stats
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
process.hh
port.hh
gpu_dyn_inst.hh
gem5::Shader::ShaderStats
Definition: shader.hh:289
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::Shader::cpuPointer
BaseCPU * cpuPointer
Definition: shader.hh:110
gem5::Shader::ldsApe
const ApertureRegister & ldsApe() const
Definition: shader.hh:119
gem5::Shader::ShaderStats::allLatencyDist
statistics::Distribution allLatencyDist
Definition: shader.hh:294
gem5::Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:235
gem5::Shader::ShaderStats::initToCoalesceLatency
statistics::Distribution initToCoalesceLatency
Definition: shader.hh:299
gem5::Shader::_scratchApe
ApertureRegister _scratchApe
Definition: shader.hh:88
gem5::Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:191
gem5::Shader::ShaderStats::storeLatencyDist
statistics::Distribution storeLatencyDist
Definition: shader.hh:296
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::Shader::isLdsApe
bool isLdsApe(Addr addr) const
Definition: shader.hh:139
gem5::Shader::gpuVmApe
const ApertureRegister & gpuVmApe() const
Definition: shader.hh:113
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::Shader::timingSim
bool timingSim
Definition: shader.hh:192
gem5::EventFunctionWrapper
Definition: eventq.hh:1115
gem5::Shader::AccessMem
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition: shader.cc:376
gem5::ApertureRegister::base
Addr base
Definition: shader.hh:76
gem5::Shader::coissue_return
int coissue_return
Definition: shader.hh:200
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
simple_thread.hh
gem5::Shader::_gpuVmApe
ApertureRegister _gpuVmApe
Definition: shader.hh:86
gem5::Shader::ScheduleAdd
void ScheduleAdd(int *val, Tick when, int x)
Definition: shader.cc:357
gem5::Shader::dispatcher
GPUDispatcher & dispatcher()
Definition: shader.cc:99
types.hh
gem5::ApertureRegister
Definition: shader.hh:74
gem5::Shader::sampleLineRoundTrip
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
Definition: shader.cc:488
gem5::Shader::gpuCmdProc
GPUCommandProcessor & gpuCmdProc
Definition: shader.hh:227
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:206
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::Shader::Params
ShaderParams Params
Definition: shader.hh:98
gem5::Shader::trace_vgpr_all
int trace_vgpr_all
Definition: shader.hh:202
gem5::Shader::_activeCus
int _activeCus
Definition: shader.hh:92
gem5::Shader::ShaderStats::loadLatencyDist
statistics::Distribution loadLatencyDist
Definition: shader.hh:295
gem5::Shader::n_cu
int n_cu
Definition: shader.hh:204
gem5::Shader::max_valu_insts
int64_t max_valu_insts
Definition: shader.hh:231
gem5::Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:193
gem5::RequestorID
uint16_t RequestorID
Definition: request.hh:95
gem5::GPUDispatcher
Definition: dispatcher.hh:62
gem5::Shader::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
page_table.hh
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::Shader::shHiddenPrivateBaseVmid
Addr shHiddenPrivateBaseVmid
Definition: shader.hh:89
gem5::Shader::total_valu_insts
int64_t total_valu_insts
Definition: shader.hh:232
gem5::Shader::sampleStore
void sampleStore(const Tick accessTime)
Definition: shader.cc:451
gem5::Shader::incVectorInstSrcOperand
void incVectorInstSrcOperand(int num_operands)
Definition: shader.hh:277
thread_context.hh
gem5::Shader::ShaderStats::gmToCompleteLatency
statistics::Distribution gmToCompleteLatency
Definition: shader.hh:308
gem5::Shader::impl_kern_end_rel
int impl_kern_end_rel
Definition: shader.hh:198
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:74
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::Shader
Definition: shader.hh:83
gem5::Shader::getHiddenPrivateBase
Addr getHiddenPrivateBase()
Definition: shader.hh:162
gem5::Shader::nextSchedCu
int nextSchedCu
Definition: shader.hh:212
gem5::Shader::ShaderStats::coalsrLineAddresses
statistics::Distribution coalsrLineAddresses
Definition: shader.hh:311
lds_state.hh

Generated on Thu Jul 28 2022 13:32:33 for gem5 by doxygen 1.8.17