gem5  [DEVELOP-FOR-23.0]
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
shader.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #ifndef __SHADER_HH__
33 #define __SHADER_HH__
34 
35 #include <functional>
36 #include <string>
37 
38 #include "arch/gpu_isa.hh"
39 #include "base/statistics.hh"
40 #include "base/stats/group.hh"
41 #include "base/types.hh"
42 #include "cpu/simple/atomic.hh"
43 #include "cpu/simple/timing.hh"
44 #include "cpu/simple_thread.hh"
45 #include "cpu/thread_context.hh"
46 #include "cpu/thread_state.hh"
47 #include "dev/amdgpu/system_hub.hh"
51 #include "gpu-compute/lds_state.hh"
52 #include "mem/page_table.hh"
53 #include "mem/port.hh"
54 #include "mem/request.hh"
55 #include "params/Shader.hh"
56 #include "sim/faults.hh"
57 #include "sim/process.hh"
58 #include "sim/sim_object.hh"
59 
60 namespace gem5
61 {
62 
63 class BaseTLB;
64 class GPUCommandProcessor;
65 class GPUDispatcher;
66 
67 static const int LDS_SIZE = 65536;
68 
69 // aperture (APE) registers define the base/limit
70 // pair for the ATC mapped memory space. currently
71 // the only APEs we consider are for GPUVM/LDS/scratch.
72 // the APEs are registered with unique values on a
73 // per-device basis
75 {
78 };
79 
80 // Class Shader: This describes a single shader instance. Most
81 // configurations will only have a single shader.
82 
83 class Shader : public ClockedObject
84 {
85  private:
90 
91  // Hardware regs accessed by getreg/setreg instructions, set by queues
92  std::unordered_map<int, uint32_t> hwRegs;
93 
94  // Number of active CUs attached to this shader
96 
97  // Last tick that all CUs attached to this shader were inactive
99 
100  public:
101  typedef ShaderParams Params;
103 
105  void sampleLoad(const Tick accessTime);
106  void sampleStore(const Tick accessTime);
107  void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
108  void sampleLineRoundTrip(const std::map<Addr,
109  std::vector<Tick>> &roundTripTime);
110 
114 
// Record `val` as the current value of hardware register `regIdx`,
// replacing whatever hwRegs previously held for that index.
115  void
116  setHwReg(int regIdx, uint32_t val)
117  {
118  hwRegs[regIdx] = val;
119  }
120 
// Return the value currently recorded for hardware register `regIdx`.
// hwRegs is a std::unordered_map read through operator[], so asking for
// an index that was never set inserts a zero-valued entry and returns 0;
// that insertion is also why this accessor is not const.
121  uint32_t
122  getHwReg(int regIdx)
123  {
124  return hwRegs[regIdx];
125  }
126 
// Read-only access to the GPUVM aperture register (_gpuVmApe).
127  const ApertureRegister&
128  gpuVmApe() const
129  {
130  return _gpuVmApe;
131  }
132 
// Read-only access to the LDS aperture register (_ldsApe).
133  const ApertureRegister&
134  ldsApe() const
135  {
136  return _ldsApe;
137  }
138 
139  void
141  {
142  _ldsApe.base = base;
143  _ldsApe.limit = limit;
144  }
145 
// Read-only access to the scratch aperture register (_scratchApe).
146  const ApertureRegister&
147  scratchApe() const
148  {
149  return _scratchApe;
150  }
151 
152  void
154  {
157  }
158 
159  bool
161  {
162  bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
163 
164  return is_gpu_vm;
165  }
166 
167  bool
169  {
170  bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
171 
172  return is_lds;
173  }
174 
175  bool
177  {
178  bool is_scratch
180 
181  return is_scratch;
182  }
183 
184  Addr
186  {
187  return _scratchApe.base;
188  }
189 
190  Addr
192  {
194  }
195 
196  void
197  initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
198  {
199  Addr sh_hidden_base_new = queueBase - offset;
200 
201  // We are initializing sh_hidden_private_base_vmid from the
202  // amd queue descriptor from the first queue.
203  // The sh_hidden_private_base_vmid is supposed to be same for
204  // all the queues from the same process
205  if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
206  // Do not panic if shHiddenPrivateBaseVmid == 0,
207  // that is if it is uninitialized. Panic only
208  // if the value is initialized and we get
209  // a different base later.
211  "Currently we support only single process\n");
212  }
213  shHiddenPrivateBaseVmid = sh_hidden_base_new;
214  }
215 
217 
219 
220  // is this simulation going to be timing mode in the memory?
221  bool timingSim;
223 
224  // If set, issue acq packet @ kernel launch
226  // If set, issue rel packet @ kernel end
228  // If set, fetch returns may be coissued with instructions
230  // If set, always dump all 64 gprs to trace
232  // Number of compute units (CUs) in the shader
233  int n_cu;
234  // Number of wavefront slots per SIMD per CU
235  int n_wf;
236 
237  // The size of global memory
239 
240  // Tracks CU that rr dispatcher should attempt scheduling
242 
243  // Size of scheduled add queue
244  uint32_t sa_n;
245 
246  // Pointer to value to be incremented
248  // When to do the increment
250  // Amount to increment by
252 
253  // List of Compute Units (CUs)
255 
259 
260  int64_t max_valu_insts;
262 
263  Shader(const Params &p);
264  ~Shader();
265  virtual void init();
266 
267  // Run shader scheduled adds
268  void execScheduledAdds();
269 
270  // Schedule a 32-bit value to be incremented some time in the future
271  void ScheduleAdd(int *val, Tick when, int x);
272  bool processTimingPacket(PacketPtr pkt);
273 
274  void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
275  MemCmd cmd, bool suppress_func_errors);
276 
277  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
278 
279  void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
280  bool suppress_func_errors);
281 
282  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
283 
284  void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
285  bool suppress_func_errors);
286 
287  void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
288  bool suppress_func_errors, int cu_id);
289 
// Store `compute_unit` in cuList at position `cu_id`.
// NOTE(review): cuList is a std::vector indexed directly, with no bounds
// check here; presumably it is sized before CUs register themselves --
// confirm against the constructor in shader.cc.
290  void
291  registerCU(int cu_id, ComputeUnit *compute_unit)
292  {
293  cuList[cu_id] = compute_unit;
294  }
295 
296  void prepareInvalidate(HSAQueueEntry *task);
297  void prepareFlush(GPUDynInstPtr gpuDynInst);
298 
299  bool dispatchWorkgroups(HSAQueueEntry *task);
300  Addr mmap(int length);
301  void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode);
302  void updateContext(int cid);
303  void notifyCuSleep();
304 
// Increment the entry of the vectorInstSrcOperand stat vector selected
// by `num_operands`.
// NOTE(review): no range check on `num_operands` is visible here --
// confirm the vector's size where ShaderStats is constructed.
305  void
306  incVectorInstSrcOperand(int num_operands)
307  {
308  stats.vectorInstSrcOperand[num_operands]++;
309  }
310 
// Increment the entry of the vectorInstDstOperand stat vector selected
// by `num_operands`.
// NOTE(review): no range check on `num_operands` is visible here --
// confirm the vector's size where ShaderStats is constructed.
311  void
312  incVectorInstDstOperand(int num_operands)
313  {
314  stats.vectorInstDstOperand[num_operands]++;
315  }
316 
317  protected:
319  {
320  ShaderStats(statistics::Group *parent, int wf_size);
321 
322  // some stats for measuring latency
326 
327  // average ticks from vmem inst initiateAcc to coalescer issue,
329 
330  // average ticks from coalescer issue to coalescer hit callback,
332 
333  // average ticks from coalescer hit callback to GM pipe enqueue,
335 
336  // average ticks spent in GM pipe's ordered resp buffer.
338 
339  // average number of cache blocks requested by vmem inst
341 
342  // average ticks for cache blocks to main memory for the Nth
343  // cache block generated by a vmem inst.
345 
349  } stats;
350 };
351 
352 } // namespace gem5
353 
354 #endif // __SHADER_HH__
gem5::statistics::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:1929
gem5::Shader::prepareFlush
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition: shader.cc:222
gem5::Shader::registerCU
void registerCU(int cu_id, ComputeUnit *compute_unit)
Definition: shader.hh:291
gem5::Shader::ShaderStats::ShaderStats
ShaderStats(statistics::Group *parent, int wf_size)
Definition: shader.cc:535
gem5::Shader::sa_when
std::vector< uint64_t > sa_when
Definition: shader.hh:249
hsa_queue_entry.hh
gem5::Shader::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: shader.cc:138
gem5::LDS_SIZE
static const int LDS_SIZE
Definition: shader.hh:67
gem5::Shader::~Shader
~Shader()
Definition: shader.cc:146
gem5::Shader::WriteMem
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:408
thread_state.hh
gem5::Shader::gpuTc
ThreadContext * gpuTc
Definition: shader.hh:112
gem5::Shader::sa_n
uint32_t sa_n
Definition: shader.hh:244
gem5::Shader::setLdsApe
void setLdsApe(Addr base, Addr limit)
Definition: shader.hh:140
gem5::Shader::sa_x
std::vector< int32_t > sa_x
Definition: shader.hh:251
gem5::statistics::Distribution
A simple distribution stat.
Definition: statistics.hh:2083
gem5::Shader::cpuThread
SimpleThread * cpuThread
Definition: shader.hh:111
gem5::Shader::sampleLoad
void sampleLoad(const Tick accessTime)
Definition: shader.cc:461
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::Shader::ShaderStats::vectorInstDstOperand
statistics::Vector vectorInstDstOperand
Definition: shader.hh:348
gem5::Shader::_dispatcher
GPUDispatcher & _dispatcher
Definition: shader.hh:257
gem5::Shader::mmap
Addr mmap(int length)
Definition: shader.cc:105
group.hh
gem5::Shader::ShaderStats::gmEnqueueLatency
statistics::Distribution gmEnqueueLatency
Definition: shader.hh:334
gem5::Shader::incVectorInstDstOperand
void incVectorInstDstOperand(int num_operands)
Definition: shader.hh:312
timing.hh
gem5::Shader::scratchApe
const ApertureRegister & scratchApe() const
Definition: shader.hh:147
gem5::Shader::sa_val
std::vector< int * > sa_val
Definition: shader.hh:247
atomic.hh
gem5::Shader::globalMemSize
int globalMemSize
Definition: shader.hh:238
gem5::Shader::getHwReg
uint32_t getHwReg(int regIdx)
Definition: shader.hh:122
gem5::BaseMMU::Mode
Mode
Definition: mmu.hh:56
gem5::Shader::getScratchBase
Addr getScratchBase()
Definition: shader.hh:185
gem5::Shader::vramRequestorId
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from CP.
Definition: shader.cc:530
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:60
compute_unit.hh
gem5::Shader::impl_kern_launch_acq
int impl_kern_launch_acq
Definition: shader.hh:225
gem5::Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:153
gem5::Shader::isScratchApe
bool isScratchApe(Addr addr) const
Definition: shader.hh:176
gem5::ApertureRegister::limit
Addr limit
Definition: shader.hh:77
gem5::Shader::cuList
std::vector< ComputeUnit * > cuList
Definition: shader.hh:254
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:776
gem5::Shader::functionalTLBAccess
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
Definition: shader.cc:427
gem5::Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:197
gem5::Shader::notifyCuSleep
void notifyCuSleep()
Definition: shader.cc:517
gem5::statistics::Vector
A vector of scalar stats.
Definition: statistics.hh:2005
gem5::Shader::execScheduledAdds
void execScheduledAdds()
Definition: shader.cc:161
std::vector< Tick >
gem5::Shader::sampleInstRoundTrip
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition: shader.cc:468
gem5::X86ISA::limit
BitfieldType< SegDescriptorLimit > limit
Definition: misc.hh:931
gem5::X86ISA::base
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
gem5::Shader::ShaderStats::vectorInstSrcOperand
statistics::Vector vectorInstSrcOperand
Definition: shader.hh:347
gem5::Shader::Shader
Shader(const Params &p)
Definition: shader.cc:56
gem5::Shader::tickEvent
EventFunctionWrapper tickEvent
Definition: shader.hh:218
gem5::Shader::ShaderStats::cacheBlockRoundTrip
statistics::Distribution * cacheBlockRoundTrip
Definition: shader.hh:344
faults.hh
gem5::SimpleThread
The SimpleThread object provides a combination of the ThreadState object and the ThreadContext interf...
Definition: simple_thread.hh:93
request.hh
gem5::Shader::hsail_mode_e
hsail_mode_e
Definition: shader.hh:102
gem5::Shader::ShaderStats::shaderActiveTicks
statistics::Scalar shaderActiveTicks
Definition: shader.hh:346
gem5::AMDGPUSystemHub
This class handles reads from the system/host memory space from the shader.
Definition: system_hub.hh:50
gem5::Shader::doFunctionalAccess
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition: shader.cc:284
gem5::Shader::SIMT
@ SIMT
Definition: shader.hh:102
gem5::Shader::_lastInactiveTick
Tick _lastInactiveTick
Definition: shader.hh:98
gem5::Shader::VECTOR_SCALAR
@ VECTOR_SCALAR
Definition: shader.hh:102
gem5::MemCmd
Definition: packet.hh:76
gem5::Shader::systemHub
AMDGPUSystemHub * systemHub
Definition: shader.hh:258
gem5::Shader::_ldsApe
ApertureRegister _ldsApe
Definition: shader.hh:87
gem5::Shader::ShaderStats::rubyNetworkLatency
statistics::Distribution rubyNetworkLatency
Definition: shader.hh:331
gem5::ComputeUnit
Definition: compute_unit.hh:201
gem5::Shader::ReadMem
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:394
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:88
gem5::GPUCommandProcessor
Definition: gpu_command_processor.hh:70
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
sim_object.hh
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
gem5::Shader::isGpuVmApe
bool isGpuVmApe(Addr addr) const
Definition: shader.hh:160
system_hub.hh
statistics.hh
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::VegaISA::x
Bitfield< 4 > x
Definition: pagetable.hh:61
gem5::Shader::stats
gem5::Shader::ShaderStats stats
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
process.hh
port.hh
gpu_dyn_inst.hh
gem5::Shader::ShaderStats
Definition: shader.hh:318
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::Shader::cpuPointer
BaseCPU * cpuPointer
Definition: shader.hh:113
gem5::BaseCPU
Definition: base.hh:104
gem5::Shader::ldsApe
const ApertureRegister & ldsApe() const
Definition: shader.hh:134
gem5::Shader::ShaderStats::allLatencyDist
statistics::Distribution allLatencyDist
Definition: shader.hh:323
gem5::Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:235
gem5::Shader::ShaderStats::initToCoalesceLatency
statistics::Distribution initToCoalesceLatency
Definition: shader.hh:328
gem5::Shader::_scratchApe
ApertureRegister _scratchApe
Definition: shader.hh:88
gem5::Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:191
gem5::Shader::ShaderStats::storeLatencyDist
statistics::Distribution storeLatencyDist
Definition: shader.hh:325
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::Shader::setScratchApe
void setScratchApe(Addr base, Addr limit)
Definition: shader.hh:153
gem5::Shader::isLdsApe
bool isLdsApe(Addr addr) const
Definition: shader.hh:168
gem5::Shader::gpuVmApe
const ApertureRegister & gpuVmApe() const
Definition: shader.hh:128
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::Shader::timingSim
bool timingSim
Definition: shader.hh:221
gem5::EventFunctionWrapper
Definition: eventq.hh:1136
gem5::Shader::AccessMem
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition: shader.cc:376
gem5::ApertureRegister::base
Addr base
Definition: shader.hh:76
gem5::Shader::coissue_return
int coissue_return
Definition: shader.hh:229
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:214
simple_thread.hh
gem5::Shader::_gpuVmApe
ApertureRegister _gpuVmApe
Definition: shader.hh:86
gem5::Shader::ScheduleAdd
void ScheduleAdd(int *val, Tick when, int x)
Definition: shader.cc:357
gem5::Shader::dispatcher
GPUDispatcher & dispatcher()
Definition: shader.cc:99
types.hh
gem5::ApertureRegister
Definition: shader.hh:74
gem5::Shader::sampleLineRoundTrip
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
Definition: shader.cc:488
gem5::Shader::gpuCmdProc
GPUCommandProcessor & gpuCmdProc
Definition: shader.hh:256
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:235
gem5::Shader::setHwReg
void setHwReg(int regIdx, uint32_t val)
Definition: shader.hh:116
gem5::statistics::Group
Statistics container.
Definition: group.hh:92
gem5::Shader::Params
ShaderParams Params
Definition: shader.hh:101
gem5::Shader::trace_vgpr_all
int trace_vgpr_all
Definition: shader.hh:231
gem5::Shader::_activeCus
int _activeCus
Definition: shader.hh:95
gem5::Shader::ShaderStats::loadLatencyDist
statistics::Distribution loadLatencyDist
Definition: shader.hh:324
gem5::Shader::n_cu
int n_cu
Definition: shader.hh:233
gem5::Shader::max_valu_insts
int64_t max_valu_insts
Definition: shader.hh:260
gem5::Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:222
gem5::RequestorID
uint16_t RequestorID
Definition: request.hh:95
gem5::GPUDispatcher
Definition: dispatcher.hh:62
gem5::Shader::processTimingPacket
bool processTimingPacket(PacketPtr pkt)
gem5::Shader::hwRegs
std::unordered_map< int, uint32_t > hwRegs
Definition: shader.hh:92
page_table.hh
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::Shader::shHiddenPrivateBaseVmid
Addr shHiddenPrivateBaseVmid
Definition: shader.hh:89
gem5::Shader::total_valu_insts
int64_t total_valu_insts
Definition: shader.hh:261
gem5::Shader::sampleStore
void sampleStore(const Tick accessTime)
Definition: shader.cc:451
gem5::Shader::incVectorInstSrcOperand
void incVectorInstSrcOperand(int num_operands)
Definition: shader.hh:306
thread_context.hh
gem5::Shader::ShaderStats::gmToCompleteLatency
statistics::Distribution gmToCompleteLatency
Definition: shader.hh:337
gem5::Shader::impl_kern_end_rel
int impl_kern_end_rel
Definition: shader.hh:227
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:74
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::Shader
Definition: shader.hh:83
gem5::Shader::getHiddenPrivateBase
Addr getHiddenPrivateBase()
Definition: shader.hh:191
gem5::Shader::nextSchedCu
int nextSchedCu
Definition: shader.hh:241
gem5::Shader::ShaderStats::coalsrLineAddresses
statistics::Distribution coalsrLineAddresses
Definition: shader.hh:340
lds_state.hh

Generated on Sun Jul 30 2023 01:56:57 for gem5 by doxygen 1.8.17