gem5  v21.1.0.2
shader.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "gpu-compute/shader.hh"
35 
36 #include <limits>
37 
38 #include "arch/x86/linux/linux.hh"
39 #include "arch/x86/page_size.hh"
40 #include "base/chunk_generator.hh"
41 #include "debug/GPUAgentDisp.hh"
42 #include "debug/GPUDisp.hh"
43 #include "debug/GPUMem.hh"
44 #include "debug/GPUShader.hh"
45 #include "debug/GPUWgLatency.hh"
50 #include "gpu-compute/wavefront.hh"
51 #include "mem/packet.hh"
53 #include "sim/sim_exit.hh"
54 
55 namespace gem5
56 {
57 
59  _activeCus(0), _lastInactiveTick(0), cpuThread(nullptr),
60  gpuTc(nullptr), cpuPointer(p.cpu_pointer),
61  tickEvent([this]{ execScheduledAdds(); }, "Shader scheduled adds event",
62  false, Event::CPU_Tick_Pri),
63  timingSim(p.timing), hsail_mode(SIMT),
64  impl_kern_launch_acq(p.impl_kern_launch_acq),
65  impl_kern_end_rel(p.impl_kern_end_rel),
66  coissue_return(1),
67  trace_vgpr_all(1), n_cu((p.CUs).size()), n_wf(p.n_wf),
68  globalMemSize(p.globalmem),
69  nextSchedCu(0), sa_n(0), gpuCmdProc(*p.gpu_cmd_proc),
70  _dispatcher(*p.dispatcher),
71  max_valu_insts(p.max_valu_insts), total_valu_insts(0),
72  stats(this, p.CUs[0]->wfSize())
73 {
74  gpuCmdProc.setShader(this);
75  _dispatcher.setShader(this);
76 
77  _gpuVmApe.base = ((Addr)1 << 61) + 0x1000000000000L;
78  _gpuVmApe.limit = (_gpuVmApe.base & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL;
79 
80  _ldsApe.base = ((Addr)1 << 61) + 0x0;
81  _ldsApe.limit = (_ldsApe.base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
82 
83  _scratchApe.base = ((Addr)1 << 61) + 0x100000000L;
84  _scratchApe.limit = (_scratchApe.base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
85 
86  shHiddenPrivateBaseVmid = 0;
87 
88  cuList.resize(n_cu);
89 
90  panic_if(n_wf <= 0, "Must have at least 1 WF Slot per SIMD");
91 
92  for (int i = 0; i < n_cu; ++i) {
93  cuList[i] = p.CUs[i];
94  assert(i == cuList[i]->cu_id);
95  cuList[i]->shader = this;
96  cuList[i]->idleCUTimeout = p.idlecu_timeout;
97  }
98 }
99 
100 GPUDispatcher&
102 {
103  return _dispatcher;
104 }
105 
106 Addr
107 Shader::mmap(int length)
108 {
109 
110  Addr start;
111 
112  // round up length to the next page
113  length = roundUp(length, X86ISA::PageBytes);
114 
115  Process *proc = gpuTc->getProcessPtr();
116  auto mem_state = proc->memState;
117 
118  if (proc->mmapGrowsDown()) {
119  DPRINTF(GPUShader, "GROWS DOWN");
120  start = mem_state->getMmapEnd() - length;
121  mem_state->setMmapEnd(start);
122  } else {
123  DPRINTF(GPUShader, "GROWS UP");
124  start = mem_state->getMmapEnd();
125  mem_state->setMmapEnd(start + length);
126 
127  // assertion to make sure we don't overwrite the stack (it grows down)
128  assert(mem_state->getStackBase() - mem_state->getMaxStackSize() >
129  mem_state->getMmapEnd());
130  }
131 
132  DPRINTF(GPUShader, "Shader::mmap start= %#x, %#x\n", start, length);
133 
134  proc->allocateMem(start, length);
135 
136  return start;
137 }
138 
139 void
141 {
142  // grab the threadContext of the thread running on the CPU
143  assert(cpuPointer);
145  assert(gpuTc);
146 }
147 
149 {
150  for (int j = 0; j < n_cu; ++j)
151  delete cuList[j];
152 }
153 
154 void
156  // context of the thread which dispatched work
157  assert(cpuPointer);
158  gpuTc = cpuPointer->getContext(cid);
159  assert(gpuTc);
160 }
161 
162 void
164 {
165  assert(!sa_when.empty());
166 
167  // apply any scheduled adds
168  for (int i = 0; i < sa_n; ++i) {
169  if (sa_when[i] <= curTick()) {
170  *sa_val[i] += sa_x[i];
171  panic_if(*sa_val[i] < 0, "Negative counter value\n");
172  sa_val.erase(sa_val.begin() + i);
173  sa_x.erase(sa_x.begin() + i);
174  sa_when.erase(sa_when.begin() + i);
175  --sa_n;
176  --i;
177  }
178  }
179  if (!sa_when.empty()) {
180  Tick shader_wakeup = *std::max_element(sa_when.begin(),
181  sa_when.end());
182  DPRINTF(GPUDisp, "Scheduling shader wakeup at %lu\n", shader_wakeup);
183  schedule(tickEvent, shader_wakeup);
184  } else {
185  DPRINTF(GPUDisp, "sa_when empty, shader going to sleep!\n");
186  }
187 }
188 
189 /*
190  * dispatcher/shader arranges invalidate requests to the CUs
191  */
192 void
194  // if invalidate has already started/finished, then do nothing
195  if (task->isInvStarted()) return;
196 
197  // invalidate has never started; it can only perform once at kernel launch
198  assert(task->outstandingInvs() == -1);
199  int kernId = task->dispatchId();
200  // counter value is 0 now, indicating the inv is about to start
201  _dispatcher.updateInvCounter(kernId, +1);
202 
203  // iterate all cus managed by the shader, to perform invalidate.
204  for (int i_cu = 0; i_cu < n_cu; ++i_cu) {
205  // create a request to hold INV info; the request's fields will
206  // be updated in cu before use
207  auto req = std::make_shared<Request>(0, 0, 0,
208  cuList[i_cu]->requestorId(),
209  0, -1);
210 
211  _dispatcher.updateInvCounter(kernId, +1);
212  // all necessary INV flags are all set now, call cu to execute
213  cuList[i_cu]->doInvalidate(req, task->dispatchId());
214 
215  // I don't like this. This is intrusive coding.
216  cuList[i_cu]->resetRegisterPool();
217  }
218 }
219 
223 void
225  int kernId = gpuDynInst->kern_id;
226  // flush has never been started, performed only once at kernel end
227  assert(_dispatcher.getOutstandingWbs(kernId) == 0);
228 
229  // the first cu, managed by the shader, performs flush operation,
230  // assuming that L2 cache is shared by all cus in the shader
231  int i_cu = 0;
232  _dispatcher.updateWbCounter(kernId, +1);
233  cuList[i_cu]->doFlush(gpuDynInst);
234 }
235 
236 bool
238 {
239  bool scheduledSomething = false;
240  int cuCount = 0;
241  int curCu = nextSchedCu;
242  int disp_count(0);
243 
244  while (cuCount < n_cu) {
245  //Every time we try a CU, update nextSchedCu
246  nextSchedCu = (nextSchedCu + 1) % n_cu;
247 
248  // dispatch workgroup iff the following two conditions are met:
249  // (a) wg_rem is true - there are unassigned workgroups in the grid
250  // (b) there are enough free slots in cu cuList[i] for this wg
251  int num_wfs_in_wg = 0;
252  bool can_disp = cuList[curCu]->hasDispResources(task, num_wfs_in_wg);
253  if (!task->dispComplete() && can_disp) {
254  scheduledSomething = true;
255  DPRINTF(GPUDisp, "Dispatching a workgroup to CU %d: WG %d\n",
256  curCu, task->globalWgId());
257  DPRINTF(GPUAgentDisp, "Dispatching a workgroup to CU %d: WG %d\n",
258  curCu, task->globalWgId());
259  DPRINTF(GPUWgLatency, "WG Begin cycle:%d wg:%d cu:%d\n",
260  curTick(), task->globalWgId(), curCu);
261 
262  if (!cuList[curCu]->tickEvent.scheduled()) {
263  if (!_activeCus)
265  _activeCus++;
266  }
267 
268  panic_if(_activeCus <= 0 || _activeCus > cuList.size(),
269  "Invalid activeCu size\n");
270  cuList[curCu]->dispWorkgroup(task, num_wfs_in_wg);
271 
272  task->markWgDispatch();
273  ++disp_count;
274  }
275 
276  ++cuCount;
277  curCu = nextSchedCu;
278  }
279 
280  DPRINTF(GPUWgLatency, "Shader Dispatched %d Wgs\n", disp_count);
281 
282  return scheduledSomething;
283 }
284 
285 void
287  bool suppress_func_errors, int cu_id)
288 {
289  int block_size = cuList.at(cu_id)->cacheLineSize();
290  unsigned size = req->getSize();
291 
292  Addr tmp_addr;
293  BaseMMU::Mode trans_mode;
294 
295  if (cmd == MemCmd::ReadReq) {
296  trans_mode = BaseMMU::Read;
297  } else if (cmd == MemCmd::WriteReq) {
298  trans_mode = BaseMMU::Write;
299  } else {
300  fatal("unexcepted MemCmd\n");
301  }
302 
303  tmp_addr = req->getVaddr();
304  Addr split_addr = roundDown(tmp_addr + size - 1, block_size);
305 
306  assert(split_addr <= tmp_addr || split_addr - tmp_addr < block_size);
307 
308  // Misaligned access
309  if (split_addr > tmp_addr) {
310  RequestPtr req1, req2;
311  req->splitOnVaddr(split_addr, req1, req2);
312 
313  PacketPtr pkt1 = new Packet(req2, cmd);
314  PacketPtr pkt2 = new Packet(req1, cmd);
315 
316  functionalTLBAccess(pkt1, cu_id, trans_mode);
317  functionalTLBAccess(pkt2, cu_id, trans_mode);
318 
319  PacketPtr new_pkt1 = new Packet(pkt1->req, cmd);
320  PacketPtr new_pkt2 = new Packet(pkt2->req, cmd);
321 
322  new_pkt1->dataStatic(data);
323  new_pkt2->dataStatic((uint8_t*)data + req1->getSize());
324 
325  if (suppress_func_errors) {
326  new_pkt1->setSuppressFuncError();
327  new_pkt2->setSuppressFuncError();
328  }
329 
330  // fixme: this should be cuList[cu_id] if cu_id != n_cu
331  // The latter requires a memPort in the dispatcher
332  cuList[0]->memPort[0].sendFunctional(new_pkt1);
333  cuList[0]->memPort[0].sendFunctional(new_pkt2);
334 
335  delete new_pkt1;
336  delete new_pkt2;
337  delete pkt1;
338  delete pkt2;
339  } else {
340  PacketPtr pkt = new Packet(req, cmd);
341  functionalTLBAccess(pkt, cu_id, trans_mode);
342  PacketPtr new_pkt = new Packet(pkt->req, cmd);
343  new_pkt->dataStatic(data);
344 
345  if (suppress_func_errors) {
346  new_pkt->setSuppressFuncError();
347  };
348 
349  // fixme: this should be cuList[cu_id] if cu_id != n_cu
350  // The latter requires a memPort in the dispatcher
351  cuList[0]->memPort[0].sendFunctional(new_pkt);
352 
353  delete new_pkt;
354  delete pkt;
355  }
356 }
357 
358 void
360 {
361  sa_val.push_back(val);
362  when += curTick();
363  sa_when.push_back(when);
364  sa_x.push_back(x);
365  ++sa_n;
366  if (!tickEvent.scheduled() || (when < tickEvent.when())) {
367  DPRINTF(GPUDisp, "New scheduled add; scheduling shader wakeup at "
368  "%lu\n", when);
369  reschedule(tickEvent, when, true);
370  } else {
371  assert(tickEvent.scheduled());
372  DPRINTF(GPUDisp, "New scheduled add; wakeup already scheduled at "
373  "%lu\n", when);
374  }
375 }
376 
377 void
378 Shader::AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
379  MemCmd cmd, bool suppress_func_errors)
380 {
381  uint8_t *data_buf = (uint8_t*)ptr;
382 
383  for (ChunkGenerator gen(address, size, cuList.at(cu_id)->cacheLineSize());
384  !gen.done(); gen.next()) {
385 
386  RequestPtr req = std::make_shared<Request>(
387  gen.addr(), gen.size(), 0,
388  cuList[0]->requestorId(), 0, 0, nullptr);
389 
390  doFunctionalAccess(req, cmd, data_buf, suppress_func_errors, cu_id);
391  data_buf += gen.size();
392  }
393 }
394 
395 void
396 Shader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id)
397 {
398  AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq, false);
399 }
400 
401 void
402 Shader::ReadMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
403  bool suppress_func_errors)
404 {
405  AccessMem(address, ptr, size, cu_id, MemCmd::ReadReq,
406  suppress_func_errors);
407 }
408 
409 void
410 Shader::WriteMem(uint64_t address, void *ptr,uint32_t size, int cu_id)
411 {
412  AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq, false);
413 }
414 
415 void
416 Shader::WriteMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
417  bool suppress_func_errors)
418 {
419  AccessMem(address, ptr, size, cu_id, MemCmd::WriteReq,
420  suppress_func_errors);
421 }
422 
423 /*
424  * Send a packet through the appropriate TLB functional port.
425  * If cu_id=n_cu, then this is the dispatcher's TLB.
426  * Otherwise it's the TLB of the cu_id compute unit.
427  */
428 void
430 {
431  // update senderState. Need to know the gpuTc and the TLB mode
432  pkt->senderState =
433  new TheISA::GpuTLB::TranslationState(mode, gpuTc, false);
434 
435  // even when the perLaneTLB flag is turned on
436  // it's ok tp send all accesses through lane 0
437  // since the lane # is not known here,
438  // This isn't important since these are functional accesses.
439  cuList[cu_id]->tlbPort[0].sendFunctional(pkt);
440 
441  /* safe_cast the senderState */
442  TheISA::GpuTLB::TranslationState *sender_state =
443  safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
444 
445  delete sender_state->tlbEntry;
446  delete pkt->senderState;
447 }
448 
449 /*
450  * allow the shader to sample stats from constituent devices
451  */
452 void
453 Shader::sampleStore(const Tick accessTime)
454 {
455  stats.storeLatencyDist.sample(accessTime);
456  stats.allLatencyDist.sample(accessTime);
457 }
458 
459 /*
460  * allow the shader to sample stats from constituent devices
461  */
462 void
463 Shader::sampleLoad(const Tick accessTime)
464 {
465  stats.loadLatencyDist.sample(accessTime);
466  stats.allLatencyDist.sample(accessTime);
467 }
468 
469 void
471 {
472  // Only sample instructions that go all the way to main memory
473  if (roundTripTime.size() != InstMemoryHop::InstMemoryHopMax) {
474  return;
475  }
476 
477  Tick t1 = roundTripTime[0];
478  Tick t2 = roundTripTime[1];
479  Tick t3 = roundTripTime[2];
480  Tick t4 = roundTripTime[3];
481  Tick t5 = roundTripTime[4];
482 
487 }
488 
489 void
491 {
492  stats.coalsrLineAddresses.sample(lineMap.size());
493  std::vector<Tick> netTimes;
494 
495  // For each cache block address generated by a vmem inst, calculate
496  // the round-trip time for that cache block.
497  for (auto& it : lineMap) {
498  const std::vector<Tick>& timeVec = it.second;
499  if (timeVec.size() == 2) {
500  netTimes.push_back(timeVec[1] - timeVec[0]);
501  }
502  }
503 
504  // Sort the cache block round trip times so that the first
505  // distrubtion is always measuring the fastests and the last
506  // distrubtion is always measuring the slowest cache block.
507  std::sort(netTimes.begin(), netTimes.end());
508 
509  // Sample the round trip time for each N cache blocks into the
510  // Nth distribution.
511  int idx = 0;
512  for (auto& time : netTimes) {
513  stats.cacheBlockRoundTrip[idx].sample(time);
514  ++idx;
515  }
516 }
517 
518 void
520  // If all CUs attached to this shader are asleep, update shaderActiveTicks
521  panic_if(_activeCus <= 0 || _activeCus > cuList.size(),
522  "Invalid activeCu size\n");
523  _activeCus--;
524  if (!_activeCus)
526 }
527 
529  : statistics::Group(parent),
530  ADD_STAT(allLatencyDist, "delay distribution for all"),
531  ADD_STAT(loadLatencyDist, "delay distribution for loads"),
532  ADD_STAT(storeLatencyDist, "delay distribution for stores"),
533  ADD_STAT(initToCoalesceLatency,
534  "Ticks from vmem inst initiateAcc to coalescer issue"),
535  ADD_STAT(rubyNetworkLatency,
536  "Ticks from coalescer issue to coalescer hit callback"),
537  ADD_STAT(gmEnqueueLatency,
538  "Ticks from coalescer hit callback to GM pipe enqueue"),
539  ADD_STAT(gmToCompleteLatency,
540  "Ticks queued in GM pipes ordered response buffer"),
541  ADD_STAT(coalsrLineAddresses,
542  "Number of cache lines for coalesced request"),
543  ADD_STAT(shaderActiveTicks,
544  "Total ticks that any CU attached to this shader is active"),
545  ADD_STAT(vectorInstSrcOperand,
546  "vector instruction source operand distribution"),
547  ADD_STAT(vectorInstDstOperand,
548  "vector instruction destination operand distribution")
549 {
551  .init(0, 1600000, 10000)
553 
555  .init(0, 1600000, 10000)
557 
559  .init(0, 1600000, 10000)
561 
563  .init(0, 1600000, 10000)
565 
567  .init(0, 1600000, 10000)
569 
571  .init(0, 1600000, 10000)
573 
575  .init(0, 1600000, 10000)
577 
579  .init(0, 20, 1)
581 
584 
586  for (int idx = 0; idx < wf_size; ++idx) {
587  std::stringstream namestr;
588  ccprintf(namestr, "%s.cacheBlockRoundTrip%d",
589  static_cast<Shader*>(parent)->name(), idx);
591  .init(0, 1600000, 10000)
592  .name(namestr.str())
593  .desc("Coalsr-to-coalsr time for the Nth cache block in an inst")
595  }
596 }
597 
598 } // namespace gem5
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:189
gem5::Shader::prepareFlush
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition: shader.cc:224
gem5::Shader::ShaderStats::ShaderStats
ShaderStats(statistics::Group *parent, int wf_size)
Definition: shader.cc:528
gem5::Shader::sa_when
std::vector< uint64_t > sa_when
Definition: shader.hh:219
hsa_queue_entry.hh
gem5::Shader::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: shader.cc:140
gem5::Event::when
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:508
gem5::BaseMMU::Read
@ Read
Definition: mmu.hh:53
gem5::Shader::~Shader
~Shader()
Definition: shader.cc:148
gem5::Shader::WriteMem
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:410
gem5::Shader::gpuTc
ThreadContext * gpuTc
Definition: shader.hh:110
gem5::Shader::sa_n
uint32_t sa_n
Definition: shader.hh:214
gem5::Shader::sa_x
std::vector< int32_t > sa_x
Definition: shader.hh:221
gem5::ArmISA::t5
Bitfield< 5 > t5
Definition: misc_types.hh:228
gem5::statistics::Distribution
A simple distribution stat.
Definition: statistics.hh:2081
gem5::Shader::sampleLoad
void sampleLoad(const Tick accessTime)
Definition: shader.cc:463
data
const char data[]
Definition: circlebuf.test.cc:48
shader.hh
gem5::Shader::ShaderStats::vectorInstDstOperand
statistics::Vector vectorInstDstOperand
Definition: shader.hh:317
gem5::X86ISA::L
Bitfield< 7, 0 > L
Definition: int.hh:59
gem5::Shader::_dispatcher
GPUDispatcher & _dispatcher
Definition: shader.hh:227
gem5::Shader::mmap
Addr mmap(int length)
Definition: shader.cc:107
gem5::Shader::ShaderStats::gmEnqueueLatency
statistics::Distribution gmEnqueueLatency
Definition: shader.hh:303
gem5::Shader::sa_val
std::vector< int * > sa_val
Definition: shader.hh:217
gem5::BaseMMU::Mode
Mode
Definition: mmu.hh:53
gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:366
gem5::BaseMMU::Write
@ Write
Definition: mmu.hh:53
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:61
gem5::Shader::updateContext
void updateContext(int cid)
Definition: shader.cc:155
gpu_static_inst.hh
gem5::Shader::cuList
std::vector< ComputeUnit * > cuList
Definition: shader.hh:224
gem5::X86ISA::val
Bitfield< 63 > val
Definition: misc.hh:775
gem5::Shader::functionalTLBAccess
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseMMU::Mode mode)
Definition: shader.cc:429
gem5::Shader::notifyCuSleep
void notifyCuSleep()
Definition: shader.cc:519
gem5::Shader::execScheduledAdds
void execScheduledAdds()
Definition: shader.cc:163
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
std::vector< Tick >
gem5::HSAQueueEntry::markWgDispatch
void markWgDispatch()
Definition: hsa_queue_entry.hh:262
gem5::Shader::sampleInstRoundTrip
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Definition: shader.cc:470
gem5::GPUDispatcher::getOutstandingWbs
int getOutstandingWbs(int kern_id)
get kernel's outstanding cache writeback requests
Definition: dispatcher.cc:282
gem5::Shader::ShaderStats::vectorInstSrcOperand
statistics::Vector vectorInstSrcOperand
Definition: shader.hh:316
gem5::Shader::Shader
Shader(const Params &p)
Definition: shader.cc:58
gem5::Shader::tickEvent
EventFunctionWrapper tickEvent
Definition: shader.hh:188
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::Shader::ShaderStats::cacheBlockRoundTrip
statistics::Distribution * cacheBlockRoundTrip
Definition: shader.hh:313
sim_exit.hh
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1325
gem5::ccprintf
void ccprintf(cp::Print &print)
Definition: cprintf.hh:130
gem5::Shader::ShaderStats::shaderActiveTicks
statistics::Scalar shaderActiveTicks
Definition: shader.hh:315
wavefront.hh
gem5::Process::memState
std::shared_ptr< MemState > memState
Definition: process.hh:276
gem5::Process::allocateMem
void allocateMem(Addr vaddr, int64_t size, bool clobber=false)
Definition: process.cc:316
gem5::Shader::doFunctionalAccess
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
Definition: shader.cc:286
packet.hh
gem5::Shader::_lastInactiveTick
Tick _lastInactiveTick
Definition: shader.hh:96
gem5::ArmISA::t1
Bitfield< 1 > t1
Definition: misc_types.hh:232
gem5::ChunkGenerator
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
Definition: chunk_generator.hh:59
gem5::MemCmd
Definition: packet.hh:75
gem5::HSAQueueEntry::isInvStarted
bool isInvStarted()
Whether invalidate has started or finished -1 is the initial value indicating inv has not started for...
Definition: hsa_queue_entry.hh:326
gem5::statistics::pdf
const FlagsType pdf
Print the percent of the total that this entry represents.
Definition: info.hh:62
gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1134
gem5::GPUDispatcher::updateWbCounter
bool updateWbCounter(int kern_id, int val=-1)
update the counter of outstanding wb requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:268
gem5::Shader::ShaderStats::rubyNetworkLatency
statistics::Distribution rubyNetworkLatency
Definition: shader.hh:300
gem5::ArmISA::j
Bitfield< 24 > j
Definition: misc_types.hh:57
gem5::Shader::ReadMem
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Definition: shader.cc:396
gem5::statistics::DataWrap::name
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:286
gem5::ChunkGenerator::done
bool done() const
Are we done? That is, did the last call to next() advance past the end of the region?
Definition: chunk_generator.hh:141
gem5::statistics::Distribution::init
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2110
gem5::Named::name
virtual std::string name() const
Definition: named.hh:47
gem5::ArmISA::t4
Bitfield< 4 > t4
Definition: misc_types.hh:229
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::Shader::stats
gem5::Shader::ShaderStats stats
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:86
gem5::HSAQueueEntry::outstandingInvs
int outstandingInvs()
Definition: hsa_queue_entry.hh:316
page_size.hh
gem5::Shader::cpuPointer
BaseCPU * cpuPointer
Definition: shader.hh:111
gem5::EventManager::reschedule
void reschedule(Event &event, Tick when, bool always=false)
Definition: eventq.hh:1037
gem5::Shader::ShaderStats::allLatencyDist
statistics::Distribution allLatencyDist
Definition: shader.hh:292
gpu_command_processor.hh
gem5::Shader::dispatchWorkgroups
bool dispatchWorkgroups(HSAQueueEntry *task)
Definition: shader.cc:237
gem5::roundDown
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:279
gem5::HSAQueueEntry::dispComplete
bool dispComplete() const
Definition: hsa_queue_entry.hh:205
gem5::HSAQueueEntry::globalWgId
int globalWgId() const
Definition: hsa_queue_entry.hh:225
gem5::Shader::ShaderStats::initToCoalesceLatency
statistics::Distribution initToCoalesceLatency
Definition: shader.hh:297
gem5::BaseCPU::getContext
virtual ThreadContext * getContext(int tn)
Given a thread num get the thread context for it.
Definition: base.hh:290
RubySystem.hh
gem5::Shader::prepareInvalidate
void prepareInvalidate(HSAQueueEntry *task)
Definition: shader.cc:193
gem5::Shader::ShaderStats::storeLatencyDist
statistics::Distribution storeLatencyDist
Definition: shader.hh:294
gem5::InstMemoryHopMax
@ InstMemoryHopMax
Definition: misc.hh:60
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:534
gem5::ArmISA::t2
Bitfield< 2 > t2
Definition: misc_types.hh:231
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:51
gem5::ClockedObject
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
Definition: clocked_object.hh:234
gem5::ArmISA::t3
Bitfield< 3 > t3
Definition: misc_types.hh:230
gem5::Process
Definition: process.hh:67
gem5::ThreadContext::getProcessPtr
virtual Process * getProcessPtr()=0
gem5::Shader::AccessMem
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
Definition: shader.cc:378
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:203
gem5::RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:73
gem5::statistics::DataWrap::desc
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:331
gem5::Shader::ScheduleAdd
void ScheduleAdd(int *val, Tick when, int x)
Definition: shader.cc:359
gem5::Shader::dispatcher
GPUDispatcher & dispatcher()
Definition: shader.cc:101
gem5::statistics::oneline
const FlagsType oneline
Print all values on a single line.
Definition: info.hh:72
gem5::GPUDispatcher::updateInvCounter
void updateInvCounter(int kern_id, int val=-1)
update the counter of outstanding inv requests for the kernel kern_id: kernel id val: +1/-1,...
Definition: dispatcher.cc:248
linux.hh
gem5::Shader::sampleLineRoundTrip
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
Definition: shader.cc:490
gem5::MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:89
gem5::roundUp
static constexpr T roundUp(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:260
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::Process::mmapGrowsDown
virtual bool mmapGrowsDown() const
Does mmap region grow upward or downward from mmapEnd? Most platforms grow downward,...
Definition: process.hh:135
chunk_generator.hh
gem5::Shader::Params
ShaderParams Params
Definition: shader.hh:99
gem5::Shader::_activeCus
int _activeCus
Definition: shader.hh:93
gem5::Shader::ShaderStats::loadLatencyDist
statistics::Distribution loadLatencyDist
Definition: shader.hh:293
gem5::Shader::n_cu
int n_cu
Definition: shader.hh:203
dispatcher.hh
gem5::statistics::DataWrap::flags
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Definition: statistics.hh:355
gem5::EventBase::CPU_Tick_Pri
static const Priority CPU_Tick_Pri
CPU ticks must come after other associated CPU events (such as writebacks).
Definition: eventq.hh:204
gem5::X86ISA::PageBytes
const Addr PageBytes
Definition: page_size.hh:49
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::statistics::VectorBase::init
Derived & init(size_type size)
Set this vector to have the given size.
Definition: statistics.hh:1037
gem5::HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:155
gem5::Shader::sampleStore
void sampleStore(const Tick accessTime)
Definition: shader.cc:453
gem5::Packet::setSuppressFuncError
void setSuppressFuncError()
Definition: packet.hh:744
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
gem5::Shader::ShaderStats::gmToCompleteLatency
statistics::Distribution gmToCompleteLatency
Definition: shader.hh:306
gem5::ArmISA::mode
Bitfield< 4, 0 > mode
Definition: misc_types.hh:73
gem5::Shader
Definition: shader.hh:84
gem5::Shader::nextSchedCu
int nextSchedCu
Definition: shader.hh:211
gem5::Shader::ShaderStats::coalsrLineAddresses
statistics::Distribution coalsrLineAddresses
Definition: shader.hh:309

Generated on Tue Sep 21 2021 12:25:25 for gem5 by doxygen 1.8.17