gem5  v21.1.0.2
wavefront.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "gpu-compute/wavefront.hh"
35 
36 #include "base/bitfield.hh"
37 #include "debug/GPUExec.hh"
38 #include "debug/GPUInitAbi.hh"
39 #include "debug/WavefrontStack.hh"
43 #include "gpu-compute/shader.hh"
46 
47 namespace gem5
48 {
49 
51  : SimObject(p), wfSlotId(p.wf_slot_id), simdId(p.simdId),
52  maxIbSize(p.max_ib_size), _gpuISA(*this),
53  vmWaitCnt(-1), expWaitCnt(-1), lgkmWaitCnt(-1),
54  vmemInstsIssued(0), expInstsIssued(0), lgkmInstsIssued(0),
55  sleepCnt(0), barId(WFBarrier::InvalidID), stats(this)
56 {
57  lastTrace = 0;
58  execUnitId = -1;
59  status = S_STOPPED;
62  startVgprIndex = 0;
63  startSgprIndex = 0;
64  outstandingReqs = 0;
69  rdLmReqsInPipe = 0;
70  rdGmReqsInPipe = 0;
71  wrLmReqsInPipe = 0;
72  wrGmReqsInPipe = 0;
77  lastNonIdleTick = 0;
78  ldsChunk = nullptr;
79 
80  memTraceBusy = 0;
81  oldVgprTcnt = 0xffffffffffffffffll;
82  oldDgprTcnt = 0xffffffffffffffffll;
83  oldVgpr.resize(p.wf_size);
84 
85  pendingFetch = false;
86  dropFetch = false;
87  maxVgprs = 0;
88  maxSgprs = 0;
89 
90  lastAddr.resize(p.wf_size);
91  workItemFlatId.resize(p.wf_size);
92  oldDgpr.resize(p.wf_size);
93  for (int i = 0; i < 3; ++i) {
94  workItemId[i].resize(p.wf_size);
95  }
96 
97  _execMask.set();
98  rawDist.clear();
99  lastInstExec = 0;
100  vecReads.clear();
101 }
102 
103 void
105 {
106  reservedVectorRegs = 0;
107  reservedScalarRegs = 0;
108  startVgprIndex = 0;
109  startSgprIndex = 0;
110 
116 }
117 
118 void
119 Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
120 {
121  int regInitIdx = 0;
122 
123  // iterate over all the init fields and check which
124  // bits are enabled
125  for (int en_bit = 0; en_bit < NumScalarInitFields; ++en_bit) {
126 
127  if (task->sgprBitEnabled(en_bit)) {
128  int physSgprIdx = 0;
129  uint32_t wiCount = 0;
130  uint32_t firstWave = 0;
131  int orderedAppendTerm = 0;
132  int numWfsInWg = 0;
133  uint32_t finalValue = 0;
134  Addr host_disp_pkt_addr = task->hostDispPktAddr();
135  Addr kernarg_addr = task->kernargAddr();
136  Addr hidden_priv_base(0);
137 
138  switch (en_bit) {
139  case PrivateSegBuf:
140  physSgprIdx =
141  computeUnit->registerManager->mapSgpr(this, regInitIdx);
142  computeUnit->srf[simdId]->write(physSgprIdx,
144  ++regInitIdx;
145  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
146  "Setting PrivateSegBuffer: s[%d] = %x\n",
148  wfSlotId, wfDynId, physSgprIdx,
150 
151  physSgprIdx =
152  computeUnit->registerManager->mapSgpr(this, regInitIdx);
153  computeUnit->srf[simdId]->write(physSgprIdx,
155  ++regInitIdx;
156  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
157  "Setting PrivateSegBuffer: s[%d] = %x\n",
159  wfSlotId, wfDynId, physSgprIdx,
161 
162  physSgprIdx =
163  computeUnit->registerManager->mapSgpr(this, regInitIdx);
164  computeUnit->srf[simdId]->write(physSgprIdx,
166  ++regInitIdx;
167  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
168  "Setting PrivateSegBuffer: s[%d] = %x\n",
170  wfSlotId, wfDynId, physSgprIdx,
172 
173  physSgprIdx =
174  computeUnit->registerManager->mapSgpr(this, regInitIdx);
175  computeUnit->srf[simdId]->write(physSgprIdx,
177 
178  ++regInitIdx;
179  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
180  "Setting PrivateSegBuffer: s[%d] = %x\n",
182  wfSlotId, wfDynId, physSgprIdx,
184  break;
185  case DispatchPtr:
186  physSgprIdx =
187  computeUnit->registerManager->mapSgpr(this, regInitIdx);
188  computeUnit->srf[simdId]->write(physSgprIdx,
189  bits(host_disp_pkt_addr, 31, 0));
190  ++regInitIdx;
191  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
192  "Setting DispatchPtr: s[%d] = %x\n",
194  wfSlotId, wfDynId, physSgprIdx,
195  bits(host_disp_pkt_addr, 31, 0));
196 
197  physSgprIdx =
198  computeUnit->registerManager->mapSgpr(this, regInitIdx);
199  computeUnit->srf[simdId]->write(physSgprIdx,
200  bits(host_disp_pkt_addr, 63, 32));
201  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
202  "Setting DispatchPtr: s[%d] = %x\n",
204  wfSlotId, wfDynId, physSgprIdx,
205  bits(host_disp_pkt_addr, 63, 32));
206 
207  ++regInitIdx;
208  break;
209  case QueuePtr:
210  physSgprIdx =
211  computeUnit->registerManager->mapSgpr(this, regInitIdx);
212  computeUnit->srf[simdId]->write(physSgprIdx,
213  bits(task->hostAMDQueueAddr, 31, 0));
214  ++regInitIdx;
215  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
216  "Setting QueuePtr: s[%d] = %x\n",
218  wfSlotId, wfDynId, physSgprIdx,
219  bits(task->hostAMDQueueAddr, 31, 0));
220 
221  physSgprIdx =
222  computeUnit->registerManager->mapSgpr(this, regInitIdx);
223  computeUnit->srf[simdId]->write(physSgprIdx,
224  bits(task->hostAMDQueueAddr, 63, 32));
225  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
226  "Setting QueuePtr: s[%d] = %x\n",
228  wfSlotId, wfDynId, physSgprIdx,
229  bits(task->hostAMDQueueAddr, 63, 32));
230 
231  ++regInitIdx;
232  break;
233  case KernargSegPtr:
234  physSgprIdx =
235  computeUnit->registerManager->mapSgpr(this, regInitIdx);
236  computeUnit->srf[simdId]->write(physSgprIdx,
237  bits(kernarg_addr, 31, 0));
238  ++regInitIdx;
239  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
240  "Setting KernargSegPtr: s[%d] = %x\n",
242  wfSlotId, wfDynId, physSgprIdx,
243  bits(kernarg_addr, 31, 0));
244 
245  physSgprIdx =
246  computeUnit->registerManager->mapSgpr(this, regInitIdx);
247  computeUnit->srf[simdId]->write(physSgprIdx,
248  bits(kernarg_addr, 63, 32));
249  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
250  "Setting KernargSegPtr: s[%d] = %x\n",
252  wfSlotId, wfDynId, physSgprIdx,
253  bits(kernarg_addr, 63, 32));
254 
255  ++regInitIdx;
256  break;
257  case FlatScratchInit:
258  physSgprIdx
259  = computeUnit->registerManager->mapSgpr(this, regInitIdx);
260  computeUnit->srf[simdId]->write(physSgprIdx,
262  .scratch_backing_memory_location & 0xffffffff));
263  ++regInitIdx;
264  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
265  "Setting FlatScratch Addr: s[%d] = %x\n",
267  wfSlotId, wfDynId, physSgprIdx,
269  .scratch_backing_memory_location & 0xffffffff));
270 
271  physSgprIdx =
272  computeUnit->registerManager->mapSgpr(this, regInitIdx);
273  // This value should be sizeof(DWORD) aligned, that is
274  // 4 byte aligned
275  computeUnit->srf[simdId]->write(physSgprIdx,
277  ++regInitIdx;
278  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
279  "Setting FlatScratch size: s[%d] = %x\n",
281  wfSlotId, wfDynId, physSgprIdx,
306  hidden_priv_base =
307  (uint64_t)task->amdQueue.scratch_resource_descriptor[0] |
308  (((uint64_t)task->amdQueue.scratch_resource_descriptor[1]
309  & 0x000000000000ffff) << 32);
311  hidden_priv_base,
313  break;
314  case GridWorkgroupCountX:
315  physSgprIdx =
316  computeUnit->registerManager->mapSgpr(this, regInitIdx);
317  wiCount = ((task->gridSize(0) +
318  task->wgSize(0) - 1) /
319  task->wgSize(0));
320  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
321 
322  ++regInitIdx;
323  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
324  "Setting num WG X: s[%d] = %x\n",
326  wfSlotId, wfDynId, physSgprIdx, wiCount);
327  break;
328  case GridWorkgroupCountY:
329  physSgprIdx =
330  computeUnit->registerManager->mapSgpr(this, regInitIdx);
331  wiCount = ((task->gridSize(1) +
332  task->wgSize(1) - 1) /
333  task->wgSize(1));
334  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
335 
336  ++regInitIdx;
337  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
338  "Setting num WG Y: s[%d] = %x\n",
340  wfSlotId, wfDynId, physSgprIdx, wiCount);
341  break;
342  case GridWorkgroupCountZ:
343  physSgprIdx =
344  computeUnit->registerManager->mapSgpr(this, regInitIdx);
345  wiCount = ((task->gridSize(2) +
346  task->wgSize(2) - 1) /
347  task->wgSize(2));
348  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
349 
350  ++regInitIdx;
351  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
352  "Setting num WG Z: s[%d] = %x\n",
354  wfSlotId, wfDynId, physSgprIdx, wiCount);
355  break;
356  case WorkgroupIdX:
357  physSgprIdx =
358  computeUnit->registerManager->mapSgpr(this, regInitIdx);
359  computeUnit->srf[simdId]->write(physSgprIdx,
360  workGroupId[0]);
361 
362  ++regInitIdx;
363  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
364  "Setting WG ID X: s[%d] = %x\n",
366  wfSlotId, wfDynId, physSgprIdx, workGroupId[0]);
367  break;
368  case WorkgroupIdY:
369  physSgprIdx =
370  computeUnit->registerManager->mapSgpr(this, regInitIdx);
371  computeUnit->srf[simdId]->write(physSgprIdx,
372  workGroupId[1]);
373 
374  ++regInitIdx;
375  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
376  "Setting WG ID Y: s[%d] = %x\n",
378  wfSlotId, wfDynId, physSgprIdx, workGroupId[1]);
379  break;
380  case WorkgroupIdZ:
381  physSgprIdx =
382  computeUnit->registerManager->mapSgpr(this, regInitIdx);
383  computeUnit->srf[simdId]->write(physSgprIdx,
384  workGroupId[2]);
385 
386  ++regInitIdx;
387  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
388  "Setting WG ID Z: s[%d] = %x\n",
390  wfSlotId, wfDynId, physSgprIdx, workGroupId[2]);
391  break;
393  physSgprIdx =
394  computeUnit->registerManager->mapSgpr(this, regInitIdx);
408  computeUnit->srf[simdId]->write(physSgprIdx, 1024 *
409  (wgId * (wgSz / 64) + wfId) *
411 
412  ++regInitIdx;
413  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
414  "Setting Private Seg Offset: s[%d] = %x\n",
416  wfSlotId, wfDynId, physSgprIdx,
417  1024 * (wgId * (wgSz / 64) + wfId) *
419  break;
420  case WorkgroupInfo:
421  firstWave = (wfId == 0) ? 1 : 0;
422  numWfsInWg = divCeil(wgSizeInWorkItems,
423  computeUnit->wfSize());
424  finalValue = firstWave << ((sizeof(uint32_t) * 8) - 1);
425  finalValue |= (orderedAppendTerm << 6);
426  finalValue |= numWfsInWg;
427  physSgprIdx =
428  computeUnit->registerManager->mapSgpr(this, regInitIdx);
430  write(physSgprIdx, finalValue);
431 
432  ++regInitIdx;
433  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
434  "Setting WG Info: s[%d] = %x\n",
436  wfSlotId, wfDynId, physSgprIdx, finalValue);
437  break;
438  default:
439  fatal("SGPR enable bit %i not supported\n", en_bit);
440  break;
441  }
442  }
443  }
444 
445  regInitIdx = 0;
446 
447  // iterate over all the init fields and check which
448  // bits are enabled
449  for (int en_bit = 0; en_bit < NumVectorInitFields; ++en_bit) {
450  if (task->vgprBitEnabled(en_bit)) {
451  uint32_t physVgprIdx = 0;
453 
454  switch (en_bit) {
455  case WorkitemIdX:
456  {
457  physVgprIdx = computeUnit->registerManager
458  ->mapVgpr(this, regInitIdx);
459  TheGpuISA::VecElemU32 *vgpr_x
460  = raw_vgpr.as<TheGpuISA::VecElemU32>();
461 
462  for (int lane = 0; lane < workItemId[0].size(); ++lane) {
463  vgpr_x[lane] = workItemId[0][lane];
464  }
465 
466  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
467  rawDist[regInitIdx] = 0;
468  ++regInitIdx;
469  }
470  break;
471  case WorkitemIdY:
472  {
473  physVgprIdx = computeUnit->registerManager
474  ->mapVgpr(this, regInitIdx);
475  TheGpuISA::VecElemU32 *vgpr_y
476  = raw_vgpr.as<TheGpuISA::VecElemU32>();
477 
478  for (int lane = 0; lane < workItemId[1].size(); ++lane) {
479  vgpr_y[lane] = workItemId[1][lane];
480  }
481 
482  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
483  rawDist[regInitIdx] = 0;
484  ++regInitIdx;
485  }
486  break;
487  case WorkitemIdZ:
488  {
489  physVgprIdx = computeUnit->registerManager->
490  mapVgpr(this, regInitIdx);
491  TheGpuISA::VecElemU32 *vgpr_z
492  = raw_vgpr.as<TheGpuISA::VecElemU32>();
493 
494  for (int lane = 0; lane < workItemId[2].size(); ++lane) {
495  vgpr_z[lane] = workItemId[2][lane];
496  }
497 
498  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
499  rawDist[regInitIdx] = 0;
500  ++regInitIdx;
501  }
502  break;
503  }
504  }
505  }
506 }
507 
508 void
509 Wavefront::resizeRegFiles(int num_vregs, int num_sregs)
510 {
511  maxVgprs = num_vregs;
512  maxSgprs = num_sregs;
513 }
514 
516 {
517 }
518 
519 void
521 {
522  if (computeUnit->idleCUTimeout > 0) {
523  // Wavefront's status transitions to stalled or stopped
524  if ((newStatus == S_STOPPED || newStatus == S_STALLED ||
525  newStatus == S_WAITCNT || newStatus == S_BARRIER) &&
526  (status != newStatus)) {
527  computeUnit->idleWfs++;
528  assert(computeUnit->idleWfs <=
530  if (computeUnit->idleWfs ==
533  }
534  // Wavefront's status transitions to an active state (from
535  // a stopped or stalled state)
536  } else if ((status == S_STOPPED || status == S_STALLED ||
537  status == S_WAITCNT || status == S_BARRIER) &&
538  (status != newStatus)) {
539  // if all WFs in the CU were idle then check if the idleness
540  // period exceeded the timeout threshold
541  if (computeUnit->idleWfs ==
545  "CU%d has been idle for %d ticks at tick %d",
547  curTick());
548  }
549  computeUnit->idleWfs--;
550  assert(computeUnit->idleWfs >= 0);
551  }
552  }
553  status = newStatus;
554 }
555 
556 void
557 Wavefront::start(uint64_t _wf_dyn_id, Addr init_pc)
558 {
559  wfDynId = _wf_dyn_id;
560  _pc = init_pc;
561 
562  status = S_RUNNING;
563 
564  vecReads.resize(maxVgprs, 0);
565 }
566 
567 bool
569 {
570  if (ii->isGlobalMem() ||
571  (ii->isFlat() && ii->executedAs() == enums::SC_GLOBAL)) {
572  return true;
573  }
574 
575  return false;
576 }
577 
578 bool
580 {
581  if (ii->isLocalMem() ||
582  (ii->isFlat() && ii->executedAs() == enums::SC_GROUP)) {
583  return true;
584  }
585 
586  return false;
587 }
588 
589 bool
591 {
592  if (instructionBuffer.empty())
593  return false;
594 
595  GPUDynInstPtr ii = instructionBuffer.front();
596 
597  if (ii->isSleep()) {
598  return true;
599  }
600  return false;
601 }
602 
603 bool
605 {
606  if (instructionBuffer.empty())
607  return false;
608 
609  GPUDynInstPtr ii = instructionBuffer.front();
610 
611  if (ii->isWaitcnt()) {
612  // waitcnt is a scalar
613  assert(ii->isScalar());
614  return true;
615  }
616 
617  return false;
618 }
619 
620 bool
622 {
623  assert(!instructionBuffer.empty());
624  GPUDynInstPtr ii = instructionBuffer.front();
625 
626  if (status != S_STOPPED && ii->isScalar() && (ii->isNop() || ii->isReturn()
627  || ii->isEndOfKernel() || ii->isBranch() || ii->isALU() ||
628  (ii->isKernArgSeg() && ii->isLoad()))) {
629  return true;
630  }
631 
632  return false;
633 }
634 
635 bool
637 {
638  assert(!instructionBuffer.empty());
639  GPUDynInstPtr ii = instructionBuffer.front();
640 
641  if (status != S_STOPPED && !ii->isScalar() && (ii->isNop() ||
642  ii->isReturn() || ii->isBranch() || ii->isALU() || ii->isEndOfKernel()
643  || (ii->isKernArgSeg() && ii->isLoad()))) {
644  return true;
645  }
646 
647  return false;
648 }
649 
650 bool
652 {
653  assert(!instructionBuffer.empty());
654  GPUDynInstPtr ii = instructionBuffer.front();
655 
656  if (status != S_STOPPED && ii->isBarrier()) {
657  return true;
658  }
659 
660  return false;
661 }
662 
663 bool
665 {
666  assert(!instructionBuffer.empty());
667  GPUDynInstPtr ii = instructionBuffer.front();
668 
669  if (status != S_STOPPED && !ii->isScalar() && ii->isGlobalMem()) {
670  return true;
671  }
672 
673  return false;
674 }
675 
676 bool
678 {
679  assert(!instructionBuffer.empty());
680  GPUDynInstPtr ii = instructionBuffer.front();
681 
682  if (status != S_STOPPED && ii->isScalar() && ii->isGlobalMem()) {
683  return true;
684  }
685 
686  return false;
687 }
688 
689 bool
691 {
692  assert(!instructionBuffer.empty());
693  GPUDynInstPtr ii = instructionBuffer.front();
694 
695  if (status != S_STOPPED && ii->isLocalMem()) {
696  return true;
697  }
698 
699  return false;
700 }
701 
702 bool
704 {
705  assert(!instructionBuffer.empty());
706  GPUDynInstPtr ii = instructionBuffer.front();
707 
708  if (status != S_STOPPED && ii->isPrivateSeg()) {
709  return true;
710  }
711 
712  return false;
713 }
714 
715 bool
717 {
718  assert(!instructionBuffer.empty());
719  GPUDynInstPtr ii = instructionBuffer.front();
720 
721  if (status != S_STOPPED && ii->isFlat()) {
722  return true;
723  }
724 
725  return false;
726 }
727 
728 bool
730 {
731  for (auto it : instructionBuffer) {
732  GPUDynInstPtr ii = it;
733  if (ii->isReturn() || ii->isBranch() ||
734  ii->isEndOfKernel()) {
735  return true;
736  }
737  }
738 
739  return false;
740 }
741 
742 void
744 {
745  execUnitId = -1;
746 }
747 
749 {
751  wrLmReqsInPipe < 0 || rdLmReqsInPipe < 0 ||
752  outstandingReqs < 0,
753  "Negative requests in pipe for WF%d for slot%d"
754  " and SIMD%d: Rd GlobalMem Reqs=%d, Wr GlobalMem Reqs=%d,"
755  " Rd LocalMem Reqs=%d, Wr LocalMem Reqs=%d,"
756  " Outstanding Reqs=%d\n",
759 }
760 
761 void
763 {
764  if (!ii->isScalar()) {
765  if (ii->isLoad()) {
766  rdGmReqsInPipe++;
767  } else if (ii->isStore()) {
768  wrGmReqsInPipe++;
769  } else if (ii->isAtomic() || ii->isMemSync()) {
770  rdGmReqsInPipe++;
771  wrGmReqsInPipe++;
772  } else {
773  panic("Invalid memory operation!\n");
774  }
776  } else {
777  if (ii->isLoad()) {
779  } else if (ii->isStore()) {
781  } else if (ii->isAtomic() || ii->isMemSync()) {
784  } else {
785  panic("Invalid memory operation!\n");
786  }
788  }
789 }
790 
791 void
793 {
794  fatal_if(ii->isScalar(),
795  "Scalar instructions can not access Shared memory!!!");
796  if (ii->isLoad()) {
797  rdLmReqsInPipe++;
798  } else if (ii->isStore()) {
799  wrLmReqsInPipe++;
800  } else if (ii->isAtomic() || ii->isMemSync()) {
801  wrLmReqsInPipe++;
802  rdLmReqsInPipe++;
803  } else {
804  panic("Invalid memory operation!\n");
805  }
807 }
808 
811 {
812  // vector of execution unit IDs to return to schedule stage
813  // this return is only used for debugging and an assertion...
814  std::vector<int> execUnitIds;
815 
816  // Get current instruction
817  GPUDynInstPtr ii = instructionBuffer.front();
818  assert(ii);
819 
820  // Single precision ALU or Branch or Return or Special instruction
821  if (ii->isALU() || ii->isSpecialOp() ||
822  ii->isBranch() || ii->isNop() ||
823  (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
824  ii->isReturn() || ii->isEndOfKernel()) {
825  if (!ii->isScalar()) {
826  execUnitId = simdId;
827  } else {
829  }
830  // this is to enforce a fixed number of cycles per issue slot per SIMD
831  } else if (ii->isBarrier()) {
832  execUnitId = ii->isScalar() ? scalarAluGlobalIdx : simdId;
833  } else if (ii->isFlat()) {
834  assert(!ii->isScalar());
835  reserveLmResource(ii);
836  // add execUnitId, reserved by reserveLmResource, list before it is
837  // overwritten by reserveGmResource
838  execUnitIds.push_back(execUnitId);
840  reserveGmResource(ii);
842  execUnitIds.push_back(flatGmUnitId);
843  execUnitId = -1;
844  } else if (ii->isGlobalMem()) {
845  reserveGmResource(ii);
846  } else if (ii->isLocalMem()) {
847  reserveLmResource(ii);
848  } else if (ii->isPrivateSeg()) {
849  fatal_if(ii->isScalar(),
850  "Scalar instructions can not access Private memory!!!");
851  reserveGmResource(ii);
852  } else {
853  panic("reserveResources -> Couldn't process op!\n");
854  }
855 
856  if (execUnitId != -1) {
857  execUnitIds.push_back(execUnitId);
858  }
859  assert(execUnitIds.size());
860  return execUnitIds;
861 }
862 
863 void
865 {
866  // ---- Exit if wavefront is inactive ----------------------------- //
867 
868  if (status == S_STOPPED || status == S_RETURNING ||
869  status==S_STALLED || instructionBuffer.empty()) {
870  return;
871  }
872 
873  if (status == S_WAITCNT) {
885  assert(isOldestInstWaitcnt());
886  }
887 
888  // Get current instruction
889 
890  GPUDynInstPtr ii = instructionBuffer.front();
891 
892  const Addr old_pc = pc();
893  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
894  "(pc: %#x; seqNum: %d)\n", computeUnit->cu_id, simdId, wfSlotId,
895  wfDynId, ii->disassemble(), old_pc, ii->seqNum());
896 
897  ii->execute(ii);
898  // delete the dynamic instruction from the pipeline map
900  // update the instruction stats in the CU
902 
903  // inform VRF of instruction execution to schedule write-back
904  // and scoreboard ready for registers
905  if (!ii->isScalar()) {
906  computeUnit->vrf[simdId]->waveExecuteInst(this, ii);
907  }
908  computeUnit->srf[simdId]->waveExecuteInst(this, ii);
909 
910  computeUnit->shader->incVectorInstSrcOperand(ii->numSrcVecRegOperands());
911  computeUnit->shader->incVectorInstDstOperand(ii->numDstVecRegOperands());
920 
921  if (lastInstExec) {
924  }
926 
927  // want to track:
928  // number of reads that occur per value written
929 
930  // vector RAW dependency tracking
931  for (const auto& srcVecOp : ii->srcVecRegOperands()) {
932  for (const auto& virtIdx : srcVecOp.virtIndices()) {
933  // This check should never fail, but to be safe we check
934  if (rawDist.find(virtIdx) != rawDist.end()) {
936  rawDist[virtIdx]);
937  }
938  // increment number of reads to this register
939  vecReads[virtIdx]++;
940  }
941  }
942 
943  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
944  for (const auto& virtIdx : dstVecOp.virtIndices()) {
945  // rawDist is set on writes, but will not be set for the first
946  // write to each physical register
947  if (rawDist.find(virtIdx) != rawDist.end()) {
948  // Sample the number of reads that were performed
950  }
951  // on a write, reset count of reads to 0
952  vecReads[virtIdx] = 0;
953 
954  rawDist[virtIdx] = stats.numInstrExecuted.value();
955  }
956  }
957 
958  if (pc() == old_pc) {
959  // PC not modified by instruction, proceed to next
960  _gpuISA.advancePC(ii);
961  instructionBuffer.pop_front();
962  } else {
963  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave%d %s taken branch\n",
965  ii->disassemble());
966  discardFetch();
967  }
968  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] (pc: %#x)\n",
970 
972  const int num_active_lanes = execMask().count();
974  computeUnit->stats.numVecOpsExecuted += num_active_lanes;
975 
976  if (ii->isF16() && ii->isALU()) {
977  if (ii->isF32() || ii->isF64()) {
978  fatal("Instruction is tagged as both (1) F16, and (2)"
979  "either F32 or F64.");
980  }
981  computeUnit->stats.numVecOpsExecutedF16 += num_active_lanes;
982  if (ii->isFMA()) {
983  computeUnit->stats.numVecOpsExecutedFMA16 += num_active_lanes;
985  += num_active_lanes;
986  }
987  else if (ii->isMAC()) {
988  computeUnit->stats.numVecOpsExecutedMAC16 += num_active_lanes;
990  += num_active_lanes;
991  }
992  else if (ii->isMAD()) {
993  computeUnit->stats.numVecOpsExecutedMAD16 += num_active_lanes;
995  += num_active_lanes;
996  }
997  }
998  if (ii->isF32() && ii->isALU()) {
999  if (ii->isF16() || ii->isF64()) {
1000  fatal("Instruction is tagged as both (1) F32, and (2)"
1001  "either F16 or F64.");
1002  }
1003  computeUnit->stats.numVecOpsExecutedF32 += num_active_lanes;
1004  if (ii->isFMA()) {
1005  computeUnit->stats.numVecOpsExecutedFMA32 += num_active_lanes;
1007  += num_active_lanes;
1008  }
1009  else if (ii->isMAC()) {
1010  computeUnit->stats.numVecOpsExecutedMAC32 += num_active_lanes;
1012  += num_active_lanes;
1013  }
1014  else if (ii->isMAD()) {
1015  computeUnit->stats.numVecOpsExecutedMAD32 += num_active_lanes;
1017  += num_active_lanes;
1018  }
1019  }
1020  if (ii->isF64() && ii->isALU()) {
1021  if (ii->isF16() || ii->isF32()) {
1022  fatal("Instruction is tagged as both (1) F64, and (2)"
1023  "either F16 or F32.");
1024  }
1025  computeUnit->stats.numVecOpsExecutedF64 += num_active_lanes;
1026  if (ii->isFMA()) {
1027  computeUnit->stats.numVecOpsExecutedFMA64 += num_active_lanes;
1029  += num_active_lanes;
1030  }
1031  else if (ii->isMAC()) {
1032  computeUnit->stats.numVecOpsExecutedMAC64 += num_active_lanes;
1034  += num_active_lanes;
1035  }
1036  else if (ii->isMAD()) {
1037  computeUnit->stats.numVecOpsExecutedMAD64 += num_active_lanes;
1039  += num_active_lanes;
1040  }
1041  }
1042  if (isGmInstruction(ii)) {
1044  num_active_lanes);
1045  } else if (isLmInstruction(ii)) {
1047  num_active_lanes);
1048  }
1049  }
1050 
1055  if (execMask().none() && ii->isFlat()) {
1057  return;
1058  }
1059 
1060  // Update Vector ALU pipeline and other resources
1061  bool flat_as_gm = false;
1062  bool flat_as_lm = false;
1063  if (ii->isFlat()) {
1064  flat_as_gm = (ii->executedAs() == enums::SC_GLOBAL) ||
1065  (ii->executedAs() == enums::SC_PRIVATE);
1066  flat_as_lm = (ii->executedAs() == enums::SC_GROUP);
1067  }
1068 
1069  // Single precision ALU or Branch or Return or Special instruction
1070  // Note, we use the same timing regardless of SP or DP ALU operation.
1071  if (ii->isALU() || ii->isSpecialOp() ||
1072  ii->isBranch() || ii->isNop() ||
1073  (ii->isKernArgSeg() && ii->isLoad()) ||
1074  ii->isArgSeg() || ii->isEndOfKernel() || ii->isReturn()) {
1075  // this is to enforce a fixed number of cycles per issue slot per SIMD
1076  if (!ii->isScalar()) {
1078  cyclesToTicks(computeUnit->issuePeriod));
1079  } else {
1081  cyclesToTicks(computeUnit->issuePeriod));
1082  }
1083  // Barrier on Scalar ALU
1084  } else if (ii->isBarrier()) {
1086  cyclesToTicks(computeUnit->issuePeriod));
1087  // GM or Flat as GM Load
1088  } else if (ii->isLoad() && (ii->isGlobalMem() || flat_as_gm)) {
1089  if (!ii->isScalar()) {
1096  } else {
1098  cyclesToTicks(computeUnit->srf_scm_bus_latency));
1103  }
1104  // GM or Flat as GM Store
1105  } else if (ii->isStore() && (ii->isGlobalMem() || flat_as_gm)) {
1106  if (!ii->isScalar()) {
1108  cyclesToTicks(Cycles(2 * computeUnit->vrf_gm_bus_latency)));
1113  } else {
1115  cyclesToTicks(Cycles(2 * computeUnit->srf_scm_bus_latency)));
1120  }
1121  } else if ((ii->isAtomic() || ii->isMemSync()) &&
1122  (ii->isGlobalMem() || flat_as_gm)) {
1123  if (!ii->isScalar()) {
1125  cyclesToTicks(Cycles(2 * computeUnit->vrf_gm_bus_latency)));
1130  } else {
1132  cyclesToTicks(Cycles(2 * computeUnit->srf_scm_bus_latency)));
1137  }
1138  // LM or Flat as LM Load
1139  } else if (ii->isLoad() && (ii->isLocalMem() || flat_as_lm)) {
1141  cyclesToTicks(computeUnit->vrf_lm_bus_latency));
1146  // LM or Flat as LM Store
1147  } else if (ii->isStore() && (ii->isLocalMem() || flat_as_lm)) {
1149  cyclesToTicks(Cycles(2 * computeUnit->vrf_lm_bus_latency)));
1154  // LM or Flat as LM, Atomic or MemFence
1155  } else if ((ii->isAtomic() || ii->isMemSync()) &&
1156  (ii->isLocalMem() || flat_as_lm)) {
1158  cyclesToTicks(Cycles(2 * computeUnit->vrf_lm_bus_latency)));
1163  } else {
1164  panic("Bad instruction type!\n");
1165  }
1166 }
1167 
1170 {
1171  // Read next instruction from instruction buffer
1172  GPUDynInstPtr ii = instructionBuffer.front();
1173  // if the WF has been dispatched in the schedule stage then
1174  // check the next oldest instruction for readiness
1175  if (computeUnit->pipeMap.find(ii->seqNum()) !=
1176  computeUnit->pipeMap.end()) {
1177  if (instructionBuffer.size() > 1) {
1178  auto it = instructionBuffer.begin() + 1;
1179  return *it;
1180  } else { // No new instructions to check
1181  return nullptr;
1182  }
1183  }
1184  return ii;
1185 }
1186 
1187 void
1189 {
1190  instructionBuffer.clear();
1192 
1198 }
1199 
1200 bool
1202 {
1203  // Both vmWaitCnt && lgkmWaitCnt uninitialized means
1204  // waitCnt instruction has been dispatched but not executed yet: next
1205  // instruction should be blocked until waitCnt is executed.
1206  if (vmWaitCnt == -1 && expWaitCnt == -1 && lgkmWaitCnt == -1) {
1207  return false;
1208  }
1209 
1215  if (vmWaitCnt != -1) {
1216  if (vmemInstsIssued > vmWaitCnt) {
1217  // vmWaitCnt not satisfied
1218  return false;
1219  }
1220  }
1221 
1222  if (expWaitCnt != -1) {
1223  if (expInstsIssued > expWaitCnt) {
1224  // expWaitCnt not satisfied
1225  return false;
1226  }
1227  }
1228 
1229  if (lgkmWaitCnt != -1) {
1230  if (lgkmInstsIssued > lgkmWaitCnt) {
1231  // lgkmWaitCnt not satisfied
1232  return false;
1233  }
1234  }
1235 
1236  // if we get here all outstanding waitcnts must
1237  // be satisfied, so we resume normal operation
1238  clearWaitCnts();
1239 
1240  return true;
1241 }
1242 
1243 bool
1245 {
1246  assert(status == S_STALLED_SLEEP);
1247 
1248  // if the sleep count has not been set, then the sleep instruction has not
1249  // been executed yet, so we will return true without setting the wavefront
1250  // status
1251  if (sleepCnt == 0)
1252  return false;
1253 
1254  sleepCnt--;
1255  if (sleepCnt != 0)
1256  return false;
1257 
1258  status = S_RUNNING;
1259  return true;
1260 }
1261 
1262 void
1264 {
1265  assert(sleepCnt == 0);
1266  sleepCnt = sleep_time;
1267 }
1268 
void
Wavefront::setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
{
    // the scoreboard should have set the status
    // to S_WAITCNT once a waitcnt instruction
    // was marked as ready
    assert(status == S_WAITCNT);

    // waitcnt instruction shouldn't be sending
    // negative counts
    assert(vm_wait_cnt >= 0);
    assert(exp_wait_cnt >= 0);
    assert(lgkm_wait_cnt >= 0);
    // the encoded counts are bounded by their field widths:
    // vmcnt is 4 bits, expcnt is 3 bits, and lgkmcnt is 5 bits
    assert(vm_wait_cnt <= 0xf);
    assert(exp_wait_cnt <= 0x7);
    assert(lgkm_wait_cnt <= 0x1f);

    // any previously set waitcnts must already have been satisfied and
    // cleared (reset to -1) before a new waitcnt can install fresh ones
    assert(vmWaitCnt == -1);
    assert(expWaitCnt == -1);
    assert(lgkmWaitCnt == -1);

    // a max-valued count in a field means "do not wait on this counter";
    // leave the corresponding member at -1 (disabled) in that case
    if (vm_wait_cnt != 0xf)
        vmWaitCnt = vm_wait_cnt;

    if (exp_wait_cnt != 0x7)
        expWaitCnt = exp_wait_cnt;

    if (lgkm_wait_cnt != 0x1f)
        lgkmWaitCnt = lgkm_wait_cnt;
}
1313 
1314 void
1316 {
1317  // reset the waitcnts back to
1318  // -1, indicating they are no
1319  // longer valid
1320  vmWaitCnt = -1;
1321  expWaitCnt = -1;
1322  lgkmWaitCnt = -1;
1323 
1324  // resume running normally
1325  status = S_RUNNING;
1326 }
1327 
1328 void
1330 {
1331  ++vmemInstsIssued;
1332 }
1333 
1334 void
1336 {
1337  ++expInstsIssued;
1338 }
1339 
1340 void
1342 {
1343  ++lgkmInstsIssued;
1344 }
1345 
1346 void
1348 {
1349  --vmemInstsIssued;
1350 }
1351 
1352 void
1354 {
1355  --expInstsIssued;
1356 }
1357 
1358 void
1360 {
1361  --lgkmInstsIssued;
1362 }
1363 
1364 Addr
1366 {
1367  return _pc;
1368 }
1369 
1370 void
1372 {
1373  _pc = new_pc;
1374 }
1375 
1376 VectorMask&
1378 {
1379  return _execMask;
1380 }
1381 
bool
Wavefront::execMask(int lane) const
{
    // Per-lane view of the execution mask: true if the given lane is
    // active for the current instruction.
    return _execMask[lane];
}
1387 
1388 void
1390 {
1391  /* clear busy registers */
1392  for (int i=0; i < maxVgprs; i++) {
1393  int vgprIdx = computeUnit->registerManager->mapVgpr(this, i);
1394  computeUnit->vrf[simdId]->markReg(vgprIdx, false);
1395  }
1396 
1397  /* Free registers used by this wavefront */
1398  uint32_t endIndex = (startVgprIndex + reservedVectorRegs - 1) %
1399  computeUnit->vrf[simdId]->numRegs();
1401  freeRegion(startVgprIndex, endIndex);
1402 }
1403 
1404 void
1406 {
1407  actualWgSzTotal = 1;
1408  for (int d = 0; d < HSAQueueEntry::MAX_DIM; ++d) {
1409  actualWgSz[d] = std::min(workGroupSz[d], gridSz[d]
1410  - task->wgId(d) * workGroupSz[d]);
1412  }
1413 }
1414 
1415 void
1417 {
1418  assert(bar_id >= WFBarrier::InvalidID);
1419  assert(bar_id < computeUnit->numBarrierSlots());
1420  barId = bar_id;
1421 }
1422 
1423 int
1425 {
1426  return barId;
1427 }
1428 
1429 bool
1431 {
1432  return barId > WFBarrier::InvalidID;
1433 }
1434 
1435 void
1437 {
1439 }
1440 
1442  : statistics::Group(parent),
1443  ADD_STAT(numInstrExecuted,
1444  "number of instructions executed by this WF slot"),
1445  ADD_STAT(schCycles, "number of cycles spent in schedule stage"),
1446  ADD_STAT(schStalls, "number of cycles WF is stalled in SCH stage"),
1447  ADD_STAT(schRfAccessStalls, "number of cycles wave selected in SCH but "
1448  "RF denied adding instruction"),
1449  ADD_STAT(schResourceStalls, "number of cycles stalled in sch by resource"
1450  " not available"),
1451  ADD_STAT(schOpdNrdyStalls, "number of cycles stalled in sch waiting for "
1452  "RF reads to complete"),
1453  ADD_STAT(schLdsArbStalls,
1454  "number of cycles wave stalled due to LDS-VRF arbitration"),
1455  // FIXME: the name of the WF needs to be unique
1456  ADD_STAT(numTimesBlockedDueWAXDependencies, "number of times the wf's "
1457  "instructions are blocked due to WAW or WAR dependencies"),
1458  // FIXME: the name of the WF needs to be unique
1459  ADD_STAT(numTimesBlockedDueRAWDependencies, "number of times the wf's "
1460  "instructions are blocked due to RAW dependencies"),
1461  ADD_STAT(vecRawDistance,
1462  "Count of RAW distance in dynamic instructions for this WF"),
1463  ADD_STAT(readsPerWrite, "Count of Vector reads per write for this WF")
1464 {
1465  vecRawDistance.init(0, 20, 1);
1466  readsPerWrite.init(0, 4, 1);
1467 }
1468 
1469 } // namespace gem5
gem5::Wavefront::expWaitCnt
int expWaitCnt
Definition: wavefront.hh:324
gem5::Wavefront::workItemFlatId
std::vector< uint32_t > workItemFlatId
Definition: wavefront.hh:157
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:189
gem5::Wavefront::vecReads
std::vector< int > vecReads
Definition: wavefront.hh:239
gem5::Wavefront::S_STALLED
@ S_STALLED
Definition: wavefront.hh:74
gem5::NumVectorInitFields
@ NumVectorInitFields
Definition: kernel_code.hh:81
gem5::Wavefront::vmemInstsIssued
int vmemInstsIssued
Definition: wavefront.hh:326
gem5::FlatScratchInit
@ FlatScratchInit
Definition: kernel_code.hh:63
gem5::Wavefront::lastAddr
std::vector< Addr > lastAddr
Definition: wavefront.hh:155
gem5::Wavefront::isOldestInstFlatMem
bool isOldestInstFlatMem()
Definition: wavefront.cc:716
gem5::Wavefront::computeActualWgSz
void computeActualWgSz(HSAQueueEntry *task)
Definition: wavefront.cc:1405
simple_pool_manager.hh
gem5::Wavefront::S_RUNNING
@ S_RUNNING
Definition: wavefront.hh:72
gem5::ComputeUnit::fetchStage
FetchStage fetchStage
Definition: compute_unit.hh:282
gem5::ComputeUnit::ComputeUnitStats::instInterleave
statistics::VectorDistribution instInterleave
Definition: compute_unit.hh:1090
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedTwoOpFP
statistics::Scalar numVecOpsExecutedTwoOpFP
Definition: compute_unit.hh:1060
gem5::WorkitemIdY
@ WorkitemIdY
Definition: kernel_code.hh:79
gem5::ComputeUnit::lastExecCycle
std::vector< uint64_t > lastExecCycle
Definition: compute_unit.hh:325
gem5::FetchUnit::flushBuf
void flushBuf(int wfSlotId)
Definition: fetch_unit.cc:311
gem5::Wavefront::setSleepTime
void setSleepTime(int sleep_time)
Definition: wavefront.cc:1263
shader.hh
gem5::Wavefront::releaseBarrier
void releaseBarrier()
Definition: wavefront.cc:1436
gem5::Wavefront::flatLmUnitId
int flatLmUnitId
Definition: wavefront.hh:105
gem5::Wavefront::isOldestInstBarrier
bool isOldestInstBarrier()
Definition: wavefront.cc:651
gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:299
gem5::Shader::incVectorInstDstOperand
void incVectorInstDstOperand(int num_operands)
Definition: shader.hh:281
gem5::Wavefront::oldVgpr
std::vector< uint32_t > oldVgpr
Definition: wavefront.hh:207
gem5::Wavefront::_execMask
VectorMask _execMask
Definition: wavefront.hh:332
gem5::Wavefront::maxSgprs
uint32_t maxSgprs
Definition: wavefront.hh:135
gem5::Wavefront::exec
void exec()
Definition: wavefront.cc:864
gem5::Gcn3ISA::VecElemU32
uint32_t VecElemU32
Definition: gpu_registers.hh:167
gem5::Wavefront::oldDgpr
std::vector< uint64_t > oldDgpr
Definition: wavefront.hh:214
gem5::PrivateSegBuf
@ PrivateSegBuf
Definition: kernel_code.hh:58
gem5::Wavefront::ldsChunk
LdsChunk * ldsChunk
Definition: wavefront.hh:225
gem5::Wavefront::isOldestInstLMem
bool isOldestInstLMem()
Definition: wavefront.cc:690
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF64
statistics::Scalar numVecOpsExecutedF64
Definition: compute_unit.hh:1046
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:61
compute_unit.hh
gem5::Wavefront::S_STALLED_SLEEP
@ S_STALLED_SLEEP
Definition: wavefront.hh:76
gem5::Wavefront::stopFetch
bool stopFetch()
Definition: wavefront.cc:729
gem5::VectorMask
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:47
gem5::ComputeUnit::stats
gem5::ComputeUnit::ComputeUnitStats stats
gem5::WorkgroupInfo
@ WorkgroupInfo
Definition: kernel_code.hh:71
gem5::Wavefront::isOldestInstVectorALU
bool isOldestInstVectorALU()
Definition: wavefront.cc:636
gem5::Wavefront::setWaitCnts
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
Definition: wavefront.cc:1270
gem5::Wavefront::pendingFetch
bool pendingFetch
Definition: wavefront.hh:113
gem5::ComputeUnit::srfToScalarMemPipeBus
WaitClass srfToScalarMemPipeBus
Definition: compute_unit.hh:241
gem5::ComputeUnit::ComputeUnitStats::instCyclesScMemPerSimd
statistics::Vector instCyclesScMemPerSimd
Definition: compute_unit.hh:982
gem5::Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:169
gem5::HSAQueueEntry::wgId
int wgId(int dim) const
Definition: hsa_queue_entry.hh:211
gem5::Wavefront::pc
Addr pc() const
Definition: wavefront.cc:1365
gem5::Wavefront::WavefrontStats::WavefrontStats
WavefrontStats(statistics::Group *parent)
Definition: wavefront.cc:1441
gem5::Wavefront::startSgprIndex
uint32_t startSgprIndex
Definition: wavefront.hh:204
std::vector< int >
gem5::Wavefront::lastNonIdleTick
Tick lastNonIdleTick
Definition: wavefront.hh:116
gem5::Wavefront::initRegState
void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
Definition: wavefront.cc:119
gem5::Wavefront::reservedVectorRegs
int reservedVectorRegs
Definition: wavefront.hh:196
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA32
statistics::Scalar numVecOpsExecutedFMA32
Definition: compute_unit.hh:1049
gem5::Wavefront::S_STOPPED
@ S_STOPPED
Definition: wavefront.hh:68
gem5::ComputeUnit::vrfToGlobalMemPipeBus
WaitClass vrfToGlobalMemPipeBus
Definition: compute_unit.hh:225
gem5::Wavefront::wgSz
uint32_t wgSz
Definition: wavefront.hh:163
gem5::PrivSegWaveByteOffset
@ PrivSegWaveByteOffset
Definition: kernel_code.hh:72
gem5::RegisterManager::vrfPoolMgrs
std::vector< PoolManager * > vrfPoolMgrs
Definition: register_manager.hh:82
gem5::Wavefront::scalarRdGmReqsInPipe
int scalarRdGmReqsInPipe
Definition: wavefront.hh:190
gem5::ComputeUnit::ComputeUnitStats::activeLanesPerLMemInstrDist
statistics::Distribution activeLanesPerLMemInstrDist
Definition: compute_unit.hh:1070
gem5::Wavefront::actualWgSz
uint32_t actualWgSz[3]
Definition: wavefront.hh:165
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::Wavefront::workGroupSz
uint32_t workGroupSz[3]
Definition: wavefront.hh:160
gem5::HSAQueueEntry::kernargAddr
Addr kernargAddr() const
Definition: hsa_queue_entry.hh:185
gem5::ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:355
gem5::Wavefront::_gpuISA
TheGpuISA::GPUISA _gpuISA
Definition: wavefront.hh:302
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD64
statistics::Scalar numVecOpsExecutedMAD64
Definition: compute_unit.hh:1058
gem5::NumScalarInitFields
@ NumScalarInitFields
Definition: kernel_code.hh:73
gem5::ComputeUnit::issuePeriod
Cycles issuePeriod
Definition: compute_unit.hh:315
gem5::statistics::none
const FlagsType none
Nothing extra to print.
Definition: info.hh:54
gem5::Wavefront::_pc
Addr _pc
Definition: wavefront.hh:331
gem5::Wavefront::rawDist
std::unordered_map< int, uint64_t > rawDist
Definition: wavefront.hh:235
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:294
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC32
statistics::Scalar numVecOpsExecutedMAC32
Definition: compute_unit.hh:1053
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD32
statistics::Scalar numVecOpsExecutedMAD32
Definition: compute_unit.hh:1057
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribtion n times.
Definition: statistics.hh:1325
gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:297
gem5::ComputeUnit::ComputeUnitStats::instCyclesVMemPerSimd
statistics::Vector instCyclesVMemPerSimd
Definition: compute_unit.hh:981
wavefront.hh
gem5::HSAQueueEntry::amdQueue
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Definition: hsa_queue_entry.hh:309
gem5::Shader::SIMT
@ SIMT
Definition: shader.hh:100
gem5::Wavefront::isOldestInstSleep
bool isOldestInstSleep()
Definition: wavefront.cc:590
gem5::Wavefront::workItemId
std::vector< uint32_t > workItemId[3]
Definition: wavefront.hh:156
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::Wavefront::flatGmUnitId
int flatGmUnitId
Definition: wavefront.hh:106
gem5::WorkgroupIdY
@ WorkgroupIdY
Definition: kernel_code.hh:69
gem5::WorkgroupIdZ
@ WorkgroupIdZ
Definition: kernel_code.hh:70
gem5::ComputeUnit::idleWfs
int idleWfs
Definition: compute_unit.hh:346
gem5::Wavefront::wrLmReqsInPipe
int wrLmReqsInPipe
Definition: wavefront.hh:188
gem5::Wavefront::oldVgprTcnt
uint64_t oldVgprTcnt
Definition: wavefront.hh:211
gem5::Wavefront::clearWaitCnts
void clearWaitCnts()
Definition: wavefront.cc:1315
gem5::Wavefront::wgId
uint32_t wgId
Definition: wavefront.hh:162
gem5::Wavefront::wfId
uint32_t wfId
Definition: wavefront.hh:169
gem5::ComputeUnit::idleCUTimeout
Tick idleCUTimeout
Definition: compute_unit.hh:345
gem5::Wavefront::isGmInstruction
bool isGmInstruction(GPUDynInstPtr ii)
Definition: wavefront.cc:568
gem5::Wavefront::setStatus
void setStatus(status_e newStatus)
Definition: wavefront.cc:520
gem5::RegisterManager::mapSgpr
int mapSgpr(Wavefront *w, int sgprIndex)
Definition: register_manager.cc:104
gem5::Wavefront::start
void start(uint64_t _wfDynId, uint64_t _base_ptr)
Definition: wavefront.cc:557
gem5::HSAQueueEntry::MAX_DIM
const static int MAX_DIM
Definition: hsa_queue_entry.hh:312
gem5::Wavefront::freeRegisterFile
void freeRegisterFile()
Freeing VRF space.
Definition: wavefront.cc:1389
gem5::Wavefront::validateRequestCounters
void validateRequestCounters()
Definition: wavefront.cc:748
gem5::ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:246
vector_register_file.hh
gem5::WorkitemIdX
@ WorkitemIdX
Definition: kernel_code.hh:78
gem5::Wavefront::reservedScalarRegs
int reservedScalarRegs
Definition: wavefront.hh:198
bitfield.hh
gem5::statistics::Distribution::init
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2110
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecuted
statistics::Scalar numVecOpsExecuted
Definition: compute_unit.hh:1040
gem5::Wavefront::wrGmReqsInPipe
int wrGmReqsInPipe
Definition: wavefront.hh:189
gem5::WFBarrier::InvalidID
static const int InvalidID
Definition: compute_unit.hh:99
gem5::Wavefront::outstandingReqs
int outstandingReqs
Definition: wavefront.hh:173
gem5::Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:227
gem5::Wavefront::isOldestInstGMem
bool isOldestInstGMem()
Definition: wavefront.cc:664
gem5::Wavefront::lgkmInstsIssued
int lgkmInstsIssued
Definition: wavefront.hh:328
gem5::ComputeUnit::vectorSharedMemUnit
WaitClass vectorSharedMemUnit
Definition: compute_unit.hh:235
gem5::ComputeUnit::ComputeUnitStats::activeLanesPerGMemInstrDist
statistics::Distribution activeLanesPerGMemInstrDist
Definition: compute_unit.hh:1069
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::ComputeUnit::scalarMemUnit
WaitClass scalarMemUnit
Definition: compute_unit.hh:243
gem5::Wavefront::resizeRegFiles
void resizeRegFiles(int num_vregs, int num_sregs)
Definition: wavefront.cc:509
gem5::ArmISA::d
Bitfield< 9 > d
Definition: misc_types.hh:63
gem5::Wavefront::rdLmReqsInPipe
int rdLmReqsInPipe
Definition: wavefront.hh:186
gem5::Wavefront::oldDgprTcnt
uint64_t oldDgprTcnt
Definition: wavefront.hh:218
gem5::Wavefront::incLGKMInstsIssued
void incLGKMInstsIssued()
Definition: wavefront.cc:1341
gem5::HSAQueueEntry::vgprBitEnabled
bool vgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:288
gem5::KernargSegPtr
@ KernargSegPtr
Definition: kernel_code.hh:61
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC64
statistics::Scalar numVecOpsExecutedMAC64
Definition: compute_unit.hh:1054
gem5::MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:326
gem5::Wavefront::scalarWrGmReqsInPipe
int scalarWrGmReqsInPipe
Definition: wavefront.hh:191
gem5::Wavefront::scalarOutstandingReqsRdGm
int scalarOutstandingReqsRdGm
Definition: wavefront.hh:183
gem5::Wavefront::scalarMem
int scalarMem
Definition: wavefront.hh:130
gem5::Wavefront::memTraceBusy
int memTraceBusy
Definition: wavefront.hh:193
gem5::Wavefront::wfSlotId
const int wfSlotId
Definition: wavefront.hh:98
gem5::Wavefront::execUnitId
int execUnitId
Definition: wavefront.hh:104
gem5::ComputeUnit::registerManager
RegisterManager * registerManager
Definition: compute_unit.hh:280
gem5::ComputeUnit::ComputeUnitStats::numInstrExecuted
statistics::Scalar numInstrExecuted
Definition: compute_unit.hh:1035
gem5::Wavefront::S_BARRIER
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:94
gem5::HSAQueueEntry::hostDispPktAddr
Addr hostDispPktAddr() const
Definition: hsa_queue_entry.hh:167
gem5::Wavefront::scalarOutstandingReqsWrGm
int scalarOutstandingReqsWrGm
Definition: wavefront.hh:185
scalar_register_file.hh
gem5::ComputeUnit::vrf_lm_bus_latency
Cycles vrf_lm_bus_latency
Definition: compute_unit.hh:322
gpu_dyn_inst.hh
gem5::Wavefront::Wavefront
Wavefront(const Params &p)
Definition: wavefront.cc:50
gem5::HSAQueueEntry::wgSize
int wgSize(int dim) const
Definition: hsa_queue_entry.hh:123
gem5::_amd_queue_t::scratch_workitem_byte_size
uint32_t scratch_workitem_byte_size
Definition: hsa_queue.hh:86
gem5::Wavefront::~Wavefront
~Wavefront()
Definition: wavefront.cc:515
gem5::Wavefront::expInstsIssued
int expInstsIssued
Definition: wavefront.hh:327
gem5::Wavefront::waitCntsSatisfied
bool waitCntsSatisfied()
Definition: wavefront.cc:1201
gem5::_amd_queue_t::scratch_resource_descriptor
uint32_t scratch_resource_descriptor[4]
Definition: hsa_queue.hh:83
gem5::Wavefront::isOldestInstWaitcnt
bool isOldestInstWaitcnt()
Definition: wavefront.cc:604
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
gem5::ComputeUnit::mapWaveToScalarMem
int mapWaveToScalarMem(Wavefront *w) const
Definition: compute_unit.cc:289
gem5::ComputeUnit::mapWaveToGlobalMem
int mapWaveToGlobalMem(Wavefront *w) const
Definition: compute_unit.cc:273
gem5::ComputeUnit::deleteFromPipeMap
void deleteFromPipeMap(Wavefront *w)
Definition: compute_unit.cc:509
gem5::SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:146
gem5::Wavefront::reserveLmResource
void reserveLmResource(GPUDynInstPtr ii)
Definition: wavefront.cc:792
gem5::HSAQueueEntry::gridSize
int gridSize(int dim) const
Definition: hsa_queue_entry.hh:130
gem5::ComputeUnit::scalarALUs
std::vector< WaitClass > scalarALUs
Definition: compute_unit.hh:251
gem5::GridWorkgroupCountX
@ GridWorkgroupCountX
Definition: kernel_code.hh:65
gem5::Wavefront::lastTrace
uint64_t lastTrace
Definition: wavefront.hh:194
gem5::Wavefront::nextInstr
GPUDynInstPtr nextInstr()
Definition: wavefront.cc:1169
gem5::ComputeUnit::instExecPerSimd
std::vector< uint64_t > instExecPerSimd
Definition: compute_unit.hh:328
gem5::Wavefront::discardFetch
void discardFetch()
Definition: wavefront.cc:1188
gem5::Wavefront::wfDynId
uint64_t wfDynId
Definition: wavefront.hh:228
gem5::HSAQueueEntry::hostAMDQueueAddr
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
Definition: hsa_queue_entry.hh:302
gem5::Wavefront::computeUnit
ComputeUnit * computeUnit
Definition: wavefront.hh:108
gem5::Wavefront::execMask
VectorMask & execMask()
Definition: wavefront.cc:1377
gem5::WorkitemIdZ
@ WorkitemIdZ
Definition: kernel_code.hh:80
gem5::GridWorkgroupCountZ
@ GridWorkgroupCountZ
Definition: kernel_code.hh:67
gem5::Wavefront::barrierId
int barrierId() const
Definition: wavefront.cc:1424
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::ComputeUnit::getTokenManager
TokenManager * getTokenManager()
Definition: compute_unit.hh:839
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF16
statistics::Scalar numVecOpsExecutedF16
Definition: compute_unit.hh:1042
gem5::Wavefront::decExpInstsIssued
void decExpInstsIssued()
Definition: wavefront.cc:1353
gem5::Wavefront::Params
WavefrontParams Params
Definition: wavefront.hh:246
gem5::Wavefront::WavefrontStats::numInstrExecuted
statistics::Scalar numInstrExecuted
Definition: wavefront.hh:342
gem5::FetchStage::fetchUnit
FetchUnit & fetchUnit(int simdId)
Definition: fetch_stage.hh:68
gem5::RegisterManager::mapVgpr
int mapVgpr(Wavefront *w, int vgprIndex)
Definition: register_manager.cc:97
gem5::ComputeUnit::mapWaveToScalarAlu
int mapWaveToScalarAlu(Wavefront *w) const
Definition: compute_unit.cc:255
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:51
gem5::ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:396
gem5::Wavefront::decVMemInstsIssued
void decVMemInstsIssued()
Definition: wavefront.cc:1347
gem5::ComputeUnit::pipeMap
std::unordered_set< uint64_t > pipeMap
Definition: compute_unit.hh:278
gem5::Wavefront::rdGmReqsInPipe
int rdGmReqsInPipe
Definition: wavefront.hh:187
gem5::divCeil
static constexpr T divCeil(const T &a, const U &b)
Definition: intmath.hh:110
gem5::Wavefront::outstandingReqsWrLm
int outstandingReqsWrLm
Definition: wavefront.hh:177
gem5::Wavefront::hasBarrier
bool hasBarrier() const
Definition: wavefront.cc:1430
gem5::ComputeUnit::updateInstStats
void updateInstStats(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.cc:1805
gem5::ComputeUnit::ComputeUnitStats::instCyclesLdsPerSimd
statistics::Vector instCyclesLdsPerSimd
Definition: compute_unit.hh:983
gem5::Wavefront::incExpInstsIssued
void incExpInstsIssued()
Definition: wavefront.cc:1335
gem5::QueuePtr
@ QueuePtr
Definition: kernel_code.hh:60
gem5::WaitClass::set
void set(uint64_t i)
Definition: misc.hh:84
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:203
gem5::Wavefront::decLGKMInstsIssued
void decLGKMInstsIssued()
Definition: wavefront.cc:1359
gem5::Wavefront::S_RETURNING
@ S_RETURNING
Definition: wavefront.hh:70
gem5::Wavefront::vmWaitCnt
int vmWaitCnt
the following are used for waitcnt instructions vmWaitCnt: once set, we wait for the oustanding numbe...
Definition: wavefront.hh:323
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD16
statistics::Scalar numVecOpsExecutedMAD16
Definition: compute_unit.hh:1056
gem5::Wavefront::reserveResources
std::vector< int > reserveResources()
Definition: wavefront.cc:810
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC16
statistics::Scalar numVecOpsExecutedMAC16
Definition: compute_unit.hh:1052
gem5::WorkgroupIdX
@ WorkgroupIdX
Definition: kernel_code.hh:68
gem5::Wavefront::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: wavefront.cc:104
gem5::Wavefront::globalMem
int globalMem
Definition: wavefront.hh:128
gem5::ComputeUnit::vrf_gm_bus_latency
Cycles vrf_gm_bus_latency
Definition: compute_unit.hh:318
gem5::Wavefront::outstandingReqsRdLm
int outstandingReqsRdLm
Definition: wavefront.hh:181
gem5::ComputeUnit::ComputeUnitStats::controlFlowDivergenceDist
statistics::Distribution controlFlowDivergenceDist
Definition: compute_unit.hh:1068
gem5::Wavefront::gridSz
uint32_t gridSz[3]
Definition: wavefront.hh:161
gem5::Wavefront::actualWgSzTotal
uint32_t actualWgSzTotal
Definition: wavefront.hh:166
gem5::Wavefront::lgkmWaitCnt
int lgkmWaitCnt
Definition: wavefront.hh:325
gem5::Wavefront::scalarAlu
int scalarAlu
Definition: wavefront.hh:123
gem5::GridWorkgroupCountY
@ GridWorkgroupCountY
Definition: kernel_code.hh:66
gem5::ComputeUnit::mapWaveToLocalMem
int mapWaveToLocalMem(Wavefront *w) const
Definition: compute_unit.cc:281
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA64
statistics::Scalar numVecOpsExecutedFMA64
Definition: compute_unit.hh:1050
gem5::Wavefront::S_WAITCNT
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:90
gem5::WFBarrier
WF barrier slots.
Definition: compute_unit.hh:92
gem5::DispatchPtr
@ DispatchPtr
Definition: kernel_code.hh:59
gem5::HSAQueueEntry::sgprBitEnabled
bool sgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:293
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:205
gem5::TokenManager::recvTokens
void recvTokens(int num_tokens)
Increment the number of available tokens by num_tokens.
Definition: token_port.cc:157
gem5::ComputeUnit::vrfToLocalMemPipeBus
WaitClass vrfToLocalMemPipeBus
Definition: compute_unit.hh:233
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::ComputeUnit::ComputeUnitStats::execRateDist
statistics::Distribution execRateDist
Definition: compute_unit.hh:1038
gem5::Wavefront::maxVgprs
uint32_t maxVgprs
Definition: wavefront.hh:133
gem5::Wavefront::outstandingReqsWrGm
int outstandingReqsWrGm
Definition: wavefront.hh:175
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF32
statistics::Scalar numVecOpsExecutedF32
Definition: compute_unit.hh:1044
gem5::ComputeUnit::vectorALUs
std::vector< WaitClass > vectorALUs
Definition: compute_unit.hh:247
gem5::Wavefront::isOldestInstScalarALU
bool isOldestInstScalarALU()
Definition: wavefront.cc:621
gem5::Wavefront::WavefrontStats::readsPerWrite
statistics::Distribution readsPerWrite
Definition: wavefront.hh:378
gem5::Wavefront::instructionBuffer
std::deque< GPUDynInstPtr > instructionBuffer
Definition: wavefront.hh:111
gem5::Wavefront::isLmInstruction
bool isLmInstruction(GPUDynInstPtr ii)
Definition: wavefront.cc:579
gem5::Wavefront::stats
gem5::Wavefront::WavefrontStats stats
gem5::Wavefront::outstandingReqsRdGm
int outstandingReqsRdGm
Definition: wavefront.hh:179
gem5::Wavefront::sleepCnt
int sleepCnt
Definition: wavefront.hh:329
gem5::Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:192
gem5::Wavefront::freeResources
void freeResources()
Definition: wavefront.cc:743
gem5::Wavefront::isOldestInstPrivMem
bool isOldestInstPrivMem()
Definition: wavefront.cc:703
gem5::Wavefront::status_e
status_e
Definition: wavefront.hh:65
gem5::Wavefront::sleepDone
bool sleepDone()
Definition: wavefront.cc:1244
gem5::Wavefront::incVMemInstsIssued
void incVMemInstsIssued()
Definition: wavefront.cc:1329
gem5::_amd_queue_t::scratch_backing_memory_location
uint64_t scratch_backing_memory_location
Definition: hsa_queue.hh:84
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:225
gem5::_amd_queue_t::compute_tmpring_size_wavesize
uint32_t compute_tmpring_size_wavesize
Definition: hsa_queue.hh:81
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::ComputeUnit::vectorGlobalMemUnit
WaitClass vectorGlobalMemUnit
Definition: compute_unit.hh:227
gem5::ComputeUnit::ComputeUnitStats::totalCycles
statistics::Scalar totalCycles
Definition: compute_unit.hh:1062
gem5::Wavefront::startVgprIndex
uint32_t startVgprIndex
Definition: wavefront.hh:201
gem5::ComputeUnit::mapWaveToScalarAluGlobalIdx
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
Definition: compute_unit.cc:266
gem5::Wavefront::barId
int barId
Definition: wavefront.hh:333
gem5::Wavefront::scalarAluGlobalIdx
int scalarAluGlobalIdx
Definition: wavefront.hh:127
gem5::Wavefront::WavefrontStats::vecRawDistance
statistics::Distribution vecRawDistance
Definition: wavefront.hh:374
gem5::Wavefront::lastInstExec
uint64_t lastInstExec
Definition: wavefront.hh:231
gem5::Shader::incVectorInstSrcOperand
void incVectorInstSrcOperand(int num_operands)
Definition: shader.hh:275
gem5::Gcn3ISA::VecRegContainerU32
VecRegContainer< sizeof(VecElemU32) *NumVecElemPerVecReg > VecRegContainerU32
Definition: gpu_registers.hh:181
gem5::Wavefront::reserveGmResource
void reserveGmResource(GPUDynInstPtr ii)
Definition: wavefront.cc:762
gem5::ComputeUnit::srf_scm_bus_latency
Cycles srf_scm_bus_latency
Definition: compute_unit.hh:320
gem5::Wavefront::isOldestInstScalarMem
bool isOldestInstScalarMem()
Definition: wavefront.cc:677
gem5::Wavefront::localMem
int localMem
Definition: wavefront.hh:129
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA16
statistics::Scalar numVecOpsExecutedFMA16
Definition: compute_unit.hh:1048
gem5::Wavefront::workGroupId
uint32_t workGroupId[3]
Definition: wavefront.hh:159
gem5::Wavefront::dropFetch
bool dropFetch
Definition: wavefront.hh:114
gem5::Gcn3ISA::ScalarRegU32
uint32_t ScalarRegU32
Definition: gpu_registers.hh:155
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:177
gem5::Wavefront::status
status_e status
Definition: wavefront.hh:330
gem5::Wavefront::simdId
const int simdId
Definition: wavefront.hh:101
gem5::statistics::ScalarBase::value
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:619

Generated on Tue Sep 21 2021 12:25:25 for gem5 by doxygen 1.8.17