gem5  v22.0.0.2
wavefront.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "gpu-compute/wavefront.hh"
33 
34 #include "base/bitfield.hh"
35 #include "debug/GPUExec.hh"
36 #include "debug/GPUInitAbi.hh"
37 #include "debug/WavefrontStack.hh"
41 #include "gpu-compute/shader.hh"
44 
45 namespace gem5
46 {
47 
49  : SimObject(p), wfSlotId(p.wf_slot_id), simdId(p.simdId),
50  maxIbSize(p.max_ib_size), _gpuISA(*this),
51  vmWaitCnt(-1), expWaitCnt(-1), lgkmWaitCnt(-1),
52  vmemInstsIssued(0), expInstsIssued(0), lgkmInstsIssued(0),
53  sleepCnt(0), barId(WFBarrier::InvalidID), stats(this)
54 {
55  lastTrace = 0;
56  execUnitId = -1;
57  status = S_STOPPED;
60  startVgprIndex = 0;
61  startSgprIndex = 0;
62  outstandingReqs = 0;
67  rdLmReqsInPipe = 0;
68  rdGmReqsInPipe = 0;
69  wrLmReqsInPipe = 0;
70  wrGmReqsInPipe = 0;
75  lastNonIdleTick = 0;
76  ldsChunk = nullptr;
77 
78  memTraceBusy = 0;
79  oldVgprTcnt = 0xffffffffffffffffll;
80  oldDgprTcnt = 0xffffffffffffffffll;
81  oldVgpr.resize(p.wf_size);
82 
83  pendingFetch = false;
84  dropFetch = false;
85  maxVgprs = 0;
86  maxSgprs = 0;
87 
88  lastAddr.resize(p.wf_size);
89  workItemFlatId.resize(p.wf_size);
90  oldDgpr.resize(p.wf_size);
91  for (int i = 0; i < 3; ++i) {
92  workItemId[i].resize(p.wf_size);
93  }
94 
95  _execMask.set();
96  rawDist.clear();
97  lastInstExec = 0;
98  vecReads.clear();
99 }
100 
101 void
103 {
104  reservedVectorRegs = 0;
105  reservedScalarRegs = 0;
106  startVgprIndex = 0;
107  startSgprIndex = 0;
108 
114 }
115 
116 void
117 Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
118 {
119  int regInitIdx = 0;
120 
121  // iterate over all the init fields and check which
122  // bits are enabled
123  for (int en_bit = 0; en_bit < NumScalarInitFields; ++en_bit) {
124 
125  if (task->sgprBitEnabled(en_bit)) {
126  int physSgprIdx = 0;
127  uint32_t wiCount = 0;
128  uint32_t firstWave = 0;
129  int orderedAppendTerm = 0;
130  int numWfsInWg = 0;
131  uint32_t finalValue = 0;
132  Addr host_disp_pkt_addr = task->hostDispPktAddr();
133  Addr kernarg_addr = task->kernargAddr();
134  Addr hidden_priv_base(0);
135 
136  switch (en_bit) {
137  case PrivateSegBuf:
138  physSgprIdx =
139  computeUnit->registerManager->mapSgpr(this, regInitIdx);
140  computeUnit->srf[simdId]->write(physSgprIdx,
142  ++regInitIdx;
143  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
144  "Setting PrivateSegBuffer: s[%d] = %x\n",
146  wfSlotId, wfDynId, physSgprIdx,
148 
149  physSgprIdx =
150  computeUnit->registerManager->mapSgpr(this, regInitIdx);
151  computeUnit->srf[simdId]->write(physSgprIdx,
153  ++regInitIdx;
154  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
155  "Setting PrivateSegBuffer: s[%d] = %x\n",
157  wfSlotId, wfDynId, physSgprIdx,
159 
160  physSgprIdx =
161  computeUnit->registerManager->mapSgpr(this, regInitIdx);
162  computeUnit->srf[simdId]->write(physSgprIdx,
164  ++regInitIdx;
165  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
166  "Setting PrivateSegBuffer: s[%d] = %x\n",
168  wfSlotId, wfDynId, physSgprIdx,
170 
171  physSgprIdx =
172  computeUnit->registerManager->mapSgpr(this, regInitIdx);
173  computeUnit->srf[simdId]->write(physSgprIdx,
175 
176  ++regInitIdx;
177  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
178  "Setting PrivateSegBuffer: s[%d] = %x\n",
180  wfSlotId, wfDynId, physSgprIdx,
182  break;
183  case DispatchPtr:
184  physSgprIdx =
185  computeUnit->registerManager->mapSgpr(this, regInitIdx);
186  computeUnit->srf[simdId]->write(physSgprIdx,
187  bits(host_disp_pkt_addr, 31, 0));
188  ++regInitIdx;
189  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
190  "Setting DispatchPtr: s[%d] = %x\n",
192  wfSlotId, wfDynId, physSgprIdx,
193  bits(host_disp_pkt_addr, 31, 0));
194 
195  physSgprIdx =
196  computeUnit->registerManager->mapSgpr(this, regInitIdx);
197  computeUnit->srf[simdId]->write(physSgprIdx,
198  bits(host_disp_pkt_addr, 63, 32));
199  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
200  "Setting DispatchPtr: s[%d] = %x\n",
202  wfSlotId, wfDynId, physSgprIdx,
203  bits(host_disp_pkt_addr, 63, 32));
204 
205  ++regInitIdx;
206  break;
207  case QueuePtr:
208  physSgprIdx =
209  computeUnit->registerManager->mapSgpr(this, regInitIdx);
210  computeUnit->srf[simdId]->write(physSgprIdx,
211  bits(task->hostAMDQueueAddr, 31, 0));
212  ++regInitIdx;
213  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
214  "Setting QueuePtr: s[%d] = %x\n",
216  wfSlotId, wfDynId, physSgprIdx,
217  bits(task->hostAMDQueueAddr, 31, 0));
218 
219  physSgprIdx =
220  computeUnit->registerManager->mapSgpr(this, regInitIdx);
221  computeUnit->srf[simdId]->write(physSgprIdx,
222  bits(task->hostAMDQueueAddr, 63, 32));
223  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
224  "Setting QueuePtr: s[%d] = %x\n",
226  wfSlotId, wfDynId, physSgprIdx,
227  bits(task->hostAMDQueueAddr, 63, 32));
228 
229  ++regInitIdx;
230  break;
231  case KernargSegPtr:
232  physSgprIdx =
233  computeUnit->registerManager->mapSgpr(this, regInitIdx);
234  computeUnit->srf[simdId]->write(physSgprIdx,
235  bits(kernarg_addr, 31, 0));
236  ++regInitIdx;
237  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
238  "Setting KernargSegPtr: s[%d] = %x\n",
240  wfSlotId, wfDynId, physSgprIdx,
241  bits(kernarg_addr, 31, 0));
242 
243  physSgprIdx =
244  computeUnit->registerManager->mapSgpr(this, regInitIdx);
245  computeUnit->srf[simdId]->write(physSgprIdx,
246  bits(kernarg_addr, 63, 32));
247  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
248  "Setting KernargSegPtr: s[%d] = %x\n",
250  wfSlotId, wfDynId, physSgprIdx,
251  bits(kernarg_addr, 63, 32));
252 
253  ++regInitIdx;
254  break;
255  case FlatScratchInit:
256  physSgprIdx
257  = computeUnit->registerManager->mapSgpr(this, regInitIdx);
258  computeUnit->srf[simdId]->write(physSgprIdx,
260  .scratch_backing_memory_location & 0xffffffff));
261  ++regInitIdx;
262  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
263  "Setting FlatScratch Addr: s[%d] = %x\n",
265  wfSlotId, wfDynId, physSgprIdx,
267  .scratch_backing_memory_location & 0xffffffff));
268 
269  physSgprIdx =
270  computeUnit->registerManager->mapSgpr(this, regInitIdx);
271  // This value should be sizeof(DWORD) aligned, that is,
272  // 4-byte aligned
273  computeUnit->srf[simdId]->write(physSgprIdx,
275  ++regInitIdx;
276  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
277  "Setting FlatScratch size: s[%d] = %x\n",
279  wfSlotId, wfDynId, physSgprIdx,
304  hidden_priv_base =
305  (uint64_t)task->amdQueue.scratch_resource_descriptor[0] |
306  (((uint64_t)task->amdQueue.scratch_resource_descriptor[1]
307  & 0x000000000000ffff) << 32);
309  hidden_priv_base,
311  break;
312  case GridWorkgroupCountX:
313  physSgprIdx =
314  computeUnit->registerManager->mapSgpr(this, regInitIdx);
315  wiCount = ((task->gridSize(0) +
316  task->wgSize(0) - 1) /
317  task->wgSize(0));
318  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
319 
320  ++regInitIdx;
321  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
322  "Setting num WG X: s[%d] = %x\n",
324  wfSlotId, wfDynId, physSgprIdx, wiCount);
325  break;
326  case GridWorkgroupCountY:
327  physSgprIdx =
328  computeUnit->registerManager->mapSgpr(this, regInitIdx);
329  wiCount = ((task->gridSize(1) +
330  task->wgSize(1) - 1) /
331  task->wgSize(1));
332  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
333 
334  ++regInitIdx;
335  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
336  "Setting num WG Y: s[%d] = %x\n",
338  wfSlotId, wfDynId, physSgprIdx, wiCount);
339  break;
340  case GridWorkgroupCountZ:
341  physSgprIdx =
342  computeUnit->registerManager->mapSgpr(this, regInitIdx);
343  wiCount = ((task->gridSize(2) +
344  task->wgSize(2) - 1) /
345  task->wgSize(2));
346  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
347 
348  ++regInitIdx;
349  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
350  "Setting num WG Z: s[%d] = %x\n",
352  wfSlotId, wfDynId, physSgprIdx, wiCount);
353  break;
354  case WorkgroupIdX:
355  physSgprIdx =
356  computeUnit->registerManager->mapSgpr(this, regInitIdx);
357  computeUnit->srf[simdId]->write(physSgprIdx,
358  workGroupId[0]);
359 
360  ++regInitIdx;
361  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
362  "Setting WG ID X: s[%d] = %x\n",
364  wfSlotId, wfDynId, physSgprIdx, workGroupId[0]);
365  break;
366  case WorkgroupIdY:
367  physSgprIdx =
368  computeUnit->registerManager->mapSgpr(this, regInitIdx);
369  computeUnit->srf[simdId]->write(physSgprIdx,
370  workGroupId[1]);
371 
372  ++regInitIdx;
373  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
374  "Setting WG ID Y: s[%d] = %x\n",
376  wfSlotId, wfDynId, physSgprIdx, workGroupId[1]);
377  break;
378  case WorkgroupIdZ:
379  physSgprIdx =
380  computeUnit->registerManager->mapSgpr(this, regInitIdx);
381  computeUnit->srf[simdId]->write(physSgprIdx,
382  workGroupId[2]);
383 
384  ++regInitIdx;
385  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
386  "Setting WG ID Z: s[%d] = %x\n",
388  wfSlotId, wfDynId, physSgprIdx, workGroupId[2]);
389  break;
391  physSgprIdx =
392  computeUnit->registerManager->mapSgpr(this, regInitIdx);
406  computeUnit->srf[simdId]->write(physSgprIdx, 1024 *
407  (wgId * (wgSz / 64) + wfId) *
409 
410  ++regInitIdx;
411  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
412  "Setting Private Seg Offset: s[%d] = %x\n",
414  wfSlotId, wfDynId, physSgprIdx,
415  1024 * (wgId * (wgSz / 64) + wfId) *
417  break;
418  case WorkgroupInfo:
419  firstWave = (wfId == 0) ? 1 : 0;
420  numWfsInWg = divCeil(wgSizeInWorkItems,
421  computeUnit->wfSize());
422  finalValue = firstWave << ((sizeof(uint32_t) * 8) - 1);
423  finalValue |= (orderedAppendTerm << 6);
424  finalValue |= numWfsInWg;
425  physSgprIdx =
426  computeUnit->registerManager->mapSgpr(this, regInitIdx);
428  write(physSgprIdx, finalValue);
429 
430  ++regInitIdx;
431  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
432  "Setting WG Info: s[%d] = %x\n",
434  wfSlotId, wfDynId, physSgprIdx, finalValue);
435  break;
436  default:
437  fatal("SGPR enable bit %i not supported\n", en_bit);
438  break;
439  }
440  }
441  }
442 
443  regInitIdx = 0;
444 
445  // iterate over all the init fields and check which
446  // bits are enabled
447  for (int en_bit = 0; en_bit < NumVectorInitFields; ++en_bit) {
448  if (task->vgprBitEnabled(en_bit)) {
449  uint32_t physVgprIdx = 0;
451 
452  switch (en_bit) {
453  case WorkitemIdX:
454  {
455  physVgprIdx = computeUnit->registerManager
456  ->mapVgpr(this, regInitIdx);
457  TheGpuISA::VecElemU32 *vgpr_x
458  = raw_vgpr.as<TheGpuISA::VecElemU32>();
459 
460  for (int lane = 0; lane < workItemId[0].size(); ++lane) {
461  vgpr_x[lane] = workItemId[0][lane];
462  }
463 
464  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
465  rawDist[regInitIdx] = 0;
466  ++regInitIdx;
467  }
468  break;
469  case WorkitemIdY:
470  {
471  physVgprIdx = computeUnit->registerManager
472  ->mapVgpr(this, regInitIdx);
473  TheGpuISA::VecElemU32 *vgpr_y
474  = raw_vgpr.as<TheGpuISA::VecElemU32>();
475 
476  for (int lane = 0; lane < workItemId[1].size(); ++lane) {
477  vgpr_y[lane] = workItemId[1][lane];
478  }
479 
480  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
481  rawDist[regInitIdx] = 0;
482  ++regInitIdx;
483  }
484  break;
485  case WorkitemIdZ:
486  {
487  physVgprIdx = computeUnit->registerManager->
488  mapVgpr(this, regInitIdx);
489  TheGpuISA::VecElemU32 *vgpr_z
490  = raw_vgpr.as<TheGpuISA::VecElemU32>();
491 
492  for (int lane = 0; lane < workItemId[2].size(); ++lane) {
493  vgpr_z[lane] = workItemId[2][lane];
494  }
495 
496  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
497  rawDist[regInitIdx] = 0;
498  ++regInitIdx;
499  }
500  break;
501  }
502  }
503  }
504 }
505 
506 void
507 Wavefront::resizeRegFiles(int num_vregs, int num_sregs)
508 {
509  maxVgprs = num_vregs;
510  maxSgprs = num_sregs;
511 }
512 
514 {
515 }
516 
517 void
519 {
520  if (computeUnit->idleCUTimeout > 0) {
521  // Wavefront's status transitions to stalled or stopped
522  if ((newStatus == S_STOPPED || newStatus == S_STALLED ||
523  newStatus == S_WAITCNT || newStatus == S_BARRIER) &&
524  (status != newStatus)) {
525  computeUnit->idleWfs++;
526  assert(computeUnit->idleWfs <=
528  if (computeUnit->idleWfs ==
531  }
532  // Wavefront's status transitions to an active state (from
533  // a stopped or stalled state)
534  } else if ((status == S_STOPPED || status == S_STALLED ||
535  status == S_WAITCNT || status == S_BARRIER) &&
536  (status != newStatus)) {
537  // if all WFs in the CU were idle then check if the idleness
538  // period exceeded the timeout threshold
539  if (computeUnit->idleWfs ==
543  "CU%d has been idle for %d ticks at tick %d",
545  curTick());
546  }
547  computeUnit->idleWfs--;
548  assert(computeUnit->idleWfs >= 0);
549  }
550  }
551  status = newStatus;
552 }
553 
554 void
555 Wavefront::start(uint64_t _wf_dyn_id, Addr init_pc)
556 {
557  wfDynId = _wf_dyn_id;
558  _pc = init_pc;
559 
560  status = S_RUNNING;
561 
562  vecReads.resize(maxVgprs, 0);
563 }
564 
565 bool
567 {
568  if (ii->isGlobalMem() ||
569  (ii->isFlat() && ii->executedAs() == enums::SC_GLOBAL)) {
570  return true;
571  }
572 
573  return false;
574 }
575 
576 bool
578 {
579  if (ii->isLocalMem() ||
580  (ii->isFlat() && ii->executedAs() == enums::SC_GROUP)) {
581  return true;
582  }
583 
584  return false;
585 }
586 
587 bool
589 {
590  if (instructionBuffer.empty())
591  return false;
592 
593  GPUDynInstPtr ii = instructionBuffer.front();
594 
595  if (ii->isSleep()) {
596  return true;
597  }
598  return false;
599 }
600 
601 bool
603 {
604  if (instructionBuffer.empty())
605  return false;
606 
607  GPUDynInstPtr ii = instructionBuffer.front();
608 
609  if (ii->isWaitcnt()) {
610  // waitcnt is a scalar
611  assert(ii->isScalar());
612  return true;
613  }
614 
615  return false;
616 }
617 
618 bool
620 {
621  assert(!instructionBuffer.empty());
622  GPUDynInstPtr ii = instructionBuffer.front();
623 
624  if (status != S_STOPPED && ii->isScalar() && (ii->isNop() || ii->isReturn()
625  || ii->isEndOfKernel() || ii->isBranch() || ii->isALU() ||
626  (ii->isKernArgSeg() && ii->isLoad()))) {
627  return true;
628  }
629 
630  return false;
631 }
632 
633 bool
635 {
636  assert(!instructionBuffer.empty());
637  GPUDynInstPtr ii = instructionBuffer.front();
638 
639  if (status != S_STOPPED && !ii->isScalar() && (ii->isNop() ||
640  ii->isReturn() || ii->isBranch() || ii->isALU() || ii->isEndOfKernel()
641  || (ii->isKernArgSeg() && ii->isLoad()))) {
642  return true;
643  }
644 
645  return false;
646 }
647 
648 bool
650 {
651  assert(!instructionBuffer.empty());
652  GPUDynInstPtr ii = instructionBuffer.front();
653 
654  if (status != S_STOPPED && ii->isBarrier()) {
655  return true;
656  }
657 
658  return false;
659 }
660 
661 bool
663 {
664  assert(!instructionBuffer.empty());
665  GPUDynInstPtr ii = instructionBuffer.front();
666 
667  if (status != S_STOPPED && !ii->isScalar() && ii->isGlobalMem()) {
668  return true;
669  }
670 
671  return false;
672 }
673 
674 bool
676 {
677  assert(!instructionBuffer.empty());
678  GPUDynInstPtr ii = instructionBuffer.front();
679 
680  if (status != S_STOPPED && ii->isScalar() && ii->isGlobalMem()) {
681  return true;
682  }
683 
684  return false;
685 }
686 
687 bool
689 {
690  assert(!instructionBuffer.empty());
691  GPUDynInstPtr ii = instructionBuffer.front();
692 
693  if (status != S_STOPPED && ii->isLocalMem()) {
694  return true;
695  }
696 
697  return false;
698 }
699 
700 bool
702 {
703  assert(!instructionBuffer.empty());
704  GPUDynInstPtr ii = instructionBuffer.front();
705 
706  if (status != S_STOPPED && ii->isPrivateSeg()) {
707  return true;
708  }
709 
710  return false;
711 }
712 
713 bool
715 {
716  assert(!instructionBuffer.empty());
717  GPUDynInstPtr ii = instructionBuffer.front();
718 
719  if (status != S_STOPPED && ii->isFlat()) {
720  return true;
721  }
722 
723  return false;
724 }
725 
726 bool
728 {
729  for (auto it : instructionBuffer) {
730  GPUDynInstPtr ii = it;
731  if (ii->isReturn() || ii->isBranch() ||
732  ii->isEndOfKernel()) {
733  return true;
734  }
735  }
736 
737  return false;
738 }
739 
740 void
742 {
743  execUnitId = -1;
744 }
745 
747 {
749  wrLmReqsInPipe < 0 || rdLmReqsInPipe < 0 ||
750  outstandingReqs < 0,
751  "Negative requests in pipe for WF%d for slot%d"
752  " and SIMD%d: Rd GlobalMem Reqs=%d, Wr GlobalMem Reqs=%d,"
753  " Rd LocalMem Reqs=%d, Wr LocalMem Reqs=%d,"
754  " Outstanding Reqs=%d\n",
757 }
758 
759 void
761 {
762  if (!ii->isScalar()) {
763  if (ii->isLoad()) {
764  rdGmReqsInPipe++;
765  } else if (ii->isStore()) {
766  wrGmReqsInPipe++;
767  } else if (ii->isAtomic() || ii->isMemSync()) {
768  rdGmReqsInPipe++;
769  wrGmReqsInPipe++;
770  } else {
771  panic("Invalid memory operation!\n");
772  }
774  } else {
775  if (ii->isLoad()) {
777  } else if (ii->isStore()) {
779  } else if (ii->isAtomic() || ii->isMemSync()) {
782  } else {
783  panic("Invalid memory operation!\n");
784  }
786  }
787 }
788 
789 void
791 {
792  fatal_if(ii->isScalar(),
793  "Scalar instructions can not access Shared memory!!!");
794  if (ii->isLoad()) {
795  rdLmReqsInPipe++;
796  } else if (ii->isStore()) {
797  wrLmReqsInPipe++;
798  } else if (ii->isAtomic() || ii->isMemSync()) {
799  wrLmReqsInPipe++;
800  rdLmReqsInPipe++;
801  } else {
802  panic("Invalid memory operation!\n");
803  }
805 }
806 
809 {
810  // vector of execution unit IDs to return to schedule stage
811  // this return is only used for debugging and an assertion...
812  std::vector<int> execUnitIds;
813 
814  // Get current instruction
815  GPUDynInstPtr ii = instructionBuffer.front();
816  assert(ii);
817 
818  // Single precision ALU or Branch or Return or Special instruction
819  if (ii->isALU() || ii->isSpecialOp() ||
820  ii->isBranch() || ii->isNop() ||
821  (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
822  ii->isReturn() || ii->isEndOfKernel()) {
823  if (!ii->isScalar()) {
824  execUnitId = simdId;
825  } else {
827  }
828  // this is to enforce a fixed number of cycles per issue slot per SIMD
829  } else if (ii->isBarrier()) {
830  execUnitId = ii->isScalar() ? scalarAluGlobalIdx : simdId;
831  } else if (ii->isFlat()) {
832  assert(!ii->isScalar());
833  reserveLmResource(ii);
834  // add the execUnitId reserved by reserveLmResource to the list before it is
835  // overwritten by reserveGmResource
836  execUnitIds.push_back(execUnitId);
838  reserveGmResource(ii);
840  execUnitIds.push_back(flatGmUnitId);
841  execUnitId = -1;
842  } else if (ii->isGlobalMem()) {
843  reserveGmResource(ii);
844  } else if (ii->isLocalMem()) {
845  reserveLmResource(ii);
846  } else if (ii->isPrivateSeg()) {
847  fatal_if(ii->isScalar(),
848  "Scalar instructions can not access Private memory!!!");
849  reserveGmResource(ii);
850  } else {
851  panic("reserveResources -> Couldn't process op!\n");
852  }
853 
854  if (execUnitId != -1) {
855  execUnitIds.push_back(execUnitId);
856  }
857  assert(execUnitIds.size());
858  return execUnitIds;
859 }
860 
861 void
863 {
864  // ---- Exit if wavefront is inactive ----------------------------- //
865 
866  if (status == S_STOPPED || status == S_RETURNING ||
867  status==S_STALLED || instructionBuffer.empty()) {
868  return;
869  }
870 
871  if (status == S_WAITCNT) {
883  assert(isOldestInstWaitcnt());
884  }
885 
886  // Get current instruction
887 
888  GPUDynInstPtr ii = instructionBuffer.front();
889 
890  const Addr old_pc = pc();
891  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
892  "(pc: %#x; seqNum: %d)\n", computeUnit->cu_id, simdId, wfSlotId,
893  wfDynId, ii->disassemble(), old_pc, ii->seqNum());
894 
895  ii->execute(ii);
896  // delete the dynamic instruction from the pipeline map
898  // update the instruction stats in the CU
900 
901  // inform VRF of instruction execution to schedule write-back
902  // and scoreboard ready for registers
903  if (!ii->isScalar()) {
904  computeUnit->vrf[simdId]->waveExecuteInst(this, ii);
905  }
906  computeUnit->srf[simdId]->waveExecuteInst(this, ii);
907 
908  computeUnit->shader->incVectorInstSrcOperand(ii->numSrcVecRegOperands());
909  computeUnit->shader->incVectorInstDstOperand(ii->numDstVecRegOperands());
918 
919  if (lastInstExec) {
922  }
924 
925  // want to track:
926  // number of reads that occur per value written
927 
928  // vector RAW dependency tracking
929  for (const auto& srcVecOp : ii->srcVecRegOperands()) {
930  for (const auto& virtIdx : srcVecOp.virtIndices()) {
931  // This check should never fail, but to be safe we check
932  if (rawDist.find(virtIdx) != rawDist.end()) {
934  rawDist[virtIdx]);
935  }
936  // increment number of reads to this register
937  vecReads[virtIdx]++;
938  }
939  }
940 
941  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
942  for (const auto& virtIdx : dstVecOp.virtIndices()) {
943  // rawDist is set on writes, but will not be set for the first
944  // write to each physical register
945  if (rawDist.find(virtIdx) != rawDist.end()) {
946  // Sample the number of reads that were performed
948  }
949  // on a write, reset count of reads to 0
950  vecReads[virtIdx] = 0;
951 
952  rawDist[virtIdx] = stats.numInstrExecuted.value();
953  }
954  }
955 
956  if (pc() == old_pc) {
957  // PC not modified by instruction, proceed to next
958  _gpuISA.advancePC(ii);
959  instructionBuffer.pop_front();
960  } else {
961  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave%d %s taken branch\n",
963  ii->disassemble());
964  discardFetch();
965  }
966  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] (pc: %#x)\n",
968 
970  const int num_active_lanes = execMask().count();
972  computeUnit->stats.numVecOpsExecuted += num_active_lanes;
973 
974  if (ii->isF16() && ii->isALU()) {
975  if (ii->isF32() || ii->isF64()) {
976  fatal("Instruction is tagged as both (1) F16, and (2)"
977  "either F32 or F64.");
978  }
979  computeUnit->stats.numVecOpsExecutedF16 += num_active_lanes;
980  if (ii->isFMA()) {
981  computeUnit->stats.numVecOpsExecutedFMA16 += num_active_lanes;
983  += num_active_lanes;
984  }
985  else if (ii->isMAC()) {
986  computeUnit->stats.numVecOpsExecutedMAC16 += num_active_lanes;
988  += num_active_lanes;
989  }
990  else if (ii->isMAD()) {
991  computeUnit->stats.numVecOpsExecutedMAD16 += num_active_lanes;
993  += num_active_lanes;
994  }
995  }
996  if (ii->isF32() && ii->isALU()) {
997  if (ii->isF16() || ii->isF64()) {
998  fatal("Instruction is tagged as both (1) F32, and (2)"
999  "either F16 or F64.");
1000  }
1001  computeUnit->stats.numVecOpsExecutedF32 += num_active_lanes;
1002  if (ii->isFMA()) {
1003  computeUnit->stats.numVecOpsExecutedFMA32 += num_active_lanes;
1005  += num_active_lanes;
1006  }
1007  else if (ii->isMAC()) {
1008  computeUnit->stats.numVecOpsExecutedMAC32 += num_active_lanes;
1010  += num_active_lanes;
1011  }
1012  else if (ii->isMAD()) {
1013  computeUnit->stats.numVecOpsExecutedMAD32 += num_active_lanes;
1015  += num_active_lanes;
1016  }
1017  }
1018  if (ii->isF64() && ii->isALU()) {
1019  if (ii->isF16() || ii->isF32()) {
1020  fatal("Instruction is tagged as both (1) F64, and (2)"
1021  "either F16 or F32.");
1022  }
1023  computeUnit->stats.numVecOpsExecutedF64 += num_active_lanes;
1024  if (ii->isFMA()) {
1025  computeUnit->stats.numVecOpsExecutedFMA64 += num_active_lanes;
1027  += num_active_lanes;
1028  }
1029  else if (ii->isMAC()) {
1030  computeUnit->stats.numVecOpsExecutedMAC64 += num_active_lanes;
1032  += num_active_lanes;
1033  }
1034  else if (ii->isMAD()) {
1035  computeUnit->stats.numVecOpsExecutedMAD64 += num_active_lanes;
1037  += num_active_lanes;
1038  }
1039  }
1040  if (isGmInstruction(ii)) {
1042  num_active_lanes);
1043  } else if (isLmInstruction(ii)) {
1045  num_active_lanes);
1046  }
1047  }
1048 
1053  if (execMask().none() && ii->isFlat()) {
1055  return;
1056  }
1057 
1058  // Update Vector ALU pipeline and other resources
1059  bool flat_as_gm = false;
1060  bool flat_as_lm = false;
1061  if (ii->isFlat()) {
1062  flat_as_gm = (ii->executedAs() == enums::SC_GLOBAL) ||
1063  (ii->executedAs() == enums::SC_PRIVATE);
1064  flat_as_lm = (ii->executedAs() == enums::SC_GROUP);
1065  }
1066 
1067  // Single precision ALU or Branch or Return or Special instruction
1068  // Note, we use the same timing regardless of SP or DP ALU operation.
1069  if (ii->isALU() || ii->isSpecialOp() ||
1070  ii->isBranch() || ii->isNop() ||
1071  (ii->isKernArgSeg() && ii->isLoad()) ||
1072  ii->isArgSeg() || ii->isEndOfKernel() || ii->isReturn()) {
1073  // this is to enforce a fixed number of cycles per issue slot per SIMD
1074  if (!ii->isScalar()) {
1076  cyclesToTicks(computeUnit->issuePeriod));
1077  } else {
1079  cyclesToTicks(computeUnit->issuePeriod));
1080  }
1081  // Barrier on Scalar ALU
1082  } else if (ii->isBarrier()) {
1084  cyclesToTicks(computeUnit->issuePeriod));
1085  // GM or Flat as GM Load
1086  } else if (ii->isLoad() && (ii->isGlobalMem() || flat_as_gm)) {
1087  if (!ii->isScalar()) {
1094  } else {
1096  cyclesToTicks(computeUnit->srf_scm_bus_latency));
1101  }
1102  // GM or Flat as GM Store
1103  } else if (ii->isStore() && (ii->isGlobalMem() || flat_as_gm)) {
1104  if (!ii->isScalar()) {
1106  cyclesToTicks(Cycles(2 * computeUnit->vrf_gm_bus_latency)));
1111  } else {
1113  cyclesToTicks(Cycles(2 * computeUnit->srf_scm_bus_latency)));
1118  }
1119  } else if ((ii->isAtomic() || ii->isMemSync()) &&
1120  (ii->isGlobalMem() || flat_as_gm)) {
1121  if (!ii->isScalar()) {
1123  cyclesToTicks(Cycles(2 * computeUnit->vrf_gm_bus_latency)));
1128  } else {
1130  cyclesToTicks(Cycles(2 * computeUnit->srf_scm_bus_latency)));
1135  }
1136  // LM or Flat as LM Load
1137  } else if (ii->isLoad() && (ii->isLocalMem() || flat_as_lm)) {
1139  cyclesToTicks(computeUnit->vrf_lm_bus_latency));
1144  // LM or Flat as LM Store
1145  } else if (ii->isStore() && (ii->isLocalMem() || flat_as_lm)) {
1147  cyclesToTicks(Cycles(2 * computeUnit->vrf_lm_bus_latency)));
1152  // LM or Flat as LM, Atomic or MemFence
1153  } else if ((ii->isAtomic() || ii->isMemSync()) &&
1154  (ii->isLocalMem() || flat_as_lm)) {
1156  cyclesToTicks(Cycles(2 * computeUnit->vrf_lm_bus_latency)));
1161  } else {
1162  panic("Bad instruction type!\n");
1163  }
1164 }
1165 
1168 {
1169  // Read next instruction from instruction buffer
1170  GPUDynInstPtr ii = instructionBuffer.front();
1171  // if the WF has been dispatched in the schedule stage then
1172  // check the next oldest instruction for readiness
1173  if (computeUnit->pipeMap.find(ii->seqNum()) !=
1174  computeUnit->pipeMap.end()) {
1175  if (instructionBuffer.size() > 1) {
1176  auto it = instructionBuffer.begin() + 1;
1177  return *it;
1178  } else { // No new instructions to check
1179  return nullptr;
1180  }
1181  }
1182  return ii;
1183 }
1184 
1185 void
1187 {
1188  instructionBuffer.clear();
1190 
1196 }
1197 
1198 bool
1200 {
1201  // vmWaitCnt, expWaitCnt and lgkmWaitCnt all being unset (-1) means the
1202  // waitcnt instruction has been dispatched but not executed yet: the next
1203  // instruction should be blocked until the waitcnt is executed.
1204  if (vmWaitCnt == -1 && expWaitCnt == -1 && lgkmWaitCnt == -1) {
1205  return false;
1206  }
1207 
1213  if (vmWaitCnt != -1) {
1214  if (vmemInstsIssued > vmWaitCnt) {
1215  // vmWaitCnt not satisfied
1216  return false;
1217  }
1218  }
1219 
1220  if (expWaitCnt != -1) {
1221  if (expInstsIssued > expWaitCnt) {
1222  // expWaitCnt not satisfied
1223  return false;
1224  }
1225  }
1226 
1227  if (lgkmWaitCnt != -1) {
1228  if (lgkmInstsIssued > lgkmWaitCnt) {
1229  // lgkmWaitCnt not satisfied
1230  return false;
1231  }
1232  }
1233 
1234  // if we get here all outstanding waitcnts must
1235  // be satisfied, so we resume normal operation
1236  clearWaitCnts();
1237 
1238  return true;
1239 }
1240 
1241 bool
1243 {
1244  assert(status == S_STALLED_SLEEP);
1245 
1246  // if the sleep count has not been set, then the sleep instruction has not
1247  // been executed yet, so we will return false without setting the wavefront
1248  // status
1249  if (sleepCnt == 0)
1250  return false;
1251 
1252  sleepCnt--;
1253  if (sleepCnt != 0)
1254  return false;
1255 
1256  status = S_RUNNING;
1257  return true;
1258 }
1259 
1260 void
1262 {
1263  assert(sleepCnt == 0);
1264  sleepCnt = sleep_time;
1265 }
1266 
1267 void
1268 Wavefront::setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
1269 {
1270  // the scoreboard should have set the status
1271  // to S_WAITCNT once a waitcnt instruction
1272  // was marked as ready
1273  assert(status == S_WAITCNT);
1274 
1275  // waitcnt instruction shouldn't be sending
1276  // negative counts
1277  assert(vm_wait_cnt >= 0);
1278  assert(exp_wait_cnt >= 0);
1279  assert(lgkm_wait_cnt >= 0);
1280  // waitcnts are a max of 15 because we have
1281  // only 1 nibble (4 bits) to set the counts
1282  assert(vm_wait_cnt <= 0xf);
1283  assert(exp_wait_cnt <= 0x7);
1284  assert(lgkm_wait_cnt <= 0x1f);
1285 
1292  assert(vmWaitCnt == -1);
1293  assert(expWaitCnt == -1);
1294  assert(lgkmWaitCnt == -1);
1295 
1302  if (vm_wait_cnt != 0xf)
1303  vmWaitCnt = vm_wait_cnt;
1304 
1305  if (exp_wait_cnt != 0x7)
1306  expWaitCnt = exp_wait_cnt;
1307 
1308  if (lgkm_wait_cnt != 0x1f)
1309  lgkmWaitCnt = lgkm_wait_cnt;
1310 }
1311 
1312 void
1314 {
1315  // reset the waitcnts back to
1316  // -1, indicating they are no
1317  // longer valid
1318  vmWaitCnt = -1;
1319  expWaitCnt = -1;
1320  lgkmWaitCnt = -1;
1321 
1322  // resume running normally
1323  status = S_RUNNING;
1324 }
1325 
1326 void
1328 {
1329  ++vmemInstsIssued;
1330 }
1331 
1332 void
1334 {
1335  ++expInstsIssued;
1336 }
1337 
1338 void
1340 {
1341  ++lgkmInstsIssued;
1342 }
1343 
1344 void
1346 {
1347  --vmemInstsIssued;
1348 }
1349 
1350 void
1352 {
1353  --expInstsIssued;
1354 }
1355 
1356 void
1358 {
1359  --lgkmInstsIssued;
1360 }
1361 
1362 Addr
1364 {
1365  return _pc;
1366 }
1367 
1368 void
1370 {
1371  _pc = new_pc;
1372 }
1373 
1374 VectorMask&
1376 {
1377  return _execMask;
1378 }
1379 
1380 bool
1381 Wavefront::execMask(int lane) const
1382 {
1383  return _execMask[lane];
1384 }
1385 
1386 void
1388 {
1389  /* clear busy registers */
1390  for (int i=0; i < maxVgprs; i++) {
1391  int vgprIdx = computeUnit->registerManager->mapVgpr(this, i);
1392  computeUnit->vrf[simdId]->markReg(vgprIdx, false);
1393  }
1394 
1395  /* Free registers used by this wavefront */
1396  uint32_t endIndex = (startVgprIndex + reservedVectorRegs - 1) %
1397  computeUnit->vrf[simdId]->numRegs();
1399  freeRegion(startVgprIndex, endIndex);
1400 }
1401 
1402 void
1404 {
1405  actualWgSzTotal = 1;
1406  for (int d = 0; d < HSAQueueEntry::MAX_DIM; ++d) {
1407  actualWgSz[d] = std::min(workGroupSz[d], gridSz[d]
1408  - task->wgId(d) * workGroupSz[d]);
1410  }
1411 }
1412 
1413 void
1415 {
1416  assert(bar_id >= WFBarrier::InvalidID);
1417  assert(bar_id < computeUnit->numBarrierSlots());
1418  barId = bar_id;
1419 }
1420 
1421 int
1423 {
1424  return barId;
1425 }
1426 
1427 bool
1429 {
1430  return barId > WFBarrier::InvalidID;
1431 }
1432 
1433 void
1435 {
1437 }
1438 
1440  : statistics::Group(parent),
1441  ADD_STAT(numInstrExecuted,
1442  "number of instructions executed by this WF slot"),
1443  ADD_STAT(schCycles, "number of cycles spent in schedule stage"),
1444  ADD_STAT(schStalls, "number of cycles WF is stalled in SCH stage"),
1445  ADD_STAT(schRfAccessStalls, "number of cycles wave selected in SCH but "
1446  "RF denied adding instruction"),
1447  ADD_STAT(schResourceStalls, "number of cycles stalled in sch by resource"
1448  " not available"),
1449  ADD_STAT(schOpdNrdyStalls, "number of cycles stalled in sch waiting for "
1450  "RF reads to complete"),
1451  ADD_STAT(schLdsArbStalls,
1452  "number of cycles wave stalled due to LDS-VRF arbitration"),
1453  // FIXME: the name of the WF needs to be unique
1454  ADD_STAT(numTimesBlockedDueWAXDependencies, "number of times the wf's "
1455  "instructions are blocked due to WAW or WAR dependencies"),
1456  // FIXME: the name of the WF needs to be unique
1457  ADD_STAT(numTimesBlockedDueRAWDependencies, "number of times the wf's "
1458  "instructions are blocked due to RAW dependencies"),
1459  ADD_STAT(vecRawDistance,
1460  "Count of RAW distance in dynamic instructions for this WF"),
1461  ADD_STAT(readsPerWrite, "Count of Vector reads per write for this WF")
1462 {
1463  vecRawDistance.init(0, 20, 1);
1464  readsPerWrite.init(0, 4, 1);
1465 }
1466 
1467 } // namespace gem5
gem5::Wavefront::expWaitCnt
int expWaitCnt
Definition: wavefront.hh:322
gem5::Wavefront::workItemFlatId
std::vector< uint32_t > workItemFlatId
Definition: wavefront.hh:155
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:190
gem5::Wavefront::vecReads
std::vector< int > vecReads
Definition: wavefront.hh:237
gem5::Wavefront::S_STALLED
@ S_STALLED
Definition: wavefront.hh:72
gem5::NumVectorInitFields
@ NumVectorInitFields
Definition: kernel_code.hh:79
gem5::Wavefront::vmemInstsIssued
int vmemInstsIssued
Definition: wavefront.hh:324
gem5::FlatScratchInit
@ FlatScratchInit
Definition: kernel_code.hh:61
gem5::Wavefront::lastAddr
std::vector< Addr > lastAddr
Definition: wavefront.hh:153
gem5::Wavefront::isOldestInstFlatMem
bool isOldestInstFlatMem()
Definition: wavefront.cc:714
gem5::Wavefront::computeActualWgSz
void computeActualWgSz(HSAQueueEntry *task)
Definition: wavefront.cc:1403
simple_pool_manager.hh
gem5::Wavefront::S_RUNNING
@ S_RUNNING
Definition: wavefront.hh:70
gem5::ComputeUnit::fetchStage
FetchStage fetchStage
Definition: compute_unit.hh:280
gem5::ComputeUnit::ComputeUnitStats::instInterleave
statistics::VectorDistribution instInterleave
Definition: compute_unit.hh:1139
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedTwoOpFP
statistics::Scalar numVecOpsExecutedTwoOpFP
Definition: compute_unit.hh:1109
gem5::WorkitemIdY
@ WorkitemIdY
Definition: kernel_code.hh:77
gem5::ComputeUnit::lastExecCycle
std::vector< uint64_t > lastExecCycle
Definition: compute_unit.hh:323
gem5::FetchUnit::flushBuf
void flushBuf(int wfSlotId)
Definition: fetch_unit.cc:333
gem5::Wavefront::setSleepTime
void setSleepTime(int sleep_time)
Definition: wavefront.cc:1261
shader.hh
gem5::Wavefront::releaseBarrier
void releaseBarrier()
Definition: wavefront.cc:1434
gem5::Wavefront::flatLmUnitId
int flatLmUnitId
Definition: wavefront.hh:103
gem5::Wavefront::isOldestInstBarrier
bool isOldestInstBarrier()
Definition: wavefront.cc:649
gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:297
gem5::Shader::incVectorInstDstOperand
void incVectorInstDstOperand(int num_operands)
Definition: shader.hh:283
gem5::Wavefront::oldVgpr
std::vector< uint32_t > oldVgpr
Definition: wavefront.hh:205
gem5::Wavefront::_execMask
VectorMask _execMask
Definition: wavefront.hh:330
gem5::Wavefront::maxSgprs
uint32_t maxSgprs
Definition: wavefront.hh:133
gem5::Wavefront::exec
void exec()
Definition: wavefront.cc:862
gem5::Gcn3ISA::VecElemU32
uint32_t VecElemU32
Definition: gpu_registers.hh:165
gem5::Wavefront::oldDgpr
std::vector< uint64_t > oldDgpr
Definition: wavefront.hh:212
gem5::PrivateSegBuf
@ PrivateSegBuf
Definition: kernel_code.hh:56
gem5::Wavefront::ldsChunk
LdsChunk * ldsChunk
Definition: wavefront.hh:223
gem5::Wavefront::isOldestInstLMem
bool isOldestInstLMem()
Definition: wavefront.cc:688
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF64
statistics::Scalar numVecOpsExecutedF64
Definition: compute_unit.hh:1095
gem5::ArmISA::set
Bitfield< 12, 11 > set
Definition: misc_types.hh:703
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:59
compute_unit.hh
gem5::Wavefront::S_STALLED_SLEEP
@ S_STALLED_SLEEP
Definition: wavefront.hh:74
gem5::Wavefront::stopFetch
bool stopFetch()
Definition: wavefront.cc:727
gem5::VectorMask
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:45
gem5::ComputeUnit::stats
gem5::ComputeUnit::ComputeUnitStats stats
gem5::WorkgroupInfo
@ WorkgroupInfo
Definition: kernel_code.hh:69
gem5::Wavefront::isOldestInstVectorALU
bool isOldestInstVectorALU()
Definition: wavefront.cc:634
gem5::Wavefront::setWaitCnts
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
Definition: wavefront.cc:1268
gem5::Wavefront::pendingFetch
bool pendingFetch
Definition: wavefront.hh:111
gem5::ComputeUnit::srfToScalarMemPipeBus
WaitClass srfToScalarMemPipeBus
Definition: compute_unit.hh:239
gem5::ComputeUnit::ComputeUnitStats::instCyclesScMemPerSimd
statistics::Vector instCyclesScMemPerSimd
Definition: compute_unit.hh:1031
gem5::Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:168
gem5::HSAQueueEntry::wgId
int wgId(int dim) const
Definition: hsa_queue_entry.hh:209
gem5::Wavefront::pc
Addr pc() const
Definition: wavefront.cc:1363
gem5::Wavefront::WavefrontStats::WavefrontStats
WavefrontStats(statistics::Group *parent)
Definition: wavefront.cc:1439
gem5::Wavefront::startSgprIndex
uint32_t startSgprIndex
Definition: wavefront.hh:202
std::vector< int >
gem5::Wavefront::lastNonIdleTick
Tick lastNonIdleTick
Definition: wavefront.hh:114
gem5::Wavefront::initRegState
void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
Definition: wavefront.cc:117
gem5::Wavefront::reservedVectorRegs
int reservedVectorRegs
Definition: wavefront.hh:194
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA32
statistics::Scalar numVecOpsExecutedFMA32
Definition: compute_unit.hh:1098
gem5::Wavefront::S_STOPPED
@ S_STOPPED
Definition: wavefront.hh:66
gem5::ComputeUnit::vrfToGlobalMemPipeBus
WaitClass vrfToGlobalMemPipeBus
Definition: compute_unit.hh:223
gem5::Wavefront::wgSz
uint32_t wgSz
Definition: wavefront.hh:161
gem5::PrivSegWaveByteOffset
@ PrivSegWaveByteOffset
Definition: kernel_code.hh:70
gem5::RegisterManager::vrfPoolMgrs
std::vector< PoolManager * > vrfPoolMgrs
Definition: register_manager.hh:80
gem5::Wavefront::scalarRdGmReqsInPipe
int scalarRdGmReqsInPipe
Definition: wavefront.hh:188
gem5::ComputeUnit::ComputeUnitStats::activeLanesPerLMemInstrDist
statistics::Distribution activeLanesPerLMemInstrDist
Definition: compute_unit.hh:1119
gem5::Wavefront::actualWgSz
uint32_t actualWgSz[3]
Definition: wavefront.hh:163
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::Wavefront::workGroupSz
uint32_t workGroupSz[3]
Definition: wavefront.hh:158
gem5::HSAQueueEntry::kernargAddr
Addr kernargAddr() const
Definition: hsa_queue_entry.hh:183
gem5::ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:353
gem5::Wavefront::_gpuISA
TheGpuISA::GPUISA _gpuISA
Definition: wavefront.hh:300
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD64
statistics::Scalar numVecOpsExecutedMAD64
Definition: compute_unit.hh:1107
gem5::NumScalarInitFields
@ NumScalarInitFields
Definition: kernel_code.hh:71
gem5::ComputeUnit::issuePeriod
Cycles issuePeriod
Definition: compute_unit.hh:313
gem5::statistics::none
const FlagsType none
Nothing extra to print.
Definition: info.hh:54
gem5::Wavefront::_pc
Addr _pc
Definition: wavefront.hh:329
gem5::Wavefront::rawDist
std::unordered_map< int, uint64_t > rawDist
Definition: wavefront.hh:233
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:292
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC32
statistics::Scalar numVecOpsExecutedMAC32
Definition: compute_unit.hh:1102
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD32
statistics::Scalar numVecOpsExecutedMAD32
Definition: compute_unit.hh:1106
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1328
gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:295
gem5::ComputeUnit::ComputeUnitStats::instCyclesVMemPerSimd
statistics::Vector instCyclesVMemPerSimd
Definition: compute_unit.hh:1030
wavefront.hh
gem5::HSAQueueEntry::amdQueue
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Definition: hsa_queue_entry.hh:307
gem5::Shader::SIMT
@ SIMT
Definition: shader.hh:99
gem5::Wavefront::isOldestInstSleep
bool isOldestInstSleep()
Definition: wavefront.cc:588
gem5::Wavefront::workItemId
std::vector< uint32_t > workItemId[3]
Definition: wavefront.hh:154
gem5::Gcn3ISA::VecRegContainerU32
VecRegContainer< sizeof(VecElemU32) *NumVecElemPerVecReg > VecRegContainerU32
Definition: gpu_registers.hh:179
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::Wavefront::flatGmUnitId
int flatGmUnitId
Definition: wavefront.hh:104
gem5::WorkgroupIdY
@ WorkgroupIdY
Definition: kernel_code.hh:67
gem5::WorkgroupIdZ
@ WorkgroupIdZ
Definition: kernel_code.hh:68
gem5::ComputeUnit::idleWfs
int idleWfs
Definition: compute_unit.hh:344
gem5::Wavefront::wrLmReqsInPipe
int wrLmReqsInPipe
Definition: wavefront.hh:186
gem5::Wavefront::oldVgprTcnt
uint64_t oldVgprTcnt
Definition: wavefront.hh:209
gem5::Wavefront::clearWaitCnts
void clearWaitCnts()
Definition: wavefront.cc:1313
gem5::Wavefront::wgId
uint32_t wgId
Definition: wavefront.hh:160
gem5::Wavefront::wfId
uint32_t wfId
Definition: wavefront.hh:167
gem5::ComputeUnit::idleCUTimeout
Tick idleCUTimeout
Definition: compute_unit.hh:343
gem5::Wavefront::isGmInstruction
bool isGmInstruction(GPUDynInstPtr ii)
Definition: wavefront.cc:566
gem5::Wavefront::setStatus
void setStatus(status_e newStatus)
Definition: wavefront.cc:518
gem5::RegisterManager::mapSgpr
int mapSgpr(Wavefront *w, int sgprIndex)
Definition: register_manager.cc:102
gem5::Wavefront::start
void start(uint64_t _wfDynId, uint64_t _base_ptr)
Definition: wavefront.cc:555
gem5::HSAQueueEntry::MAX_DIM
const static int MAX_DIM
Definition: hsa_queue_entry.hh:310
gem5::Wavefront::freeRegisterFile
void freeRegisterFile()
Freeing VRF space.
Definition: wavefront.cc:1387
gem5::Wavefront::validateRequestCounters
void validateRequestCounters()
Definition: wavefront.cc:746
gem5::ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:244
vector_register_file.hh
gem5::WorkitemIdX
@ WorkitemIdX
Definition: kernel_code.hh:76
gem5::Wavefront::reservedScalarRegs
int reservedScalarRegs
Definition: wavefront.hh:196
bitfield.hh
gem5::statistics::Distribution::init
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2113
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecuted
statistics::Scalar numVecOpsExecuted
Definition: compute_unit.hh:1089
gem5::Wavefront::wrGmReqsInPipe
int wrGmReqsInPipe
Definition: wavefront.hh:187
gem5::WFBarrier::InvalidID
static const int InvalidID
Definition: compute_unit.hh:97
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
gem5::Wavefront::outstandingReqs
int outstandingReqs
Definition: wavefront.hh:171
gem5::Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:227
gem5::Wavefront::isOldestInstGMem
bool isOldestInstGMem()
Definition: wavefront.cc:662
gem5::Wavefront::lgkmInstsIssued
int lgkmInstsIssued
Definition: wavefront.hh:326
gem5::ComputeUnit::vectorSharedMemUnit
WaitClass vectorSharedMemUnit
Definition: compute_unit.hh:233
gem5::ComputeUnit::ComputeUnitStats::activeLanesPerGMemInstrDist
statistics::Distribution activeLanesPerGMemInstrDist
Definition: compute_unit.hh:1118
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::ComputeUnit::scalarMemUnit
WaitClass scalarMemUnit
Definition: compute_unit.hh:241
gem5::Wavefront::resizeRegFiles
void resizeRegFiles(int num_vregs, int num_sregs)
Definition: wavefront.cc:507
gem5::ArmISA::d
Bitfield< 9 > d
Definition: misc_types.hh:64
gem5::Wavefront::rdLmReqsInPipe
int rdLmReqsInPipe
Definition: wavefront.hh:184
gem5::Wavefront::oldDgprTcnt
uint64_t oldDgprTcnt
Definition: wavefront.hh:216
gem5::Wavefront::incLGKMInstsIssued
void incLGKMInstsIssued()
Definition: wavefront.cc:1339
gem5::HSAQueueEntry::vgprBitEnabled
bool vgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:286
gem5::KernargSegPtr
@ KernargSegPtr
Definition: kernel_code.hh:59
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC64
statistics::Scalar numVecOpsExecutedMAC64
Definition: compute_unit.hh:1103
gem5::Wavefront::scalarWrGmReqsInPipe
int scalarWrGmReqsInPipe
Definition: wavefront.hh:189
gem5::Wavefront::scalarOutstandingReqsRdGm
int scalarOutstandingReqsRdGm
Definition: wavefront.hh:181
gem5::Wavefront::scalarMem
int scalarMem
Definition: wavefront.hh:128
gem5::Wavefront::memTraceBusy
int memTraceBusy
Definition: wavefront.hh:191
gem5::Wavefront::wfSlotId
const int wfSlotId
Definition: wavefront.hh:96
gem5::Wavefront::execUnitId
int execUnitId
Definition: wavefront.hh:102
gem5::ComputeUnit::registerManager
RegisterManager * registerManager
Definition: compute_unit.hh:278
gem5::ComputeUnit::ComputeUnitStats::numInstrExecuted
statistics::Scalar numInstrExecuted
Definition: compute_unit.hh:1084
gem5::Wavefront::S_BARRIER
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:92
gem5::HSAQueueEntry::hostDispPktAddr
Addr hostDispPktAddr() const
Definition: hsa_queue_entry.hh:165
gem5::Wavefront::scalarOutstandingReqsWrGm
int scalarOutstandingReqsWrGm
Definition: wavefront.hh:183
scalar_register_file.hh
gem5::ComputeUnit::vrf_lm_bus_latency
Cycles vrf_lm_bus_latency
Definition: compute_unit.hh:320
gpu_dyn_inst.hh
gem5::Wavefront::Wavefront
Wavefront(const Params &p)
Definition: wavefront.cc:48
gem5::HSAQueueEntry::wgSize
int wgSize(int dim) const
Definition: hsa_queue_entry.hh:121
gem5::_amd_queue_t::scratch_workitem_byte_size
uint32_t scratch_workitem_byte_size
Definition: hsa_queue.hh:84
gem5::Wavefront::~Wavefront
~Wavefront()
Definition: wavefront.cc:513
gem5::Wavefront::expInstsIssued
int expInstsIssued
Definition: wavefront.hh:325
gem5::Wavefront::waitCntsSatisfied
bool waitCntsSatisfied()
Definition: wavefront.cc:1199
gem5::_amd_queue_t::scratch_resource_descriptor
uint32_t scratch_resource_descriptor[4]
Definition: hsa_queue.hh:81
gem5::Wavefront::isOldestInstWaitcnt
bool isOldestInstWaitcnt()
Definition: wavefront.cc:602
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
gem5::ComputeUnit::mapWaveToScalarMem
int mapWaveToScalarMem(Wavefront *w) const
Definition: compute_unit.cc:294
gem5::ComputeUnit::mapWaveToGlobalMem
int mapWaveToGlobalMem(Wavefront *w) const
Definition: compute_unit.cc:278
gem5::ComputeUnit::deleteFromPipeMap
void deleteFromPipeMap(Wavefront *w)
Definition: compute_unit.cc:514
gem5::SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:146
gem5::Wavefront::reserveLmResource
void reserveLmResource(GPUDynInstPtr ii)
Definition: wavefront.cc:790
gem5::HSAQueueEntry::gridSize
int gridSize(int dim) const
Definition: hsa_queue_entry.hh:128
gem5::ComputeUnit::scalarALUs
std::vector< WaitClass > scalarALUs
Definition: compute_unit.hh:249
gem5::GridWorkgroupCountX
@ GridWorkgroupCountX
Definition: kernel_code.hh:63
gem5::Wavefront::lastTrace
uint64_t lastTrace
Definition: wavefront.hh:192
gem5::Wavefront::nextInstr
GPUDynInstPtr nextInstr()
Definition: wavefront.cc:1167
gem5::ComputeUnit::instExecPerSimd
std::vector< uint64_t > instExecPerSimd
Definition: compute_unit.hh:326
gem5::Wavefront::discardFetch
void discardFetch()
Definition: wavefront.cc:1186
gem5::Wavefront::wfDynId
uint64_t wfDynId
Definition: wavefront.hh:226
gem5::HSAQueueEntry::hostAMDQueueAddr
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
Definition: hsa_queue_entry.hh:300
gem5::Wavefront::computeUnit
ComputeUnit * computeUnit
Definition: wavefront.hh:106
gem5::Wavefront::execMask
VectorMask & execMask()
Definition: wavefront.cc:1375
gem5::WorkitemIdZ
@ WorkitemIdZ
Definition: kernel_code.hh:78
gem5::GridWorkgroupCountZ
@ GridWorkgroupCountZ
Definition: kernel_code.hh:65
gem5::Wavefront::barrierId
int barrierId() const
Definition: wavefront.cc:1422
gem5::Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::ComputeUnit::getTokenManager
TokenManager * getTokenManager()
Definition: compute_unit.hh:888
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF16
statistics::Scalar numVecOpsExecutedF16
Definition: compute_unit.hh:1091
gem5::Wavefront::decExpInstsIssued
void decExpInstsIssued()
Definition: wavefront.cc:1351
gem5::Wavefront::Params
WavefrontParams Params
Definition: wavefront.hh:244
gem5::Wavefront::WavefrontStats::numInstrExecuted
statistics::Scalar numInstrExecuted
Definition: wavefront.hh:340
gem5::FetchStage::fetchUnit
FetchUnit & fetchUnit(int simdId)
Definition: fetch_stage.hh:66
gem5::RegisterManager::mapVgpr
int mapVgpr(Wavefront *w, int vgprIndex)
Definition: register_manager.cc:95
gem5::ComputeUnit::mapWaveToScalarAlu
int mapWaveToScalarAlu(Wavefront *w) const
Definition: compute_unit.cc:260
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:394
gem5::Wavefront::decVMemInstsIssued
void decVMemInstsIssued()
Definition: wavefront.cc:1345
gem5::ComputeUnit::pipeMap
std::unordered_set< uint64_t > pipeMap
Definition: compute_unit.hh:276
gem5::Wavefront::rdGmReqsInPipe
int rdGmReqsInPipe
Definition: wavefront.hh:185
gem5::divCeil
static constexpr T divCeil(const T &a, const U &b)
Definition: intmath.hh:110
gem5::Wavefront::outstandingReqsWrLm
int outstandingReqsWrLm
Definition: wavefront.hh:175
gem5::Wavefront::hasBarrier
bool hasBarrier() const
Definition: wavefront.cc:1428
gem5::ComputeUnit::updateInstStats
void updateInstStats(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.cc:1877
gem5::ComputeUnit::ComputeUnitStats::instCyclesLdsPerSimd
statistics::Vector instCyclesLdsPerSimd
Definition: compute_unit.hh:1032
gem5::Wavefront::incExpInstsIssued
void incExpInstsIssued()
Definition: wavefront.cc:1333
gem5::QueuePtr
@ QueuePtr
Definition: kernel_code.hh:58
gem5::WaitClass::set
void set(uint64_t i)
Definition: misc.hh:82
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
gem5::Wavefront::decLGKMInstsIssued
void decLGKMInstsIssued()
Definition: wavefront.cc:1357
gem5::Wavefront::S_RETURNING
@ S_RETURNING
Definition: wavefront.hh:68
gem5::Wavefront::vmWaitCnt
int vmWaitCnt
the following are used for waitcnt instructions vmWaitCnt: once set, we wait for the outstanding numbe...
Definition: wavefront.hh:321
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD16
statistics::Scalar numVecOpsExecutedMAD16
Definition: compute_unit.hh:1105
gem5::Wavefront::reserveResources
std::vector< int > reserveResources()
Definition: wavefront.cc:808
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC16
statistics::Scalar numVecOpsExecutedMAC16
Definition: compute_unit.hh:1101
gem5::WorkgroupIdX
@ WorkgroupIdX
Definition: kernel_code.hh:66
gem5::Wavefront::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: wavefront.cc:102
gem5::Wavefront::globalMem
int globalMem
Definition: wavefront.hh:126
gem5::ComputeUnit::vrf_gm_bus_latency
Cycles vrf_gm_bus_latency
Definition: compute_unit.hh:316
gem5::Wavefront::outstandingReqsRdLm
int outstandingReqsRdLm
Definition: wavefront.hh:179
gem5::ComputeUnit::ComputeUnitStats::controlFlowDivergenceDist
statistics::Distribution controlFlowDivergenceDist
Definition: compute_unit.hh:1117
gem5::Wavefront::gridSz
uint32_t gridSz[3]
Definition: wavefront.hh:159
gem5::Wavefront::actualWgSzTotal
uint32_t actualWgSzTotal
Definition: wavefront.hh:164
gem5::Wavefront::lgkmWaitCnt
int lgkmWaitCnt
Definition: wavefront.hh:323
gem5::Wavefront::scalarAlu
int scalarAlu
Definition: wavefront.hh:121
gem5::GridWorkgroupCountY
@ GridWorkgroupCountY
Definition: kernel_code.hh:64
gem5::ComputeUnit::mapWaveToLocalMem
int mapWaveToLocalMem(Wavefront *w) const
Definition: compute_unit.cc:286
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA64
statistics::Scalar numVecOpsExecutedFMA64
Definition: compute_unit.hh:1099
gem5::Wavefront::S_WAITCNT
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:88
gem5::WFBarrier
WF barrier slots.
Definition: compute_unit.hh:90
gem5::DispatchPtr
@ DispatchPtr
Definition: kernel_code.hh:57
gem5::HSAQueueEntry::sgprBitEnabled
bool sgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:291
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:206
gem5::TokenManager::recvTokens
void recvTokens(int num_tokens)
Increment the number of available tokens by num_tokens.
Definition: token_port.cc:155
gem5::ComputeUnit::vrfToLocalMemPipeBus
WaitClass vrfToLocalMemPipeBus
Definition: compute_unit.hh:231
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::ComputeUnit::ComputeUnitStats::execRateDist
statistics::Distribution execRateDist
Definition: compute_unit.hh:1087
gem5::Wavefront::maxVgprs
uint32_t maxVgprs
Definition: wavefront.hh:131
gem5::Wavefront::outstandingReqsWrGm
int outstandingReqsWrGm
Definition: wavefront.hh:173
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF32
statistics::Scalar numVecOpsExecutedF32
Definition: compute_unit.hh:1093
gem5::ComputeUnit::vectorALUs
std::vector< WaitClass > vectorALUs
Definition: compute_unit.hh:245
gem5::Wavefront::isOldestInstScalarALU
bool isOldestInstScalarALU()
Definition: wavefront.cc:619
gem5::Wavefront::WavefrontStats::readsPerWrite
statistics::Distribution readsPerWrite
Definition: wavefront.hh:376
gem5::Wavefront::instructionBuffer
std::deque< GPUDynInstPtr > instructionBuffer
Definition: wavefront.hh:109
gem5::Wavefront::isLmInstruction
bool isLmInstruction(GPUDynInstPtr ii)
Definition: wavefront.cc:577
gem5::Wavefront::stats
gem5::Wavefront::WavefrontStats stats
gem5::Wavefront::outstandingReqsRdGm
int outstandingReqsRdGm
Definition: wavefront.hh:177
gem5::Wavefront::sleepCnt
int sleepCnt
Definition: wavefront.hh:327
gem5::Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:193
gem5::Wavefront::freeResources
void freeResources()
Definition: wavefront.cc:741
gem5::Wavefront::isOldestInstPrivMem
bool isOldestInstPrivMem()
Definition: wavefront.cc:701
gem5::Wavefront::status_e
status_e
Definition: wavefront.hh:63
gem5::Wavefront::sleepDone
bool sleepDone()
Definition: wavefront.cc:1242
gem5::Wavefront::incVMemInstsIssued
void incVMemInstsIssued()
Definition: wavefront.cc:1327
gem5::_amd_queue_t::scratch_backing_memory_location
uint64_t scratch_backing_memory_location
Definition: hsa_queue.hh:82
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
gem5::_amd_queue_t::compute_tmpring_size_wavesize
uint32_t compute_tmpring_size_wavesize
Definition: hsa_queue.hh:79
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::ComputeUnit::vectorGlobalMemUnit
WaitClass vectorGlobalMemUnit
Definition: compute_unit.hh:225
gem5::ComputeUnit::ComputeUnitStats::totalCycles
statistics::Scalar totalCycles
Definition: compute_unit.hh:1111
gem5::Wavefront::startVgprIndex
uint32_t startVgprIndex
Definition: wavefront.hh:199
gem5::ComputeUnit::mapWaveToScalarAluGlobalIdx
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
Definition: compute_unit.cc:271
gem5::Wavefront::barId
int barId
Definition: wavefront.hh:331
gem5::Wavefront::scalarAluGlobalIdx
int scalarAluGlobalIdx
Definition: wavefront.hh:125
gem5::Wavefront::WavefrontStats::vecRawDistance
statistics::Distribution vecRawDistance
Definition: wavefront.hh:372
gem5::Wavefront::lastInstExec
uint64_t lastInstExec
Definition: wavefront.hh:229
gem5::Shader::incVectorInstSrcOperand
void incVectorInstSrcOperand(int num_operands)
Definition: shader.hh:277
gem5::Wavefront::reserveGmResource
void reserveGmResource(GPUDynInstPtr ii)
Definition: wavefront.cc:760
gem5::ComputeUnit::srf_scm_bus_latency
Cycles srf_scm_bus_latency
Definition: compute_unit.hh:318
gem5::Wavefront::isOldestInstScalarMem
bool isOldestInstScalarMem()
Definition: wavefront.cc:675
gem5::Wavefront::localMem
int localMem
Definition: wavefront.hh:127
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA16
statistics::Scalar numVecOpsExecutedFMA16
Definition: compute_unit.hh:1097
gem5::Wavefront::workGroupId
uint32_t workGroupId[3]
Definition: wavefront.hh:157
gem5::Wavefront::dropFetch
bool dropFetch
Definition: wavefront.hh:112
gem5::Gcn3ISA::ScalarRegU32
uint32_t ScalarRegU32
Definition: gpu_registers.hh:153
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
gem5::Wavefront::status
status_e status
Definition: wavefront.hh:328
gem5::Wavefront::simdId
const int simdId
Definition: wavefront.hh:99
gem5::statistics::ScalarBase::value
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:622

Generated on Thu Jul 28 2022 13:32:33 for gem5 by doxygen 1.8.17