gem5  [DEVELOP-FOR-23.0]
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
wavefront.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "gpu-compute/wavefront.hh"
33 
34 #include "base/bitfield.hh"
35 #include "debug/GPUExec.hh"
36 #include "debug/GPUInitAbi.hh"
37 #include "debug/WavefrontStack.hh"
41 #include "gpu-compute/shader.hh"
44 
45 namespace gem5
46 {
47 
// Wavefront constructor: establishes default (idle) state for a wave slot.
// NOTE(review): this is a Doxygen-extracted listing — the constructor
// declarator line (orig. line 48, presumably Wavefront::Wavefront(const
// Params &p)) and several member initializations (orig. lines 58-59,
// 63-66, 71-74) were dropped by the exporter; confirm against upstream.
49  : SimObject(p), wfSlotId(p.wf_slot_id), simdId(p.simdId),
50  maxIbSize(p.max_ib_size), _gpuISA(*this),
51  vmWaitCnt(-1), expWaitCnt(-1), lgkmWaitCnt(-1),
52  vmemInstsIssued(0), expInstsIssued(0), lgkmInstsIssued(0),
53  sleepCnt(0), barId(WFBarrier::InvalidID), stats(this)
54 {
// Wave starts stopped with no execution unit assigned.
55  lastTrace = 0;
56  execUnitId = -1;
57  status = S_STOPPED;
60  startVgprIndex = 0;
61  startSgprIndex = 0;
62  outstandingReqs = 0;
// In-pipe memory request counters: read/write x local/global memory.
67  rdLmReqsInPipe = 0;
68  rdGmReqsInPipe = 0;
69  wrLmReqsInPipe = 0;
70  wrGmReqsInPipe = 0;
75  lastNonIdleTick = 0;
76  ldsChunk = nullptr;
77 
78  memTraceBusy = 0;
// All-ones sentinels: trace counters that have never been updated.
79  oldVgprTcnt = 0xffffffffffffffffll;
80  oldDgprTcnt = 0xffffffffffffffffll;
81  oldVgpr.resize(p.wf_size);
82 
83  pendingFetch = false;
84  dropFetch = false;
85  maxVgprs = 0;
86  maxSgprs = 0;
87 
// Per-lane bookkeeping containers are sized to the wavefront width.
88  lastAddr.resize(p.wf_size);
89  workItemFlatId.resize(p.wf_size);
90  oldDgpr.resize(p.wf_size);
91  for (int i = 0; i < 3; ++i) {
92  workItemId[i].resize(p.wf_size);
93  }
94 
// All lanes active by default; RAW-distance/read-count stats start empty.
95  _execMask.set();
96  rawDist.clear();
97  lastInstExec = 0;
98  vecReads.clear();
99 }
100 
101 void
// NOTE(review): the declarator line (orig. 102) was lost in extraction —
// presumably Wavefront::init(). Resets register-reservation bookkeeping;
// orig. lines 109-113 are also missing here.
103 {
104  reservedVectorRegs = 0;
105  reservedScalarRegs = 0;
106  startVgprIndex = 0;
107  startSgprIndex = 0;
108 
114 }
115 
116 void
// Seed the wave's architectural state from the kernel dispatch descriptor:
// for each enabled init bit, map a virtual SGPR/VGPR to a physical register
// and write the corresponding ABI value (segment pointers, workgroup ids,
// per-lane workitem ids, ...).
// NOTE(review): Doxygen extraction dropped many argument/continuation lines
// throughout this function (visible as gaps in the embedded line numbers);
// the surviving code is reproduced verbatim — restore from upstream gem5.
117 Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
118 {
119  int regInitIdx = 0;
120 
121  // Iterate over all the init fields and check which
122  // bits are enabled. Useful information can be found here:
123  // https://github.com/ROCm-Developer-Tools/ROCm-ComputeABI-Doc/
124  // blob/master/AMDGPU-ABI.md
125  for (int en_bit = 0; en_bit < NumScalarInitFields; ++en_bit) {
126 
127  if (task->sgprBitEnabled(en_bit)) {
128  int physSgprIdx = 0;
129  uint32_t wiCount = 0;
130  uint32_t firstWave = 0;
131  int orderedAppendTerm = 0;
132  int numWfsInWg = 0;
133  uint32_t finalValue = 0;
134  Addr host_disp_pkt_addr = task->hostDispPktAddr();
135  Addr kernarg_addr = task->kernargAddr();
136  Addr hidden_priv_base(0);
137 
138  switch (en_bit) {
139  case PrivateSegBuf:
140  physSgprIdx =
141  computeUnit->registerManager->mapSgpr(this, regInitIdx);
142  computeUnit->srf[simdId]->write(physSgprIdx,
144  ++regInitIdx;
145  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
146  "Setting PrivateSegBuffer: s[%d] = %x\n",
148  wfSlotId, wfDynId, physSgprIdx,
150 
151  physSgprIdx =
152  computeUnit->registerManager->mapSgpr(this, regInitIdx);
153  computeUnit->srf[simdId]->write(physSgprIdx,
155  ++regInitIdx;
156  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
157  "Setting PrivateSegBuffer: s[%d] = %x\n",
159  wfSlotId, wfDynId, physSgprIdx,
161 
162  physSgprIdx =
163  computeUnit->registerManager->mapSgpr(this, regInitIdx);
164  computeUnit->srf[simdId]->write(physSgprIdx,
166  ++regInitIdx;
167  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
168  "Setting PrivateSegBuffer: s[%d] = %x\n",
170  wfSlotId, wfDynId, physSgprIdx,
172 
173  physSgprIdx =
174  computeUnit->registerManager->mapSgpr(this, regInitIdx);
175  computeUnit->srf[simdId]->write(physSgprIdx,
177 
178  ++regInitIdx;
179  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
180  "Setting PrivateSegBuffer: s[%d] = %x\n",
182  wfSlotId, wfDynId, physSgprIdx,
184  break;
185  case DispatchPtr:
// 64-bit host dispatch packet address written as two 32-bit SGPRs.
186  physSgprIdx =
187  computeUnit->registerManager->mapSgpr(this, regInitIdx);
188  computeUnit->srf[simdId]->write(physSgprIdx,
189  bits(host_disp_pkt_addr, 31, 0));
190  ++regInitIdx;
191  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
192  "Setting DispatchPtr: s[%d] = %x\n",
194  wfSlotId, wfDynId, physSgprIdx,
195  bits(host_disp_pkt_addr, 31, 0));
196 
197  physSgprIdx =
198  computeUnit->registerManager->mapSgpr(this, regInitIdx);
199  computeUnit->srf[simdId]->write(physSgprIdx,
200  bits(host_disp_pkt_addr, 63, 32));
201  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
202  "Setting DispatchPtr: s[%d] = %x\n",
204  wfSlotId, wfDynId, physSgprIdx,
205  bits(host_disp_pkt_addr, 63, 32));
206 
207  ++regInitIdx;
208  break;
209  case QueuePtr:
210  physSgprIdx =
211  computeUnit->registerManager->mapSgpr(this, regInitIdx);
212  computeUnit->srf[simdId]->write(physSgprIdx,
213  bits(task->hostAMDQueueAddr, 31, 0));
214  ++regInitIdx;
215  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
216  "Setting QueuePtr: s[%d] = %x\n",
218  wfSlotId, wfDynId, physSgprIdx,
219  bits(task->hostAMDQueueAddr, 31, 0));
220 
221  physSgprIdx =
222  computeUnit->registerManager->mapSgpr(this, regInitIdx);
223  computeUnit->srf[simdId]->write(physSgprIdx,
224  bits(task->hostAMDQueueAddr, 63, 32));
225  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
226  "Setting QueuePtr: s[%d] = %x\n",
228  wfSlotId, wfDynId, physSgprIdx,
229  bits(task->hostAMDQueueAddr, 63, 32));
230 
231  ++regInitIdx;
232  break;
233  case KernargSegPtr:
234  physSgprIdx =
235  computeUnit->registerManager->mapSgpr(this, regInitIdx);
236  computeUnit->srf[simdId]->write(physSgprIdx,
237  bits(kernarg_addr, 31, 0));
238  ++regInitIdx;
239  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
240  "Setting KernargSegPtr: s[%d] = %x\n",
242  wfSlotId, wfDynId, physSgprIdx,
243  bits(kernarg_addr, 31, 0));
244 
245  physSgprIdx =
246  computeUnit->registerManager->mapSgpr(this, regInitIdx);
247  computeUnit->srf[simdId]->write(physSgprIdx,
248  bits(kernarg_addr, 63, 32));
249  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
250  "Setting KernargSegPtr: s[%d] = %x\n",
252  wfSlotId, wfDynId, physSgprIdx,
253  bits(kernarg_addr, 63, 32));
254 
255  ++regInitIdx;
256  break;
257  case DispatchId:
258  physSgprIdx
259  = computeUnit->registerManager->mapSgpr(this, regInitIdx);
260  computeUnit->srf[simdId]->write(physSgprIdx,
261  task->dispatchId());
262  ++regInitIdx;
263  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
264  "Setting DispatchId: s[%d] = %x\n",
266  wfSlotId, wfDynId, physSgprIdx,
267  task->dispatchId());
268 
269  // Dispatch ID in gem5 is an int. Set upper 32-bits to zero.
270  physSgprIdx
271  = computeUnit->registerManager->mapSgpr(this, regInitIdx);
272  computeUnit->srf[simdId]->write(physSgprIdx, 0);
273  ++regInitIdx;
274  break;
275  case FlatScratchInit:
276  physSgprIdx
277  = computeUnit->registerManager->mapSgpr(this, regInitIdx);
278  computeUnit->srf[simdId]->write(physSgprIdx,
280  .scratch_backing_memory_location & 0xffffffff));
281  ++regInitIdx;
282  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
283  "Setting FlatScratch Addr: s[%d] = %x\n",
285  wfSlotId, wfDynId, physSgprIdx,
287  .scratch_backing_memory_location & 0xffffffff));
288 
289  physSgprIdx =
290  computeUnit->registerManager->mapSgpr(this, regInitIdx);
291  // This value should be sizeof(DWORD) aligned, that is
292  // 4 byte aligned
293  computeUnit->srf[simdId]->write(physSgprIdx,
295  ++regInitIdx;
296  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
297  "Setting FlatScratch size: s[%d] = %x\n",
299  wfSlotId, wfDynId, physSgprIdx,
// NOTE(review): orig. lines 300-323 (a large explanatory comment and/or
// code) were dropped by the exporter before the hidden_priv_base setup.
324  hidden_priv_base =
325  (uint64_t)task->amdQueue.scratch_resource_descriptor[0] |
326  (((uint64_t)task->amdQueue.scratch_resource_descriptor[1]
327  & 0x000000000000ffff) << 32);
329  hidden_priv_base,
331  break;
332  case PrivateSegSize:
333  physSgprIdx
334  = computeUnit->registerManager->mapSgpr(this, regInitIdx);
335  computeUnit->srf[simdId]->write(physSgprIdx,
336  task->privMemPerItem());
337  ++regInitIdx;
338  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
339  "Setting private segment size: s[%d] = %x\n",
341  wfSlotId, wfDynId, physSgprIdx,
342  task->privMemPerItem());
343  break;
// Workgroup counts are ceil(gridSize / wgSize) per dimension.
344  case GridWorkgroupCountX:
345  physSgprIdx =
346  computeUnit->registerManager->mapSgpr(this, regInitIdx);
347  wiCount = ((task->gridSize(0) +
348  task->wgSize(0) - 1) /
349  task->wgSize(0));
350  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
351 
352  ++regInitIdx;
353  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
354  "Setting num WG X: s[%d] = %x\n",
356  wfSlotId, wfDynId, physSgprIdx, wiCount);
357  break;
358  case GridWorkgroupCountY:
359  physSgprIdx =
360  computeUnit->registerManager->mapSgpr(this, regInitIdx);
361  wiCount = ((task->gridSize(1) +
362  task->wgSize(1) - 1) /
363  task->wgSize(1));
364  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
365 
366  ++regInitIdx;
367  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
368  "Setting num WG Y: s[%d] = %x\n",
370  wfSlotId, wfDynId, physSgprIdx, wiCount);
371  break;
372  case GridWorkgroupCountZ:
373  physSgprIdx =
374  computeUnit->registerManager->mapSgpr(this, regInitIdx);
375  wiCount = ((task->gridSize(2) +
376  task->wgSize(2) - 1) /
377  task->wgSize(2));
378  computeUnit->srf[simdId]->write(physSgprIdx, wiCount);
379 
380  ++regInitIdx;
381  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
382  "Setting num WG Z: s[%d] = %x\n",
384  wfSlotId, wfDynId, physSgprIdx, wiCount);
385  break;
386  case WorkgroupIdX:
387  physSgprIdx =
388  computeUnit->registerManager->mapSgpr(this, regInitIdx);
389  computeUnit->srf[simdId]->write(physSgprIdx,
390  workGroupId[0]);
391 
392  ++regInitIdx;
393  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
394  "Setting WG ID X: s[%d] = %x\n",
396  wfSlotId, wfDynId, physSgprIdx, workGroupId[0]);
397  break;
398  case WorkgroupIdY:
399  physSgprIdx =
400  computeUnit->registerManager->mapSgpr(this, regInitIdx);
401  computeUnit->srf[simdId]->write(physSgprIdx,
402  workGroupId[1]);
403 
404  ++regInitIdx;
405  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
406  "Setting WG ID Y: s[%d] = %x\n",
408  wfSlotId, wfDynId, physSgprIdx, workGroupId[1]);
409  break;
410  case WorkgroupIdZ:
411  physSgprIdx =
412  computeUnit->registerManager->mapSgpr(this, regInitIdx);
413  computeUnit->srf[simdId]->write(physSgprIdx,
414  workGroupId[2]);
415 
416  ++regInitIdx;
417  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
418  "Setting WG ID Z: s[%d] = %x\n",
420  wfSlotId, wfDynId, physSgprIdx, workGroupId[2]);
421  break;
// NOTE(review): the case label on orig. line 422 (and the comment block on
// orig. lines 425-437) was dropped by the exporter — from the debug text
// below, presumably the private segment wave byte offset case.
423  physSgprIdx =
424  computeUnit->registerManager->mapSgpr(this, regInitIdx);
438  computeUnit->srf[simdId]->write(physSgprIdx, 1024 *
439  (wgId * (wgSz / 64) + wfId) *
441 
442  ++regInitIdx;
443  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
444  "Setting Private Seg Offset: s[%d] = %x\n",
446  wfSlotId, wfDynId, physSgprIdx,
447  1024 * (wgId * (wgSz / 64) + wfId) *
449  break;
450  case WorkgroupInfo:
// Packed word: bit 31 = first-wave flag, bits 6.. = ordered append
// term, low bits = number of wavefronts in the workgroup.
451  firstWave = (wfId == 0) ? 1 : 0;
452  numWfsInWg = divCeil(wgSizeInWorkItems,
453  computeUnit->wfSize());
454  finalValue = firstWave << ((sizeof(uint32_t) * 8) - 1);
455  finalValue |= (orderedAppendTerm << 6);
456  finalValue |= numWfsInWg;
457  physSgprIdx =
458  computeUnit->registerManager->mapSgpr(this, regInitIdx);
460  write(physSgprIdx, finalValue);
461 
462  ++regInitIdx;
463  DPRINTF(GPUInitAbi, "CU%d: WF[%d][%d]: wave[%d] "
464  "Setting WG Info: s[%d] = %x\n",
466  wfSlotId, wfDynId, physSgprIdx, finalValue);
467  break;
468  default:
469  fatal("SGPR enable bit %i not supported\n", en_bit);
470  break;
471  }
472  }
473  }
474 
475  regInitIdx = 0;
476 
477  // iterate over all the init fields and check which
478  // bits are enabled
479  for (int en_bit = 0; en_bit < NumVectorInitFields; ++en_bit) {
480  if (task->vgprBitEnabled(en_bit)) {
481  uint32_t physVgprIdx = 0;
483 
484  switch (en_bit) {
// Broadcast each lane's X/Y/Z workitem id into a freshly mapped VGPR.
485  case WorkitemIdX:
486  {
487  physVgprIdx = computeUnit->registerManager
488  ->mapVgpr(this, regInitIdx);
489  TheGpuISA::VecElemU32 *vgpr_x
490  = raw_vgpr.as<TheGpuISA::VecElemU32>();
491 
492  for (int lane = 0; lane < workItemId[0].size(); ++lane) {
493  vgpr_x[lane] = workItemId[0][lane];
494  }
495 
496  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
497  rawDist[regInitIdx] = 0;
498  ++regInitIdx;
499  }
500  break;
501  case WorkitemIdY:
502  {
503  physVgprIdx = computeUnit->registerManager
504  ->mapVgpr(this, regInitIdx);
505  TheGpuISA::VecElemU32 *vgpr_y
506  = raw_vgpr.as<TheGpuISA::VecElemU32>();
507 
508  for (int lane = 0; lane < workItemId[1].size(); ++lane) {
509  vgpr_y[lane] = workItemId[1][lane];
510  }
511 
512  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
513  rawDist[regInitIdx] = 0;
514  ++regInitIdx;
515  }
516  break;
517  case WorkitemIdZ:
518  {
519  physVgprIdx = computeUnit->registerManager->
520  mapVgpr(this, regInitIdx);
521  TheGpuISA::VecElemU32 *vgpr_z
522  = raw_vgpr.as<TheGpuISA::VecElemU32>();
523 
524  for (int lane = 0; lane < workItemId[2].size(); ++lane) {
525  vgpr_z[lane] = workItemId[2][lane];
526  }
527 
528  computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
529  rawDist[regInitIdx] = 0;
530  ++regInitIdx;
531  }
532  break;
533  }
534  }
535  }
536 }
537 
538 void
539 Wavefront::resizeRegFiles(int num_vregs, int num_sregs)
540 {
541  maxVgprs = num_vregs;
542  maxSgprs = num_sregs;
543 }
544 
// NOTE(review): declarator line lost in extraction (orig. 545) —
// presumably the empty Wavefront destructor definition.
546 {
547 }
548 
549 void
// Transition the wave's status, maintaining the CU-wide idle-wave count
// used for the idle-CU timeout check.
// NOTE(review): declarator line lost in extraction (orig. 550) —
// presumably Wavefront::setStatus(status_e newStatus); several statement
// continuations (orig. 559, 561-562, 572-574, 576) are also missing.
551 {
552  if (computeUnit->idleCUTimeout > 0) {
553  // Wavefront's status transitions to stalled or stopped
554  if ((newStatus == S_STOPPED || newStatus == S_STALLED ||
555  newStatus == S_WAITCNT || newStatus == S_BARRIER) &&
556  (status != newStatus)) {
557  computeUnit->idleWfs++;
558  assert(computeUnit->idleWfs <=
560  if (computeUnit->idleWfs ==
563  }
564  // Wavefront's status transitions to an active state (from
565  // a stopped or stalled state)
566  } else if ((status == S_STOPPED || status == S_STALLED ||
567  status == S_WAITCNT || status == S_BARRIER) &&
568  (status != newStatus)) {
569  // if all WFs in the CU were idle then check if the idleness
570  // period exceeded the timeout threshold
571  if (computeUnit->idleWfs ==
575  "CU%d has been idle for %d ticks at tick %d",
577  curTick());
578  }
579  computeUnit->idleWfs--;
580  assert(computeUnit->idleWfs >= 0);
581  }
582  }
583  status = newStatus;
584 }
585 
586 void
587 Wavefront::start(uint64_t _wf_dyn_id, Addr init_pc)
588 {
589  wfDynId = _wf_dyn_id;
590  _pc = init_pc;
591 
592  status = S_RUNNING;
593 
594  vecReads.resize(maxVgprs, 0);
595 }
596 
597 bool
// True if the instruction targets global memory, either directly or as a
// FLAT access resolved to the global segment.
// NOTE(review): declarator lost in extraction (orig. 598) — this is
// Wavefront::isGmInstruction(GPUDynInstPtr ii) per the call site in exec().
599 {
600  if (ii->isGlobalMem() ||
601  (ii->isFlat() && ii->executedAs() == enums::SC_GLOBAL)) {
602  return true;
603  }
604 
605  return false;
606 }
607 
608 bool
// True if the instruction targets local (LDS) memory, either directly or
// as a FLAT access resolved to the group segment.
// NOTE(review): declarator lost in extraction (orig. 609) — this is
// Wavefront::isLmInstruction(GPUDynInstPtr ii) per the call site in exec().
610 {
611  if (ii->isLocalMem() ||
612  (ii->isFlat() && ii->executedAs() == enums::SC_GROUP)) {
613  return true;
614  }
615 
616  return false;
617 }
618 
619 bool
// True if the oldest buffered instruction is a sleep instruction.
// NOTE(review): declarator lost in extraction (orig. 620) — presumably
// Wavefront::isOldestInstSleep(); confirm against upstream.
621 {
622  if (instructionBuffer.empty())
623  return false;
624 
625  GPUDynInstPtr ii = instructionBuffer.front();
626 
627  if (ii->isSleep()) {
628  return true;
629  }
630  return false;
631 }
632 
633 bool
// True if the oldest buffered instruction is a waitcnt.
// NOTE(review): declarator lost in extraction (orig. 634) — this is
// Wavefront::isOldestInstWaitcnt() per the assert in exec().
635 {
636  if (instructionBuffer.empty())
637  return false;
638 
639  GPUDynInstPtr ii = instructionBuffer.front();
640 
641  if (ii->isWaitcnt()) {
642  // waitcnt is a scalar
643  assert(ii->isScalar());
644  return true;
645  }
646 
647  return false;
648 }
649 
650 bool
// True if the wave is active and its oldest instruction is a scalar
// ALU-class op (nop/return/end-of-kernel/branch/ALU/kernarg load).
// NOTE(review): declarator lost in extraction (orig. 651) — presumably
// Wavefront::isOldestInstScalarALU(); confirm against upstream.
652 {
653  assert(!instructionBuffer.empty());
654  GPUDynInstPtr ii = instructionBuffer.front();
655 
656  if (status != S_STOPPED && ii->isScalar() && (ii->isNop() || ii->isReturn()
657  || ii->isEndOfKernel() || ii->isBranch() || ii->isALU() ||
658  (ii->isKernArgSeg() && ii->isLoad()))) {
659  return true;
660  }
661 
662  return false;
663 }
664 
665 bool
// Vector counterpart of the scalar-ALU predicate above (!isScalar()).
// NOTE(review): declarator lost in extraction (orig. 666) — presumably
// Wavefront::isOldestInstVectorALU(); confirm against upstream.
667 {
668  assert(!instructionBuffer.empty());
669  GPUDynInstPtr ii = instructionBuffer.front();
670 
671  if (status != S_STOPPED && !ii->isScalar() && (ii->isNop() ||
672  ii->isReturn() || ii->isBranch() || ii->isALU() || ii->isEndOfKernel()
673  || (ii->isKernArgSeg() && ii->isLoad()))) {
674  return true;
675  }
676 
677  return false;
678 }
679 
680 bool
// True if the wave is active and its oldest instruction is a barrier.
// NOTE(review): declarator lost in extraction (orig. 681).
682 {
683  assert(!instructionBuffer.empty());
684  GPUDynInstPtr ii = instructionBuffer.front();
685 
686  if (status != S_STOPPED && ii->isBarrier()) {
687  return true;
688  }
689 
690  return false;
691 }
692 
693 bool
// True if the wave is active and its oldest instruction is a vector
// (non-scalar) global-memory access.
// NOTE(review): declarator lost in extraction (orig. 694).
695 {
696  assert(!instructionBuffer.empty());
697  GPUDynInstPtr ii = instructionBuffer.front();
698 
699  if (status != S_STOPPED && !ii->isScalar() && ii->isGlobalMem()) {
700  return true;
701  }
702 
703  return false;
704 }
705 
706 bool
// True if the wave is active and its oldest instruction is a scalar
// global-memory access.
// NOTE(review): declarator lost in extraction (orig. 707).
708 {
709  assert(!instructionBuffer.empty());
710  GPUDynInstPtr ii = instructionBuffer.front();
711 
712  if (status != S_STOPPED && ii->isScalar() && ii->isGlobalMem()) {
713  return true;
714  }
715 
716  return false;
717 }
718 
719 bool
// True if the wave is active and its oldest instruction is a local-memory
// (LDS) access.
// NOTE(review): declarator lost in extraction (orig. 720).
721 {
722  assert(!instructionBuffer.empty());
723  GPUDynInstPtr ii = instructionBuffer.front();
724 
725  if (status != S_STOPPED && ii->isLocalMem()) {
726  return true;
727  }
728 
729  return false;
730 }
731 
732 bool
// True if the wave is active and its oldest instruction accesses the
// private segment.
// NOTE(review): declarator lost in extraction (orig. 733).
734 {
735  assert(!instructionBuffer.empty());
736  GPUDynInstPtr ii = instructionBuffer.front();
737 
738  if (status != S_STOPPED && ii->isPrivateSeg()) {
739  return true;
740  }
741 
742  return false;
743 }
744 
745 bool
// Wavefront::isOldestInstFlatMem (declarator line 746 lost in extraction;
// name confirmed by the generated cross-reference for wavefront.cc:746).
// True if the wave is active and its oldest instruction is a FLAT access.
747 {
748  assert(!instructionBuffer.empty());
749  GPUDynInstPtr ii = instructionBuffer.front();
750 
751  if (status != S_STOPPED && ii->isFlat()) {
752  return true;
753  }
754 
755  return false;
756 }
757 
758 bool
// Scan the whole instruction buffer (not just the head) for any
// control-flow instruction: return, branch, or end-of-kernel.
// NOTE(review): declarator lost in extraction (orig. 759) — presumably
// Wavefront::instructionBufferHasBranch(); confirm against upstream.
760 {
761  for (auto it : instructionBuffer) {
762  GPUDynInstPtr ii = it;
763  if (ii->isReturn() || ii->isBranch() ||
764  ii->isEndOfKernel()) {
765  return true;
766  }
767  }
768 
769  return false;
770 }
771 
772 void
// Release the wave's claim on its execution unit.
// NOTE(review): declarator lost in extraction (orig. 773) — presumably
// Wavefront::freeResources(); confirm against upstream.
774 {
775  execUnitId = -1;
776 }
777 
// Sanity check: no in-pipe/outstanding memory request counter may go
// negative. NOTE(review): the declarator (orig. 778, presumably
// Wavefront::validateRequestCounters()) and the first fatal_if line
// (orig. 780) plus the trailing argument lines (orig. 787-788) were
// dropped by the exporter; restore from upstream.
779 {
781  wrLmReqsInPipe < 0 || rdLmReqsInPipe < 0 ||
782  outstandingReqs < 0,
783  "Negative requests in pipe for WF%d for slot%d"
784  " and SIMD%d: Rd GlobalMem Reqs=%d, Wr GlobalMem Reqs=%d,"
785  " Rd LocalMem Reqs=%d, Wr LocalMem Reqs=%d,"
786  " Outstanding Reqs=%d\n",
789 }
790 
791 void
// Account a pending global-memory access in the in-pipe request counters
// (atomics/syncs count as both a read and a write).
// NOTE(review): declarator lost in extraction (orig. 792) — this is
// Wavefront::reserveGmResource(GPUDynInstPtr ii) per its call site in
// reserveResources(); the scalar-path statements (orig. 805, 808, 810,
// 812-813, 817) were also dropped by the exporter.
793 {
794  if (!ii->isScalar()) {
795  if (ii->isLoad()) {
796  rdGmReqsInPipe++;
797  } else if (ii->isStore()) {
798  wrGmReqsInPipe++;
799  } else if (ii->isAtomic() || ii->isMemSync()) {
800  rdGmReqsInPipe++;
801  wrGmReqsInPipe++;
802  } else {
803  panic("Invalid memory operation!\n");
804  }
806  } else {
807  if (ii->isLoad()) {
809  } else if (ii->isStore()) {
811  } else if (ii->isAtomic() || ii->isMemSync()) {
814  } else {
815  panic("Invalid memory operation!\n");
816  }
818  }
819 }
820 
821 void
// Account a pending local-memory (LDS) access in the in-pipe request
// counters; scalar instructions cannot touch shared memory.
// NOTE(review): declarator lost in extraction (orig. 822) — this is
// Wavefront::reserveLmResource(GPUDynInstPtr ii) per its call site in
// reserveResources(); orig. 836 (execUnitId assignment, presumably) is
// also missing.
823 {
824  fatal_if(ii->isScalar(),
825  "Scalar instructions can not access Shared memory!!!");
826  if (ii->isLoad()) {
827  rdLmReqsInPipe++;
828  } else if (ii->isStore()) {
829  wrLmReqsInPipe++;
830  } else if (ii->isAtomic() || ii->isMemSync()) {
831  wrLmReqsInPipe++;
832  rdLmReqsInPipe++;
833  } else {
834  panic("Invalid memory operation!\n");
835  }
837 }
838 
// Classify the oldest instruction and reserve the execution unit(s) it
// needs; returns the reserved unit ids (FLAT ops reserve both LM and GM
// paths). NOTE(review): the declarator (orig. 839-840, presumably
// std::vector<int> Wavefront::reserveResources() per the panic string
// below) and orig. 858, 869, 871 were dropped by the exporter.
841 {
842  // vector of execution unit IDs to return to schedule stage
843  // this return is only used for debugging and an assertion...
844  std::vector<int> execUnitIds;
845 
846  // Get current instruction
847  GPUDynInstPtr ii = instructionBuffer.front();
848  assert(ii);
849 
850  // Single precision ALU or Branch or Return or Special instruction
851  if (ii->isALU() || ii->isSpecialOp() ||
852  ii->isBranch() || ii->isNop() ||
853  (ii->isKernArgSeg() && ii->isLoad()) || ii->isArgSeg() ||
854  ii->isReturn() || ii->isEndOfKernel()) {
855  if (!ii->isScalar()) {
856  execUnitId = simdId;
857  } else {
859  }
860  // this is to enforce a fixed number of cycles per issue slot per SIMD
861  } else if (ii->isBarrier()) {
862  execUnitId = ii->isScalar() ? scalarAluGlobalIdx : simdId;
863  } else if (ii->isFlat()) {
864  assert(!ii->isScalar());
865  reserveLmResource(ii);
866  // add execUnitId, reserved by reserveLmResource, list before it is
867  // overwritten by reserveGmResource
868  execUnitIds.push_back(execUnitId);
870  reserveGmResource(ii);
872  execUnitIds.push_back(flatGmUnitId);
873  execUnitId = -1;
874  } else if (ii->isGlobalMem()) {
875  reserveGmResource(ii);
876  } else if (ii->isLocalMem()) {
877  reserveLmResource(ii);
878  } else if (ii->isPrivateSeg()) {
879  fatal_if(ii->isScalar(),
880  "Scalar instructions can not access Private memory!!!");
881  reserveGmResource(ii);
882  } else {
883  panic("reserveResources -> Couldn't process op!\n");
884  }
885 
886  if (execUnitId != -1) {
887  execUnitIds.push_back(execUnitId);
888  }
889  assert(execUnitIds.size());
890  return execUnitIds;
891 }
892 
893 void
// Execute the wave's oldest instruction: run it, update RF scoreboards and
// per-FP-width/per-op-class statistics, advance or redirect the PC, and
// charge the appropriate pipeline/bus latencies.
// NOTE(review): declarator lost in extraction (orig. 894, presumably
// Wavefront::exec()); numerous statement/argument lines are missing
// throughout (visible as gaps in the embedded numbering) — restore from
// upstream gem5 before relying on this listing.
895 {
896  // ---- Exit if wavefront is inactive ----------------------------- //
897 
898  if (status == S_STOPPED || status == S_RETURNING ||
899  status==S_STALLED || instructionBuffer.empty()) {
900  return;
901  }
902 
903  if (status == S_WAITCNT) {
915  assert(isOldestInstWaitcnt());
916  }
917 
918  // Get current instruction
919 
920  GPUDynInstPtr ii = instructionBuffer.front();
921 
922  const Addr old_pc = pc();
923  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] Executing inst: %s "
924  "(pc: %#x; seqNum: %d)\n", computeUnit->cu_id, simdId, wfSlotId,
925  wfDynId, ii->disassemble(), old_pc, ii->seqNum());
926 
927  ii->execute(ii);
928  // delete the dynamic instruction from the pipeline map
930  // update the instruction stats in the CU
932 
933  // inform VRF of instruction execution to schedule write-back
934  // and scoreboard ready for registers
935  if (!ii->isScalar()) {
936  computeUnit->vrf[simdId]->waveExecuteInst(this, ii);
937  }
938  computeUnit->srf[simdId]->waveExecuteInst(this, ii);
939 
940  computeUnit->shader->incVectorInstSrcOperand(ii->numSrcVecRegOperands());
941  computeUnit->shader->incVectorInstDstOperand(ii->numDstVecRegOperands());
950 
951  if (lastInstExec) {
954  }
956 
957  // want to track:
958  // number of reads that occur per value written
959 
960  // vector RAW dependency tracking
961  for (const auto& srcVecOp : ii->srcVecRegOperands()) {
962  for (const auto& virtIdx : srcVecOp.virtIndices()) {
963  // This check should never fail, but to be safe we check
964  if (rawDist.find(virtIdx) != rawDist.end()) {
966  rawDist[virtIdx]);
967  }
968  // increment number of reads to this register
969  vecReads[virtIdx]++;
970  }
971  }
972 
973  for (const auto& dstVecOp : ii->dstVecRegOperands()) {
974  for (const auto& virtIdx : dstVecOp.virtIndices()) {
975  // rawDist is set on writes, but will not be set for the first
976  // write to each physical register
977  if (rawDist.find(virtIdx) != rawDist.end()) {
978  // Sample the number of reads that were performed
980  }
981  // on a write, reset count of reads to 0
982  vecReads[virtIdx] = 0;
983 
984  rawDist[virtIdx] = stats.numInstrExecuted.value();
985  }
986  }
987 
988  if (pc() == old_pc) {
989  // PC not modified by instruction, proceed to next
990  _gpuISA.advancePC(ii);
991  instructionBuffer.pop_front();
992  } else {
993  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave%d %s taken branch\n",
995  ii->disassemble());
996  discardFetch();
997  }
998  DPRINTF(GPUExec, "CU%d: WF[%d][%d]: wave[%d] (pc: %#x)\n",
1000 
// Per-lane op counting, split by FP width (F16/F32/F64) and fused-op
// class (FMA/MAC/MAD); mutually exclusive width tags are enforced.
1002  const int num_active_lanes = execMask().count();
1004  computeUnit->stats.numVecOpsExecuted += num_active_lanes;
1005 
1006  if (ii->isF16() && ii->isALU()) {
1007  if (ii->isF32() || ii->isF64()) {
1008  fatal("Instruction is tagged as both (1) F16, and (2)"
1009  "either F32 or F64.");
1010  }
1011  computeUnit->stats.numVecOpsExecutedF16 += num_active_lanes;
1012  if (ii->isFMA()) {
1013  computeUnit->stats.numVecOpsExecutedFMA16 += num_active_lanes;
1015  += num_active_lanes;
1016  }
1017  else if (ii->isMAC()) {
1018  computeUnit->stats.numVecOpsExecutedMAC16 += num_active_lanes;
1020  += num_active_lanes;
1021  }
1022  else if (ii->isMAD()) {
1023  computeUnit->stats.numVecOpsExecutedMAD16 += num_active_lanes;
1025  += num_active_lanes;
1026  }
1027  }
1028  if (ii->isF32() && ii->isALU()) {
1029  if (ii->isF16() || ii->isF64()) {
1030  fatal("Instruction is tagged as both (1) F32, and (2)"
1031  "either F16 or F64.");
1032  }
1033  computeUnit->stats.numVecOpsExecutedF32 += num_active_lanes;
1034  if (ii->isFMA()) {
1035  computeUnit->stats.numVecOpsExecutedFMA32 += num_active_lanes;
1037  += num_active_lanes;
1038  }
1039  else if (ii->isMAC()) {
1040  computeUnit->stats.numVecOpsExecutedMAC32 += num_active_lanes;
1042  += num_active_lanes;
1043  }
1044  else if (ii->isMAD()) {
1045  computeUnit->stats.numVecOpsExecutedMAD32 += num_active_lanes;
1047  += num_active_lanes;
1048  }
1049  }
1050  if (ii->isF64() && ii->isALU()) {
1051  if (ii->isF16() || ii->isF32()) {
1052  fatal("Instruction is tagged as both (1) F64, and (2)"
1053  "either F16 or F32.");
1054  }
1055  computeUnit->stats.numVecOpsExecutedF64 += num_active_lanes;
1056  if (ii->isFMA()) {
1057  computeUnit->stats.numVecOpsExecutedFMA64 += num_active_lanes;
1059  += num_active_lanes;
1060  }
1061  else if (ii->isMAC()) {
1062  computeUnit->stats.numVecOpsExecutedMAC64 += num_active_lanes;
1064  += num_active_lanes;
1065  }
1066  else if (ii->isMAD()) {
1067  computeUnit->stats.numVecOpsExecutedMAD64 += num_active_lanes;
1069  += num_active_lanes;
1070  }
1071  }
1072  if (isGmInstruction(ii)) {
1074  num_active_lanes);
1075  } else if (isLmInstruction(ii)) {
1077  num_active_lanes);
1078  }
1079  }
1080 
// FLAT instruction with an all-inactive exec mask: nothing issues.
1085  if (execMask().none() && ii->isFlat()) {
1087  return;
1088  }
1089 
1090  // Update Vector ALU pipeline and other resources
1091  bool flat_as_gm = false;
1092  bool flat_as_lm = false;
1093  if (ii->isFlat()) {
1094  flat_as_gm = (ii->executedAs() == enums::SC_GLOBAL) ||
1095  (ii->executedAs() == enums::SC_PRIVATE);
1096  flat_as_lm = (ii->executedAs() == enums::SC_GROUP);
1097  }
1098 
1099  // Single precision ALU or Branch or Return or Special instruction
1100  // Note, we use the same timing regardless of SP or DP ALU operation.
1101  if (ii->isALU() || ii->isSpecialOp() ||
1102  ii->isBranch() || ii->isNop() ||
1103  (ii->isKernArgSeg() && ii->isLoad()) ||
1104  ii->isArgSeg() || ii->isEndOfKernel() || ii->isReturn()) {
1105  // this is to enforce a fixed number of cycles per issue slot per SIMD
1106  if (!ii->isScalar()) {
1108  cyclesToTicks(computeUnit->issuePeriod));
1109  } else {
1111  cyclesToTicks(computeUnit->issuePeriod));
1112  }
1113  // Barrier on Scalar ALU
1114  } else if (ii->isBarrier()) {
1116  cyclesToTicks(computeUnit->issuePeriod));
1117  // GM or Flat as GM Load
1118  } else if (ii->isLoad() && (ii->isGlobalMem() || flat_as_gm)) {
1119  if (!ii->isScalar()) {
1126  } else {
1128  cyclesToTicks(computeUnit->srf_scm_bus_latency));
1133  }
1134  // GM or Flat as GM Store
1135  } else if (ii->isStore() && (ii->isGlobalMem() || flat_as_gm)) {
1136  if (!ii->isScalar()) {
1138  cyclesToTicks(Cycles(2 * computeUnit->vrf_gm_bus_latency)));
1143  } else {
1145  cyclesToTicks(Cycles(2 * computeUnit->srf_scm_bus_latency)));
1150  }
1151  } else if ((ii->isAtomic() || ii->isMemSync()) &&
1152  (ii->isGlobalMem() || flat_as_gm)) {
1153  if (!ii->isScalar()) {
1155  cyclesToTicks(Cycles(2 * computeUnit->vrf_gm_bus_latency)));
1160  } else {
1162  cyclesToTicks(Cycles(2 * computeUnit->srf_scm_bus_latency)));
1167  }
1168  // LM or Flat as LM Load
1169  } else if (ii->isLoad() && (ii->isLocalMem() || flat_as_lm)) {
1171  cyclesToTicks(computeUnit->vrf_lm_bus_latency));
1176  // LM or Flat as LM Store
1177  } else if (ii->isStore() && (ii->isLocalMem() || flat_as_lm)) {
1179  cyclesToTicks(Cycles(2 * computeUnit->vrf_lm_bus_latency)));
1184  // LM or Flat as LM, Atomic or MemFence
1185  } else if ((ii->isAtomic() || ii->isMemSync()) &&
1186  (ii->isLocalMem() || flat_as_lm)) {
1188  cyclesToTicks(Cycles(2 * computeUnit->vrf_lm_bus_latency)));
1193  } else {
1194  panic("Bad instruction type!\n");
1195  }
1196 }
1197 
// Return the next instruction to consider for scheduling: the buffer head
// if it has not yet been dispatched (i.e. not in the CU pipe map), else
// the next-oldest entry, or nullptr when there is nothing new to check.
// NOTE(review): declarator lost in extraction (orig. 1198-1199) —
// presumably GPUDynInstPtr Wavefront::nextInstr().
1200 {
1201  // Read next instruction from instruction buffer
1202  GPUDynInstPtr ii = instructionBuffer.front();
1203  // if the WF has been dispatched in the schedule stage then
1204  // check the next oldest instruction for readiness
1205  if (computeUnit->pipeMap.find(ii->seqNum()) !=
1206  computeUnit->pipeMap.end()) {
1207  if (instructionBuffer.size() > 1) {
1208  auto it = instructionBuffer.begin() + 1;
1209  return *it;
1210  } else { // No new instructions to check
1211  return nullptr;
1212  }
1213  }
1214  return ii;
1215 }
1216 
1217 void
// Drop all buffered instructions (called on a taken branch from exec()).
// NOTE(review): declarator lost in extraction (orig. 1218, presumably
// Wavefront::discardFetch()); orig. 1221 and 1223-1227 (likely the
// pending-fetch handling / fetch-buffer flush) are also missing.
1219 {
1220  instructionBuffer.clear();
1222 
1228 }
1229 
1230 bool
// Check whether every armed waitcnt threshold (vm/exp/lgkm) has been met;
// if so, clear them and resume the wave. Returns false while any count is
// still outstanding or the waitcnt has not executed yet.
// NOTE(review): declarator lost in extraction (orig. 1231, presumably
// Wavefront::waitCntsSatisfied()); orig. 1240-1244 (a comment block,
// presumably) are also missing.
1232 {
1233  // Both vmWaitCnt && lgkmWaitCnt uninitialized means
1234  // waitCnt instruction has been dispatched but not executed yet: next
1235  // instruction should be blocked until waitCnt is executed.
1236  if (vmWaitCnt == -1 && expWaitCnt == -1 && lgkmWaitCnt == -1) {
1237  return false;
1238  }
1239 
1245  if (vmWaitCnt != -1) {
1246  if (vmemInstsIssued > vmWaitCnt) {
1247  // vmWaitCnt not satisfied
1248  return false;
1249  }
1250  }
1251 
1252  if (expWaitCnt != -1) {
1253  if (expInstsIssued > expWaitCnt) {
1254  // expWaitCnt not satisfied
1255  return false;
1256  }
1257  }
1258 
1259  if (lgkmWaitCnt != -1) {
1260  if (lgkmInstsIssued > lgkmWaitCnt) {
1261  // lgkmWaitCnt not satisfied
1262  return false;
1263  }
1264  }
1265 
1266  // if we get here all outstanding waitcnts must
1267  // be satisfied, so we resume normal operation
1268  clearWaitCnts();
1269 
1270  return true;
1271 }
1272 
1273 bool
// Tick down the sleep counter; returns true (and resumes the wave) only
// when the count reaches zero.
// NOTE(review): declarator lost in extraction (orig. 1274, presumably
// Wavefront::sleepDone()); confirm against upstream.
1275 {
1276  assert(status == S_STALLED_SLEEP);
1277 
1278  // if the sleep count has not been set, then the sleep instruction has not
1279  // been executed yet, so we will return false without changing the
1280  // wavefront status
1281  if (sleepCnt == 0)
1282  return false;
1283 
1284  sleepCnt--;
1285  if (sleepCnt != 0)
1286  return false;
1287 
1288  status = S_RUNNING;
1289  return true;
1290 }
1291 
1292 void
// Wavefront::setSleepTime(int sleep_time) — declarator line 1293 lost in
// extraction; name confirmed by the generated cross-reference. Arms the
// sleep counter consumed by the sleep-done check above; must only be
// called while no sleep is pending.
1294 {
1295  assert(sleepCnt == 0);
1296  sleepCnt = sleep_time;
1297 }
1298 
1299 void
// Arm the wave's waitcnt thresholds from an executed s_waitcnt. A field at
// its all-ones encoding (0xf / 0x7 / 0x1f) means "don't wait" and leaves
// that counter disarmed (-1).
// NOTE(review): orig. lines 1317-1323 and 1328-1333 (comment blocks,
// presumably) were dropped by the exporter.
1300 Wavefront::setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
1301 {
1302  // the scoreboard should have set the status
1303  // to S_WAITCNT once a waitcnt instruction
1304  // was marked as ready
1305  assert(status == S_WAITCNT);
1306 
1307  // waitcnt instruction shouldn't be sending
1308  // negative counts
1309  assert(vm_wait_cnt >= 0);
1310  assert(exp_wait_cnt >= 0);
1311  assert(lgkm_wait_cnt >= 0);
1312  // waitcnts are a max of 15 because we have
1313  // only 1 nibble (4 bits) to set the counts
1314  assert(vm_wait_cnt <= 0xf);
1315  assert(exp_wait_cnt <= 0x7);
1316  assert(lgkm_wait_cnt <= 0x1f);
1317 
1324  assert(vmWaitCnt == -1);
1325  assert(expWaitCnt == -1);
1326  assert(lgkmWaitCnt == -1);
1327 
1334  if (vm_wait_cnt != 0xf)
1335  vmWaitCnt = vm_wait_cnt;
1336 
1337  if (exp_wait_cnt != 0x7)
1338  expWaitCnt = exp_wait_cnt;
1339 
1340  if (lgkm_wait_cnt != 0x1f)
1341  lgkmWaitCnt = lgkm_wait_cnt;
1342 }
1343 
1344 void
// Disarm all waitcnt thresholds and resume the wave.
// NOTE(review): declarator lost in extraction (orig. 1345) — this is
// Wavefront::clearWaitCnts() per its call site in the satisfied-check.
1346 {
1347  // reset the waitcnts back to
1348  // -1, indicating they are no
1349  // longer valid
1350  vmWaitCnt = -1;
1351  expWaitCnt = -1;
1352  lgkmWaitCnt = -1;
1353 
1354  // resume running normally
1355  status = S_RUNNING;
1356 }
1357 
// Six trivial mutators for the issued-instruction counters that the
// waitcnt logic compares against. All declarator lines (orig. 1359, 1365,
// 1371, 1377, 1383, 1389) were lost in extraction; from the bodies these
// are, in order: incVMemInstsIssued, incExpInstsIssued,
// incLGKMInstsIssued, decVMemInstsIssued, decExpInstsIssued,
// decLGKMInstsIssued (names presumed — confirm against upstream).
1358 void
1360 {
1361  ++vmemInstsIssued;
1362 }
1363 
1364 void
1366 {
1367  ++expInstsIssued;
1368 }
1369 
1370 void
1372 {
1373  ++lgkmInstsIssued;
1374 }
1375 
1376 void
1378 {
1379  --vmemInstsIssued;
1380 }
1381 
1382 void
1384 {
1385  --expInstsIssued;
1386 }
1387 
1388 void
1390 {
1391  --lgkmInstsIssued;
1392 }
1393 
1394 Addr
// PC getter. NOTE(review): declarator lost in extraction (orig. 1395,
// presumably Wavefront::pc() const).
1396 {
1397  return _pc;
1398 }
1399 
1400 void
// PC setter. NOTE(review): declarator lost in extraction (orig. 1401,
// presumably Wavefront::pc(Addr new_pc)).
1402 {
1403  _pc = new_pc;
1404 }
1405 
1406 VectorMask&
// Mutable reference to the full execution mask. NOTE(review): declarator
// lost in extraction (orig. 1407, presumably Wavefront::execMask()).
1408 {
1409  return _execMask;
1410 }
1411 
1412 bool
1413 Wavefront::execMask(int lane) const
1414 {
1415  return _execMask[lane];
1416 }
1417 
1418 void
// Clear the busy bit of every VGPR mapped to this wave, then return the
// wave's reserved VGPR region to the pool manager.
// NOTE(review): declarator lost in extraction (orig. 1419); orig. 1430
// (the pool-manager expression that freeRegion is invoked on) is also
// missing — confirm against upstream.
1420 {
1421  /* clear busy registers */
1422  for (int i=0; i < maxVgprs; i++) {
1423  int vgprIdx = computeUnit->registerManager->mapVgpr(this, i);
1424  computeUnit->vrf[simdId]->markReg(vgprIdx, false);
1425  }
1426 
1427  /* Free registers used by this wavefront */
1428  uint32_t endIndex = (startVgprIndex + reservedVectorRegs - 1) %
1429  computeUnit->vrf[simdId]->numRegs();
1431  freeRegion(startVgprIndex, endIndex);
1432 }
1433 
1434 void
// Wavefront::computeActualWgSz(HSAQueueEntry *task) — declarator line 1435
// lost in extraction; name confirmed by the generated cross-reference.
// Clamps each workgroup dimension to the grid remainder so edge
// workgroups get their (possibly partial) size.
// NOTE(review): orig. 1441 (the actualWgSzTotal accumulation, presumably)
// was dropped by the exporter.
1436 {
1437  actualWgSzTotal = 1;
1438  for (int d = 0; d < HSAQueueEntry::MAX_DIM; ++d) {
1439  actualWgSz[d] = std::min(workGroupSz[d], gridSz[d]
1440  - task->wgId(d) * workGroupSz[d]);
1442  }
1443 }
1444 
1445 void
// Barrier-slot bookkeeping. NOTE(review): declarator lines (orig. 1446,
// 1454, 1460, 1466) were lost in extraction; the last is confirmed as
// Wavefront::releaseBarrier() by the generated cross-reference, and its
// body line (orig. 1468, presumably resetting barId to
// WFBarrier::InvalidID) is also missing.
// Setter: record the wave's assigned barrier slot id.
1447 {
1448  assert(bar_id >= WFBarrier::InvalidID);
1449  assert(bar_id < computeUnit->numBarrierSlots());
1450  barId = bar_id;
1451 }
1452 
1453 int
// Getter for the current barrier slot id.
1455 {
1456  return barId;
1457 }
1458 
1459 bool
// True when a valid (non-InvalidID) barrier slot is assigned.
1461 {
1462  return barId > WFBarrier::InvalidID;
1463 }
1464 
1465 void
// releaseBarrier: body lost in extraction (orig. 1468).
1467 {
1469 }
1470 
// Per-wavefront statistics group: registers all WF-slot counters and
// distributions with the stats framework and initializes the two
// histogram buckets.
// NOTE(review): the constructor declarator (orig. 1470-1471, presumably
// Wavefront::WavefrontStats::WavefrontStats(statistics::Group *parent))
// was lost in extraction.
1472  : statistics::Group(parent),
1473  ADD_STAT(numInstrExecuted,
1474  "number of instructions executed by this WF slot"),
1475  ADD_STAT(schCycles, "number of cycles spent in schedule stage"),
1476  ADD_STAT(schStalls, "number of cycles WF is stalled in SCH stage"),
1477  ADD_STAT(schRfAccessStalls, "number of cycles wave selected in SCH but "
1478  "RF denied adding instruction"),
1479  ADD_STAT(schResourceStalls, "number of cycles stalled in sch by resource"
1480  " not available"),
1481  ADD_STAT(schOpdNrdyStalls, "number of cycles stalled in sch waiting for "
1482  "RF reads to complete"),
1483  ADD_STAT(schLdsArbStalls,
1484  "number of cycles wave stalled due to LDS-VRF arbitration"),
1485  // FIXME: the name of the WF needs to be unique
1486  ADD_STAT(numTimesBlockedDueWAXDependencies, "number of times the wf's "
1487  "instructions are blocked due to WAW or WAR dependencies"),
1488  // FIXME: the name of the WF needs to be unique
1489  ADD_STAT(numTimesBlockedDueRAWDependencies, "number of times the wf's "
1490  "instructions are blocked due to RAW dependencies"),
1491  ADD_STAT(vecRawDistance,
1492  "Count of RAW distance in dynamic instructions for this WF"),
1493  ADD_STAT(readsPerWrite, "Count of Vector reads per write for this WF")
1494 {
// Histogram ranges: RAW distance sampled 0-20, reads-per-write 0-4.
1495  vecRawDistance.init(0, 20, 1);
1496  readsPerWrite.init(0, 4, 1);
1497 }
1498 
1499 } // namespace gem5
gem5::Wavefront::expWaitCnt
int expWaitCnt
Definition: wavefront.hh:322
gem5::Wavefront::workItemFlatId
std::vector< uint32_t > workItemFlatId
Definition: wavefront.hh:155
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:200
gem5::Wavefront::vecReads
std::vector< int > vecReads
Definition: wavefront.hh:237
gem5::Wavefront::S_STALLED
@ S_STALLED
Definition: wavefront.hh:72
gem5::NumVectorInitFields
@ NumVectorInitFields
Definition: kernel_code.hh:79
gem5::Wavefront::vmemInstsIssued
int vmemInstsIssued
Definition: wavefront.hh:324
gem5::init_pc
static void init_pc(py::module_ &m_native)
Definition: core.cc:168
gem5::FlatScratchInit
@ FlatScratchInit
Definition: kernel_code.hh:61
gem5::Wavefront::lastAddr
std::vector< Addr > lastAddr
Definition: wavefront.hh:153
gem5::Wavefront::isOldestInstFlatMem
bool isOldestInstFlatMem()
Definition: wavefront.cc:746
gem5::Wavefront::computeActualWgSz
void computeActualWgSz(HSAQueueEntry *task)
Definition: wavefront.cc:1435
simple_pool_manager.hh
gem5::Wavefront::S_RUNNING
@ S_RUNNING
Definition: wavefront.hh:70
gem5::ComputeUnit::fetchStage
FetchStage fetchStage
Definition: compute_unit.hh:280
gem5::ComputeUnit::ComputeUnitStats::instInterleave
statistics::VectorDistribution instInterleave
Definition: compute_unit.hh:1141
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedTwoOpFP
statistics::Scalar numVecOpsExecutedTwoOpFP
Definition: compute_unit.hh:1111
gem5::WorkitemIdY
@ WorkitemIdY
Definition: kernel_code.hh:77
gem5::ComputeUnit::lastExecCycle
std::vector< uint64_t > lastExecCycle
Definition: compute_unit.hh:323
gem5::FetchUnit::flushBuf
void flushBuf(int wfSlotId)
Definition: fetch_unit.cc:333
gem5::Wavefront::setSleepTime
void setSleepTime(int sleep_time)
Definition: wavefront.cc:1293
shader.hh
gem5::Wavefront::releaseBarrier
void releaseBarrier()
Definition: wavefront.cc:1466
gem5::HSAQueueEntry::privMemPerItem
int privMemPerItem() const
Definition: hsa_queue_entry.hh:218
gem5::Wavefront::flatLmUnitId
int flatLmUnitId
Definition: wavefront.hh:103
gem5::Wavefront::isOldestInstBarrier
bool isOldestInstBarrier()
Definition: wavefront.cc:681
gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition: compute_unit.hh:297
gem5::Shader::incVectorInstDstOperand
void incVectorInstDstOperand(int num_operands)
Definition: shader.hh:312
gem5::Wavefront::oldVgpr
std::vector< uint32_t > oldVgpr
Definition: wavefront.hh:205
gem5::Wavefront::_execMask
VectorMask _execMask
Definition: wavefront.hh:330
gem5::Wavefront::maxSgprs
uint32_t maxSgprs
Definition: wavefront.hh:133
gem5::Wavefront::exec
void exec()
Definition: wavefront.cc:894
gem5::Gcn3ISA::VecElemU32
uint32_t VecElemU32
Definition: gpu_registers.hh:165
gem5::Wavefront::oldDgpr
std::vector< uint64_t > oldDgpr
Definition: wavefront.hh:212
gem5::PrivateSegBuf
@ PrivateSegBuf
Definition: kernel_code.hh:56
gem5::Wavefront::ldsChunk
LdsChunk * ldsChunk
Definition: wavefront.hh:223
gem5::Wavefront::isOldestInstLMem
bool isOldestInstLMem()
Definition: wavefront.cc:720
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF64
statistics::Scalar numVecOpsExecutedF64
Definition: compute_unit.hh:1097
gem5::ArmISA::set
Bitfield< 12, 11 > set
Definition: misc_types.hh:760
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:60
compute_unit.hh
gem5::Wavefront::S_STALLED_SLEEP
@ S_STALLED_SLEEP
Definition: wavefront.hh:74
gem5::Wavefront::stopFetch
bool stopFetch()
Definition: wavefront.cc:759
gem5::VectorMask
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:45
gem5::ComputeUnit::stats
gem5::ComputeUnit::ComputeUnitStats stats
gem5::WorkgroupInfo
@ WorkgroupInfo
Definition: kernel_code.hh:69
gem5::Wavefront::isOldestInstVectorALU
bool isOldestInstVectorALU()
Definition: wavefront.cc:666
gem5::Wavefront::setWaitCnts
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt)
Definition: wavefront.cc:1300
gem5::Wavefront::pendingFetch
bool pendingFetch
Definition: wavefront.hh:111
gem5::ComputeUnit::srfToScalarMemPipeBus
WaitClass srfToScalarMemPipeBus
Definition: compute_unit.hh:239
gem5::ComputeUnit::ComputeUnitStats::instCyclesScMemPerSimd
statistics::Vector instCyclesScMemPerSimd
Definition: compute_unit.hh:1033
gem5::Shader::initShHiddenPrivateBase
void initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
Definition: shader.hh:197
gem5::HSAQueueEntry::wgId
int wgId(int dim) const
Definition: hsa_queue_entry.hh:233
gem5::Wavefront::pc
Addr pc() const
Definition: wavefront.cc:1395
gem5::Wavefront::WavefrontStats::WavefrontStats
WavefrontStats(statistics::Group *parent)
Definition: wavefront.cc:1471
gem5::Wavefront::startSgprIndex
uint32_t startSgprIndex
Definition: wavefront.hh:202
std::vector< int >
gem5::Wavefront::lastNonIdleTick
Tick lastNonIdleTick
Definition: wavefront.hh:114
gem5::Wavefront::initRegState
void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
Definition: wavefront.cc:117
gem5::Wavefront::reservedVectorRegs
int reservedVectorRegs
Definition: wavefront.hh:194
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA32
statistics::Scalar numVecOpsExecutedFMA32
Definition: compute_unit.hh:1100
gem5::Wavefront::S_STOPPED
@ S_STOPPED
Definition: wavefront.hh:66
gem5::ComputeUnit::vrfToGlobalMemPipeBus
WaitClass vrfToGlobalMemPipeBus
Definition: compute_unit.hh:223
gem5::Wavefront::wgSz
uint32_t wgSz
Definition: wavefront.hh:161
gem5::PrivSegWaveByteOffset
@ PrivSegWaveByteOffset
Definition: kernel_code.hh:70
gem5::RegisterManager::vrfPoolMgrs
std::vector< PoolManager * > vrfPoolMgrs
Definition: register_manager.hh:80
gem5::Wavefront::scalarRdGmReqsInPipe
int scalarRdGmReqsInPipe
Definition: wavefront.hh:188
gem5::ComputeUnit::ComputeUnitStats::activeLanesPerLMemInstrDist
statistics::Distribution activeLanesPerLMemInstrDist
Definition: compute_unit.hh:1121
gem5::Wavefront::actualWgSz
uint32_t actualWgSz[3]
Definition: wavefront.hh:163
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::Wavefront::workGroupSz
uint32_t workGroupSz[3]
Definition: wavefront.hh:158
gem5::HSAQueueEntry::kernargAddr
Addr kernargAddr() const
Definition: hsa_queue_entry.hh:207
gem5::ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:353
gem5::Wavefront::_gpuISA
TheGpuISA::GPUISA _gpuISA
Definition: wavefront.hh:300
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD64
statistics::Scalar numVecOpsExecutedMAD64
Definition: compute_unit.hh:1109
gem5::NumScalarInitFields
@ NumScalarInitFields
Definition: kernel_code.hh:71
gem5::ComputeUnit::issuePeriod
Cycles issuePeriod
Definition: compute_unit.hh:313
gem5::statistics::none
const FlagsType none
Nothing extra to print.
Definition: info.hh:53
gem5::Wavefront::_pc
Addr _pc
Definition: wavefront.hh:329
gem5::Wavefront::rawDist
std::unordered_map< int, uint64_t > rawDist
Definition: wavefront.hh:233
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:292
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC32
statistics::Scalar numVecOpsExecutedMAC32
Definition: compute_unit.hh:1104
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD32
statistics::Scalar numVecOpsExecutedMAD32
Definition: compute_unit.hh:1108
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribtion n times.
Definition: statistics.hh:1327
gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:295
gem5::ComputeUnit::ComputeUnitStats::instCyclesVMemPerSimd
statistics::Vector instCyclesVMemPerSimd
Definition: compute_unit.hh:1032
wavefront.hh
gem5::HSAQueueEntry::amdQueue
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Definition: hsa_queue_entry.hh:331
gem5::Shader::SIMT
@ SIMT
Definition: shader.hh:102
gem5::Wavefront::isOldestInstSleep
bool isOldestInstSleep()
Definition: wavefront.cc:620
gem5::Wavefront::workItemId
std::vector< uint32_t > workItemId[3]
Definition: wavefront.hh:154
gem5::Gcn3ISA::VecRegContainerU32
VecRegContainer< sizeof(VecElemU32) *NumVecElemPerVecReg > VecRegContainerU32
Definition: gpu_registers.hh:179
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::Wavefront::flatGmUnitId
int flatGmUnitId
Definition: wavefront.hh:104
gem5::WorkgroupIdY
@ WorkgroupIdY
Definition: kernel_code.hh:67
gem5::WorkgroupIdZ
@ WorkgroupIdZ
Definition: kernel_code.hh:68
gem5::ComputeUnit::idleWfs
int idleWfs
Definition: compute_unit.hh:344
gem5::PrivateSegSize
@ PrivateSegSize
Definition: kernel_code.hh:62
gem5::Wavefront::wrLmReqsInPipe
int wrLmReqsInPipe
Definition: wavefront.hh:186
gem5::Wavefront::oldVgprTcnt
uint64_t oldVgprTcnt
Definition: wavefront.hh:209
gem5::Wavefront::clearWaitCnts
void clearWaitCnts()
Definition: wavefront.cc:1345
gem5::Wavefront::wgId
uint32_t wgId
Definition: wavefront.hh:160
gem5::Wavefront::wfId
uint32_t wfId
Definition: wavefront.hh:167
gem5::ComputeUnit::idleCUTimeout
Tick idleCUTimeout
Definition: compute_unit.hh:343
gem5::Wavefront::isGmInstruction
bool isGmInstruction(GPUDynInstPtr ii)
Definition: wavefront.cc:598
gem5::Wavefront::setStatus
void setStatus(status_e newStatus)
Definition: wavefront.cc:550
gem5::RegisterManager::mapSgpr
int mapSgpr(Wavefront *w, int sgprIndex)
Definition: register_manager.cc:102
gem5::Wavefront::start
void start(uint64_t _wfDynId, uint64_t _base_ptr)
Definition: wavefront.cc:587
gem5::HSAQueueEntry::MAX_DIM
const static int MAX_DIM
Definition: hsa_queue_entry.hh:334
gem5::Wavefront::freeRegisterFile
void freeRegisterFile()
Freeing VRF space.
Definition: wavefront.cc:1419
gem5::Wavefront::validateRequestCounters
void validateRequestCounters()
Definition: wavefront.cc:778
gem5::ComputeUnit::numVectorALUs
int numVectorALUs
Definition: compute_unit.hh:244
vector_register_file.hh
gem5::WorkitemIdX
@ WorkitemIdX
Definition: kernel_code.hh:76
gem5::Wavefront::reservedScalarRegs
int reservedScalarRegs
Definition: wavefront.hh:196
bitfield.hh
gem5::statistics::Distribution::init
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2112
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecuted
statistics::Scalar numVecOpsExecuted
Definition: compute_unit.hh:1091
gem5::Wavefront::wrGmReqsInPipe
int wrGmReqsInPipe
Definition: wavefront.hh:187
gem5::WFBarrier::InvalidID
static const int InvalidID
Definition: compute_unit.hh:97
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
gem5::Wavefront::outstandingReqs
int outstandingReqs
Definition: wavefront.hh:171
gem5::Clocked::cyclesToTicks
Tick cyclesToTicks(Cycles c) const
Definition: clocked_object.hh:227
gem5::Wavefront::isOldestInstGMem
bool isOldestInstGMem()
Definition: wavefront.cc:694
gem5::Wavefront::lgkmInstsIssued
int lgkmInstsIssued
Definition: wavefront.hh:326
gem5::ComputeUnit::vectorSharedMemUnit
WaitClass vectorSharedMemUnit
Definition: compute_unit.hh:233
gem5::ComputeUnit::ComputeUnitStats::activeLanesPerGMemInstrDist
statistics::Distribution activeLanesPerGMemInstrDist
Definition: compute_unit.hh:1120
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:210
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::ComputeUnit::scalarMemUnit
WaitClass scalarMemUnit
Definition: compute_unit.hh:241
gem5::Wavefront::resizeRegFiles
void resizeRegFiles(int num_vregs, int num_sregs)
Definition: wavefront.cc:539
gem5::ArmISA::d
Bitfield< 9 > d
Definition: misc_types.hh:64
gem5::Wavefront::rdLmReqsInPipe
int rdLmReqsInPipe
Definition: wavefront.hh:184
gem5::Wavefront::oldDgprTcnt
uint64_t oldDgprTcnt
Definition: wavefront.hh:216
gem5::Wavefront::incLGKMInstsIssued
void incLGKMInstsIssued()
Definition: wavefront.cc:1371
gem5::HSAQueueEntry::vgprBitEnabled
bool vgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:310
gem5::KernargSegPtr
@ KernargSegPtr
Definition: kernel_code.hh:59
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC64
statistics::Scalar numVecOpsExecutedMAC64
Definition: compute_unit.hh:1105
gem5::Wavefront::scalarWrGmReqsInPipe
int scalarWrGmReqsInPipe
Definition: wavefront.hh:189
gem5::Wavefront::scalarOutstandingReqsRdGm
int scalarOutstandingReqsRdGm
Definition: wavefront.hh:181
gem5::Wavefront::scalarMem
int scalarMem
Definition: wavefront.hh:128
gem5::Wavefront::memTraceBusy
int memTraceBusy
Definition: wavefront.hh:191
gem5::Wavefront::wfSlotId
const int wfSlotId
Definition: wavefront.hh:96
gem5::DispatchId
@ DispatchId
Definition: kernel_code.hh:60
gem5::Wavefront::execUnitId
int execUnitId
Definition: wavefront.hh:102
gem5::ComputeUnit::registerManager
RegisterManager * registerManager
Definition: compute_unit.hh:278
gem5::ComputeUnit::ComputeUnitStats::numInstrExecuted
statistics::Scalar numInstrExecuted
Definition: compute_unit.hh:1086
gem5::Wavefront::S_BARRIER
@ S_BARRIER
WF is stalled at a barrier.
Definition: wavefront.hh:92
gem5::HSAQueueEntry::hostDispPktAddr
Addr hostDispPktAddr() const
Definition: hsa_queue_entry.hh:189
gem5::Wavefront::scalarOutstandingReqsWrGm
int scalarOutstandingReqsWrGm
Definition: wavefront.hh:183
scalar_register_file.hh
gem5::ComputeUnit::vrf_lm_bus_latency
Cycles vrf_lm_bus_latency
Definition: compute_unit.hh:320
gpu_dyn_inst.hh
gem5::Wavefront::Wavefront
Wavefront(const Params &p)
Definition: wavefront.cc:48
gem5::HSAQueueEntry::wgSize
int wgSize(int dim) const
Definition: hsa_queue_entry.hh:145
gem5::_amd_queue_t::scratch_workitem_byte_size
uint32_t scratch_workitem_byte_size
Definition: hsa_queue.hh:84
gem5::Wavefront::~Wavefront
~Wavefront()
Definition: wavefront.cc:545
gem5::Wavefront::expInstsIssued
int expInstsIssued
Definition: wavefront.hh:325
gem5::Wavefront::waitCntsSatisfied
bool waitCntsSatisfied()
Definition: wavefront.cc:1231
gem5::_amd_queue_t::scratch_resource_descriptor
uint32_t scratch_resource_descriptor[4]
Definition: hsa_queue.hh:81
gem5::Wavefront::isOldestInstWaitcnt
bool isOldestInstWaitcnt()
Definition: wavefront.cc:634
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
gem5::ComputeUnit::mapWaveToScalarMem
int mapWaveToScalarMem(Wavefront *w) const
Definition: compute_unit.cc:298
gem5::ComputeUnit::mapWaveToGlobalMem
int mapWaveToGlobalMem(Wavefront *w) const
Definition: compute_unit.cc:282
gem5::ComputeUnit::deleteFromPipeMap
void deleteFromPipeMap(Wavefront *w)
Definition: compute_unit.cc:518
gem5::SimObject
Abstract superclass for simulation objects.
Definition: sim_object.hh:146
gem5::Wavefront::reserveLmResource
void reserveLmResource(GPUDynInstPtr ii)
Definition: wavefront.cc:822
gem5::HSAQueueEntry::gridSize
int gridSize(int dim) const
Definition: hsa_queue_entry.hh:152
gem5::ComputeUnit::scalarALUs
std::vector< WaitClass > scalarALUs
Definition: compute_unit.hh:249
gem5::GridWorkgroupCountX
@ GridWorkgroupCountX
Definition: kernel_code.hh:63
gem5::Wavefront::lastTrace
uint64_t lastTrace
Definition: wavefront.hh:192
gem5::Wavefront::nextInstr
GPUDynInstPtr nextInstr()
Definition: wavefront.cc:1199
gem5::ComputeUnit::instExecPerSimd
std::vector< uint64_t > instExecPerSimd
Definition: compute_unit.hh:326
gem5::Wavefront::discardFetch
void discardFetch()
Definition: wavefront.cc:1218
gem5::Wavefront::wfDynId
uint64_t wfDynId
Definition: wavefront.hh:226
gem5::HSAQueueEntry::hostAMDQueueAddr
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
Definition: hsa_queue_entry.hh:324
gem5::Wavefront::computeUnit
ComputeUnit * computeUnit
Definition: wavefront.hh:106
gem5::Wavefront::execMask
VectorMask & execMask()
Definition: wavefront.cc:1407
gem5::WorkitemIdZ
@ WorkitemIdZ
Definition: kernel_code.hh:78
gem5::GridWorkgroupCountZ
@ GridWorkgroupCountZ
Definition: kernel_code.hh:65
gem5::Wavefront::barrierId
int barrierId() const
Definition: wavefront.cc:1454
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::ComputeUnit::getTokenManager
TokenManager * getTokenManager()
Definition: compute_unit.hh:890
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF16
statistics::Scalar numVecOpsExecutedF16
Definition: compute_unit.hh:1093
gem5::Wavefront::decExpInstsIssued
void decExpInstsIssued()
Definition: wavefront.cc:1383
gem5::Wavefront::Params
WavefrontParams Params
Definition: wavefront.hh:244
gem5::Wavefront::WavefrontStats::numInstrExecuted
statistics::Scalar numInstrExecuted
Definition: wavefront.hh:340
gem5::FetchStage::fetchUnit
FetchUnit & fetchUnit(int simdId)
Definition: fetch_stage.hh:66
gem5::RegisterManager::mapVgpr
int mapVgpr(Wavefront *w, int vgprIndex)
Definition: register_manager.cc:95
gem5::ComputeUnit::mapWaveToScalarAlu
int mapWaveToScalarAlu(Wavefront *w) const
Definition: compute_unit.cc:264
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:396
gem5::Wavefront::decVMemInstsIssued
void decVMemInstsIssued()
Definition: wavefront.cc:1377
gem5::ComputeUnit::pipeMap
std::unordered_set< uint64_t > pipeMap
Definition: compute_unit.hh:276
gem5::Wavefront::rdGmReqsInPipe
int rdGmReqsInPipe
Definition: wavefront.hh:185
gem5::divCeil
static constexpr T divCeil(const T &a, const U &b)
Definition: intmath.hh:110
gem5::Wavefront::outstandingReqsWrLm
int outstandingReqsWrLm
Definition: wavefront.hh:175
gem5::Wavefront::hasBarrier
bool hasBarrier() const
Definition: wavefront.cc:1460
gem5::ComputeUnit::updateInstStats
void updateInstStats(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.cc:1884
gem5::ComputeUnit::ComputeUnitStats::instCyclesLdsPerSimd
statistics::Vector instCyclesLdsPerSimd
Definition: compute_unit.hh:1034
gem5::Wavefront::incExpInstsIssued
void incExpInstsIssued()
Definition: wavefront.cc:1365
gem5::QueuePtr
@ QueuePtr
Definition: kernel_code.hh:58
gem5::WaitClass::set
void set(uint64_t i)
Definition: misc.hh:82
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:214
gem5::Wavefront::decLGKMInstsIssued
void decLGKMInstsIssued()
Definition: wavefront.cc:1389
gem5::Wavefront::S_RETURNING
@ S_RETURNING
Definition: wavefront.hh:68
gem5::Wavefront::vmWaitCnt
int vmWaitCnt
the following are used for waitcnt instructions vmWaitCnt: once set, we wait for the oustanding numbe...
Definition: wavefront.hh:321
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAD16
statistics::Scalar numVecOpsExecutedMAD16
Definition: compute_unit.hh:1107
gem5::Wavefront::reserveResources
std::vector< int > reserveResources()
Definition: wavefront.cc:840
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedMAC16
statistics::Scalar numVecOpsExecutedMAC16
Definition: compute_unit.hh:1103
gem5::WorkgroupIdX
@ WorkgroupIdX
Definition: kernel_code.hh:66
gem5::Wavefront::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: wavefront.cc:102
gem5::Wavefront::globalMem
int globalMem
Definition: wavefront.hh:126
gem5::ComputeUnit::vrf_gm_bus_latency
Cycles vrf_gm_bus_latency
Definition: compute_unit.hh:316
gem5::Wavefront::outstandingReqsRdLm
int outstandingReqsRdLm
Definition: wavefront.hh:179
gem5::ComputeUnit::ComputeUnitStats::controlFlowDivergenceDist
statistics::Distribution controlFlowDivergenceDist
Definition: compute_unit.hh:1119
gem5::Wavefront::gridSz
uint32_t gridSz[3]
Definition: wavefront.hh:159
gem5::Wavefront::actualWgSzTotal
uint32_t actualWgSzTotal
Definition: wavefront.hh:164
gem5::Wavefront::lgkmWaitCnt
int lgkmWaitCnt
Definition: wavefront.hh:323
gem5::Wavefront::scalarAlu
int scalarAlu
Definition: wavefront.hh:121
gem5::GridWorkgroupCountY
@ GridWorkgroupCountY
Definition: kernel_code.hh:64
gem5::ComputeUnit::mapWaveToLocalMem
int mapWaveToLocalMem(Wavefront *w) const
Definition: compute_unit.cc:290
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA64
statistics::Scalar numVecOpsExecutedFMA64
Definition: compute_unit.hh:1101
gem5::Wavefront::S_WAITCNT
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:88
gem5::WFBarrier
WF barrier slots.
Definition: compute_unit.hh:90
gem5::DispatchPtr
@ DispatchPtr
Definition: kernel_code.hh:57
gem5::HSAQueueEntry::sgprBitEnabled
bool sgprBitEnabled(int bit) const
Definition: hsa_queue_entry.hh:315
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:235
gem5::TokenManager::recvTokens
void recvTokens(int num_tokens)
Increment the number of available tokens by num_tokens.
Definition: token_port.cc:155
gem5::ComputeUnit::vrfToLocalMemPipeBus
WaitClass vrfToLocalMemPipeBus
Definition: compute_unit.hh:231
gem5::statistics::Group
Statistics container.
Definition: group.hh:92
gem5::ComputeUnit::ComputeUnitStats::execRateDist
statistics::Distribution execRateDist
Definition: compute_unit.hh:1089
gem5::Wavefront::maxVgprs
uint32_t maxVgprs
Definition: wavefront.hh:131
gem5::Wavefront::outstandingReqsWrGm
int outstandingReqsWrGm
Definition: wavefront.hh:173
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedF32
statistics::Scalar numVecOpsExecutedF32
Definition: compute_unit.hh:1095
gem5::ComputeUnit::vectorALUs
std::vector< WaitClass > vectorALUs
Definition: compute_unit.hh:245
gem5::Wavefront::isOldestInstScalarALU
bool isOldestInstScalarALU()
Definition: wavefront.cc:651
gem5::Wavefront::WavefrontStats::readsPerWrite
statistics::Distribution readsPerWrite
Definition: wavefront.hh:376
gem5::Wavefront::instructionBuffer
std::deque< GPUDynInstPtr > instructionBuffer
Definition: wavefront.hh:109
gem5::Wavefront::isLmInstruction
bool isLmInstruction(GPUDynInstPtr ii)
Definition: wavefront.cc:609
gem5::Wavefront::stats
gem5::Wavefront::WavefrontStats stats
gem5::Wavefront::outstandingReqsRdGm
int outstandingReqsRdGm
Definition: wavefront.hh:177
gem5::Wavefront::sleepCnt
int sleepCnt
Definition: wavefront.hh:327
gem5::Shader::hsail_mode
hsail_mode_e hsail_mode
Definition: shader.hh:222
gem5::Wavefront::freeResources
void freeResources()
Definition: wavefront.cc:773
gem5::Wavefront::isOldestInstPrivMem
bool isOldestInstPrivMem()
Definition: wavefront.cc:733
gem5::Wavefront::status_e
status_e
Definition: wavefront.hh:63
gem5::Wavefront::sleepDone
bool sleepDone()
Definition: wavefront.cc:1274
gem5::Wavefront::incVMemInstsIssued
void incVMemInstsIssued()
Definition: wavefront.cc:1359
gem5::_amd_queue_t::scratch_backing_memory_location
uint64_t scratch_backing_memory_location
Definition: hsa_queue.hh:82
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:236
gem5::_amd_queue_t::compute_tmpring_size_wavesize
uint32_t compute_tmpring_size_wavesize
Definition: hsa_queue.hh:79
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::ComputeUnit::vectorGlobalMemUnit
WaitClass vectorGlobalMemUnit
Definition: compute_unit.hh:225
gem5::ComputeUnit::ComputeUnitStats::totalCycles
statistics::Scalar totalCycles
Definition: compute_unit.hh:1113
gem5::HSAQueueEntry::dispatchId
int dispatchId() const
Definition: hsa_queue_entry.hh:177
gem5::Wavefront::startVgprIndex
uint32_t startVgprIndex
Definition: wavefront.hh:199
gem5::ComputeUnit::mapWaveToScalarAluGlobalIdx
int mapWaveToScalarAluGlobalIdx(Wavefront *w) const
Definition: compute_unit.cc:275
gem5::Wavefront::barId
int barId
Definition: wavefront.hh:331
gem5::Wavefront::scalarAluGlobalIdx
int scalarAluGlobalIdx
Definition: wavefront.hh:125
gem5::Wavefront::WavefrontStats::vecRawDistance
statistics::Distribution vecRawDistance
Definition: wavefront.hh:372
gem5::Wavefront::lastInstExec
uint64_t lastInstExec
Definition: wavefront.hh:229
gem5::Shader::incVectorInstSrcOperand
void incVectorInstSrcOperand(int num_operands)
Definition: shader.hh:306
gem5::Wavefront::reserveGmResource
void reserveGmResource(GPUDynInstPtr ii)
Definition: wavefront.cc:792
gem5::ComputeUnit::srf_scm_bus_latency
Cycles srf_scm_bus_latency
Definition: compute_unit.hh:318
gem5::Wavefront::isOldestInstScalarMem
bool isOldestInstScalarMem()
Definition: wavefront.cc:707
gem5::Wavefront::localMem
int localMem
Definition: wavefront.hh:127
gem5::ComputeUnit::ComputeUnitStats::numVecOpsExecutedFMA16
statistics::Scalar numVecOpsExecutedFMA16
Definition: compute_unit.hh:1099
gem5::Wavefront::workGroupId
uint32_t workGroupId[3]
Definition: wavefront.hh:157
gem5::Wavefront::dropFetch
bool dropFetch
Definition: wavefront.hh:112
gem5::Gcn3ISA::ScalarRegU32
uint32_t ScalarRegU32
Definition: gpu_registers.hh:153
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:188
gem5::Wavefront::status
status_e status
Definition: wavefront.hh:328
gem5::Wavefront::simdId
const int simdId
Definition: wavefront.hh:99
gem5::statistics::ScalarBase::value
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:621

Generated on Sun Jul 30 2023 01:56:57 for gem5 by doxygen 1.8.17