gem5 v24.0.0.0
Loading...
Searching...
No Matches
sopp.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33#include "debug/GPUSync.hh"
34#include "gpu-compute/shader.hh"
35
36namespace gem5
37{
38
39namespace VegaISA
40{
41 // --- Inst_SOPP__S_NOP class methods ---
42
44 : Inst_SOPP(iFmt, "s_nop")
45 {
46 setFlag(Nop);
47 } // Inst_SOPP__S_NOP
48
50 {
51 } // ~Inst_SOPP__S_NOP
52
53 // --- description from .arch file ---
54 // Do nothing. Repeat NOP 1..8 times based on SIMM16[2:0] -- 0 = 1 time,
55 // 7 = 8 times.
56 // This instruction may be used to introduce wait states to resolve
57 // hazards; see the shader programming guide for details. Compare with
58 // S_SLEEP.
59 void
61 {
62 } // execute
63 // --- Inst_SOPP__S_ENDPGM class methods ---
64
66 : Inst_SOPP(iFmt, "s_endpgm")
67 {
68 setFlag(EndOfKernel);
69 } // Inst_SOPP__S_ENDPGM
70
72 {
73 } // ~Inst_SOPP__S_ENDPGM
74
75 // --- description from .arch file ---
76 // End of program; terminate wavefront.
77 // The hardware implicitly executes S_WAITCNT 0 before executing this
78 // instruction.
79 // See S_ENDPGM_SAVED for the context-switch version of this instruction.
80 void
82 {
83 Wavefront *wf = gpuDynInst->wavefront();
84 ComputeUnit *cu = gpuDynInst->computeUnit();
85
86 // delete extra instructions fetched for completed work-items
87 wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
88 wf->instructionBuffer.end());
89
90 if (wf->pendingFetch) {
91 wf->dropFetch = true;
92 }
93
95 .flushBuf(wf->wfSlotId);
97
98 int refCount = wf->computeUnit->getLds()
100
106 int bar_id = WFBarrier::InvalidID;
107 if (wf->hasBarrier()) {
108 assert(wf->getStatus() != Wavefront::S_BARRIER);
109 bar_id = wf->barrierId();
110 assert(bar_id != WFBarrier::InvalidID);
111 wf->releaseBarrier();
112 cu->decMaxBarrierCnt(bar_id);
113 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
114 "program and decrementing max barrier count for "
115 "barrier Id%d. New max count: %d.\n", cu->cu_id,
116 wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
117 cu->maxBarrierCnt(bar_id));
118 }
119
120 DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
121 wf->computeUnit->cu_id, wf->wgId, refCount);
122
126
127 panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
128 "than zero\n", wf->computeUnit->cu_id);
129
130 DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
131 wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId);
132
133 for (int i = 0; i < wf->vecReads.size(); i++) {
134 if (wf->rawDist.find(i) != wf->rawDist.end()) {
135 wf->stats.readsPerWrite.sample(wf->vecReads.at(i));
136 }
137 }
138 wf->vecReads.clear();
139 wf->rawDist.clear();
140 wf->lastInstExec = 0;
141
142 if (!refCount) {
149 if (bar_id != WFBarrier::InvalidID) {
150 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
151 "now complete. Releasing barrier Id%d.\n", cu->cu_id,
152 wf->simdId, wf->wfSlotId, wf->wfDynId,
153 wf->barrierId());
154 cu->releaseBarrier(bar_id);
155 }
156
165 // Check whether this workgroup indicates the kernel end, i.e., whether
166 // it is the last workgroup in the kernel.
167 bool kernelEnd =
169
170 bool relNeeded =
172
173 // If it is not a kernel end, then retire the workgroup directly.
174 if (!kernelEnd || !relNeeded) {
178
179 return;
180 }
181
189 setFlag(MemSync);
190 setFlag(GlobalSegment);
191 // Notify Memory System of Kernel Completion
192 // Kernel End = isKernel + isMemSync
194 gpuDynInst->simdId = wf->simdId;
195 gpuDynInst->wfSlotId = wf->wfSlotId;
196 gpuDynInst->wfDynId = wf->wfDynId;
197
198 DPRINTF(GPUExec, "inject global memory fence for CU%d: "
199 "WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
200 wf->simdId, wf->wfSlotId, wf->wfDynId);
201
202 // call shader to prepare the flush operations
203 wf->computeUnit->shader->prepareFlush(gpuDynInst);
204
206 } else {
208 }
209 } // execute
210
211 // --- Inst_SOPP__S_BRANCH class methods ---
212
214 : Inst_SOPP(iFmt, "s_branch")
215 {
217 } // Inst_SOPP__S_BRANCH
218
220 {
221 } // ~Inst_SOPP__S_BRANCH
222
223 // --- description from .arch file ---
224 // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
225 // For a long jump, use S_SETPC.
226 void
228 {
229 Wavefront *wf = gpuDynInst->wavefront();
230 Addr pc = gpuDynInst->pc();
232
233 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
234
235 wf->pc(pc);
236 } // execute
237 // --- Inst_SOPP__S_WAKEUP class methods ---
238
240 : Inst_SOPP(iFmt, "s_wakeup")
241 {
242 } // Inst_SOPP__S_WAKEUP
243
245 {
246 } // ~Inst_SOPP__S_WAKEUP
247
248 // --- description from .arch file ---
249 // Allow a wave to 'ping' all the other waves in its threadgroup to force
250 // them to wake up immediately from an S_SLEEP instruction. The ping is
251 // ignored if the waves are not sleeping.
252 // This allows for more efficient polling on a memory location. The waves
253 // which are polling can sit in a long S_SLEEP between memory reads, but
254 // the wave which writes the value can tell them all to wake up early now
255 // that the data is available. This is useful for fBarrier implementations
256 // (speedup).
257 // This method is also safe from races because if any wave misses the ping,
258 // everything still works fine (whoever missed it just completes their
259 // normal S_SLEEP).
260 void
262 {
264 } // execute
265 // --- Inst_SOPP__S_CBRANCH_SCC0 class methods ---
266
268 : Inst_SOPP(iFmt, "s_cbranch_scc0")
269 {
271 } // Inst_SOPP__S_CBRANCH_SCC0
272
274 {
275 } // ~Inst_SOPP__S_CBRANCH_SCC0
276
277 // --- description from .arch file ---
278 // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
279 // else NOP.
280 void
282 {
283 Wavefront *wf = gpuDynInst->wavefront();
284 Addr pc = gpuDynInst->pc();
286 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
287
288 scc.read();
289
290 if (!scc.rawData()) {
291 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
292 }
293
294 wf->pc(pc);
295 } // execute
296 // --- Inst_SOPP__S_CBRANCH_SCC1 class methods ---
297
299 : Inst_SOPP(iFmt, "s_cbranch_scc1")
300 {
302 } // Inst_SOPP__S_CBRANCH_SCC1
303
305 {
306 } // ~Inst_SOPP__S_CBRANCH_SCC1
307
308 // --- description from .arch file ---
309 // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4;
310 // else NOP.
311 void
313 {
314 Wavefront *wf = gpuDynInst->wavefront();
315 Addr pc = gpuDynInst->pc();
317 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
318
319 scc.read();
320
321 if (scc.rawData()) {
322 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
323 }
324
325 wf->pc(pc);
326 } // execute
327 // --- Inst_SOPP__S_CBRANCH_VCCZ class methods ---
328
330 : Inst_SOPP(iFmt, "s_cbranch_vccz")
331 {
333 setFlag(ReadsVCC);
334 } // Inst_SOPP__S_CBRANCH_VCCZ
335
337 {
338 } // ~Inst_SOPP__S_CBRANCH_VCCZ
339
340 // --- description from .arch file ---
341 // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
342 // else NOP.
343 void
345 {
346 Wavefront *wf = gpuDynInst->wavefront();
347 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
348 Addr pc = gpuDynInst->pc();
350
351 vcc.read();
352
353 if (!vcc.rawData()) {
354 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
355 }
356
357 wf->pc(pc);
358 } // execute
359 // --- Inst_SOPP__S_CBRANCH_VCCNZ class methods ---
360
362 : Inst_SOPP(iFmt, "s_cbranch_vccnz")
363 {
365 setFlag(ReadsVCC);
366 } // Inst_SOPP__S_CBRANCH_VCCNZ
367
369 {
370 } // ~Inst_SOPP__S_CBRANCH_VCCNZ
371
372 // --- description from .arch file ---
373 // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
374 // else NOP.
375 void
377 {
378 Wavefront *wf = gpuDynInst->wavefront();
379 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
380
381 vcc.read();
382
383 if (vcc.rawData()) {
384 Addr pc = gpuDynInst->pc();
386 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
387 wf->pc(pc);
388 }
389 } // execute
390 // --- Inst_SOPP__S_CBRANCH_EXECZ class methods ---
391
393 : Inst_SOPP(iFmt, "s_cbranch_execz")
394 {
396 setFlag(ReadsEXEC);
397 } // Inst_SOPP__S_CBRANCH_EXECZ
398
400 {
401 } // ~Inst_SOPP__S_CBRANCH_EXECZ
402
403 // --- description from .arch file ---
404 // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
405 // else NOP.
406 void
408 {
409 Wavefront *wf = gpuDynInst->wavefront();
410
411 if (wf->execMask().none()) {
412 Addr pc = gpuDynInst->pc();
414 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
415 wf->pc(pc);
416 }
417 } // execute
418 // --- Inst_SOPP__S_CBRANCH_EXECNZ class methods ---
419
421 : Inst_SOPP(iFmt, "s_cbranch_execnz")
422 {
424 setFlag(ReadsEXEC);
425 } // Inst_SOPP__S_CBRANCH_EXECNZ
426
428 {
429 } // ~Inst_SOPP__S_CBRANCH_EXECNZ
430
431 // --- description from .arch file ---
432 // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
433 // else NOP.
434 void
436 {
437 Wavefront *wf = gpuDynInst->wavefront();
438
439 if (wf->execMask().any()) {
440 Addr pc = gpuDynInst->pc();
442 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
443 wf->pc(pc);
444 }
445 } // execute
446 // --- Inst_SOPP__S_BARRIER class methods ---
447
449 : Inst_SOPP(iFmt, "s_barrier")
450 {
451 setFlag(MemBarrier);
452 } // Inst_SOPP__S_BARRIER
453
455 {
456 } // ~Inst_SOPP__S_BARRIER
457
458 // --- description from .arch file ---
459 // Synchronize waves within a threadgroup.
460 // If not all waves of the threadgroup have been created yet, waits for
461 // entire group before proceeding.
462 // If some waves in the threadgroup have already terminated, this waits on
463 // only the surviving waves.
464 // Barriers are legal inside trap handlers.
465 void
467 {
468 Wavefront *wf = gpuDynInst->wavefront();
469 ComputeUnit *cu = gpuDynInst->computeUnit();
470
471 if (wf->hasBarrier()) {
472 int bar_id = wf->barrierId();
473 assert(wf->getStatus() == Wavefront::S_BARRIER);
474 cu->incNumAtBarrier(bar_id);
475 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
476 "barrier Id%d. %d waves now at barrier, %d waves "
477 "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
478 wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
479 cu->numYetToReachBarrier(bar_id));
480 }
481 } // execute
482 // --- Inst_SOPP__S_SETKILL class methods ---
483
485 : Inst_SOPP(iFmt, "s_setkill")
486 {
487 } // Inst_SOPP__S_SETKILL
488
490 {
491 } // ~Inst_SOPP__S_SETKILL
492
493 // --- description from .arch file ---
494 // set KILL bit to value of SIMM16[0].
495 // Used primarily for debugging kill wave host command behavior.
496 void
498 {
500 } // execute
501 // --- Inst_SOPP__S_WAITCNT class methods ---
502
504 : Inst_SOPP(iFmt, "s_waitcnt")
505 {
506 setFlag(ALU);
507 setFlag(Waitcnt);
508 } // Inst_SOPP__S_WAITCNT
509
511 {
512 } // ~Inst_SOPP__S_WAITCNT
513
514 // --- description from .arch file ---
515 // Wait for the counts of outstanding lds, vector-memory and
516 // export/vmem-write-data to be at or below the specified levels.
517 // SIMM16[3:0] = vmcount (vector memory operations),
518 // SIMM16[6:4] = export/mem-write-data count,
519 // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
520 void
522 {
523 ScalarRegI32 vm_cnt = 0;
524 ScalarRegI32 exp_cnt = 0;
525 ScalarRegI32 lgkm_cnt = 0;
526 vm_cnt = bits<ScalarRegI16>(instData.SIMM16, 3, 0);
527 exp_cnt = bits<ScalarRegI16>(instData.SIMM16, 6, 4);
528 lgkm_cnt = bits<ScalarRegI16>(instData.SIMM16, 12, 8);
529 gpuDynInst->wavefront()->setStatus(Wavefront::S_WAITCNT);
530 gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
531 } // execute
532 // --- Inst_SOPP__S_SETHALT class methods ---
533
535 : Inst_SOPP(iFmt, "s_sethalt")
536 {
537 } // Inst_SOPP__S_SETHALT
538
540 {
541 } // ~Inst_SOPP__S_SETHALT
542
543 // --- description from .arch file ---
544 // Set HALT bit to value of SIMM16[0]; 1 = halt, 0 = resume.
545 // The halt flag is ignored while PRIV == 1 (inside trap handlers) but the
546 // shader will halt immediately after the handler returns if HALT is still
547 // set at that time.
548 void
550 {
552 } // execute
553 // --- Inst_SOPP__S_SLEEP class methods ---
554
556 : Inst_SOPP(iFmt, "s_sleep")
557 {
558 setFlag(ALU);
559 setFlag(Sleep);
560 } // Inst_SOPP__S_SLEEP
561
563 {
564 } // ~Inst_SOPP__S_SLEEP
565
566 // --- description from .arch file ---
567 // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
568 // The exact amount of delay is approximate. Compare with S_NOP.
569 void
571 {
573 gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP);
574 // sleep duration is specified in multiples of 64 cycles
575 gpuDynInst->wavefront()->setSleepTime(64 * simm16);
576 } // execute
577 // --- Inst_SOPP__S_SETPRIO class methods ---
578
580 : Inst_SOPP(iFmt, "s_setprio")
581 {
582 setFlag(ALU);
583 } // Inst_SOPP__S_SETPRIO
584
586 {
587 } // ~Inst_SOPP__S_SETPRIO
588
589 // --- description from .arch file ---
590 // User settable wave priority is set to SIMM16[1:0]. 0 = lowest,
591 // 3 = highest.
592 // The overall wave priority is {SPIPrio[1:0] + UserPrio[1:0],
593 // WaveAge[3:0]}.
594 void
596 {
598 ScalarRegU32 userPrio = simm16 & 0x3;
599
600 warn_once("S_SETPRIO ignored -- Requested priority %d\n", userPrio);
601 } // execute
602 // --- Inst_SOPP__S_SENDMSG class methods ---
603
605 : Inst_SOPP(iFmt, "s_sendmsg")
606 {
607 } // Inst_SOPP__S_SENDMSG
608
610 {
611 } // ~Inst_SOPP__S_SENDMSG
612
613 // --- description from .arch file ---
614 // Send a message upstream to VGT or the interrupt handler.
615 // SIMM16[9:0] contains the message type and is documented in the shader
616 // programming guide.
617 void
619 {
621 } // execute
622 // --- Inst_SOPP__S_SENDMSGHALT class methods ---
623
625 : Inst_SOPP(iFmt, "s_sendmsghalt")
626 {
627 } // Inst_SOPP__S_SENDMSGHALT
628
630 {
631 } // ~Inst_SOPP__S_SENDMSGHALT
632
633 // --- description from .arch file ---
634 // Send a message and then HALT the wavefront; see S_SENDMSG for details.
635 void
637 {
639 } // execute
640 // --- Inst_SOPP__S_TRAP class methods ---
641
643 : Inst_SOPP(iFmt, "s_trap")
644 {
645 } // Inst_SOPP__S_TRAP
646
648 {
649 } // ~Inst_SOPP__S_TRAP
650
651 // --- description from .arch file ---
652 // TrapID = SIMM16[7:0];
653 // Wait for all instructions to complete;
654 // set {TTMP1, TTMP0} = {3'h0, PCRewind[3:0], HT[0], TrapID[7:0],
655 // PC[47:0]};
656 // PC = TBA (trap base address);
657 // PRIV = 1.
658 // Enter the trap handler. This instruction may be generated internally as
659 // well in response to a host trap (HT = 1) or an exception.
660 // TrapID 0 is reserved for hardware use and should not be used in a
661 // shader-generated trap.
662 void
664 {
666 } // execute
667 // --- Inst_SOPP__S_ICACHE_INV class methods ---
668
670 : Inst_SOPP(iFmt, "s_icache_inv")
671 {
672 setFlag(MemBarrier);
673 setFlag(GPUStaticInst::MemSync);
674 setFlag(MemSync);
675 } // Inst_SOPP__S_ICACHE_INV
676
678 {
679 } // ~Inst_SOPP__S_ICACHE_INV
680
681 // --- description from .arch file ---
682 // Invalidate entire L1 instruction cache.
683 // You must have 12 separate S_NOP instructions or a jump/branch
684 // instruction after this instruction
685 // to ensure the SQ instruction buffer is purged.
686 void
688 {
689 Wavefront *wf = gpuDynInst->wavefront();
690
691 if (gpuDynInst->exec_mask.none()) {
692 wf->decLGKMInstsIssued();
693 return;
694 }
695
696 gpuDynInst->execUnitId = wf->execUnitId;
697 gpuDynInst->latency.init(gpuDynInst->computeUnit());
698 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
699
700 gpuDynInst->resetEntireStatusVector();
701 gpuDynInst->setStatusVector(0, 1);
702 RequestPtr req = std::make_shared<Request>(0, 0, 0,
703 gpuDynInst->computeUnit()->
704 requestorId(), 0,
705 gpuDynInst->wfDynId);
706 gpuDynInst->setRequestFlags(req);
707 gpuDynInst->computeUnit()->scalarMemoryPipe.
708 injectScalarMemFence(gpuDynInst, false, req);
709 } // execute
710 // --- Inst_SOPP__S_INCPERFLEVEL class methods ---
711
713 : Inst_SOPP(iFmt, "s_incperflevel")
714 {
715 } // Inst_SOPP__S_INCPERFLEVEL
716
718 {
719 } // ~Inst_SOPP__S_INCPERFLEVEL
720
721 // --- description from .arch file ---
722 // Increment performance counter specified in SIMM16[3:0] by 1.
723 void
725 {
727 } // execute
728 // --- Inst_SOPP__S_DECPERFLEVEL class methods ---
729
731 : Inst_SOPP(iFmt, "s_decperflevel")
732 {
733 } // Inst_SOPP__S_DECPERFLEVEL
734
736 {
737 } // ~Inst_SOPP__S_DECPERFLEVEL
738
739 // --- description from .arch file ---
740 // Decrement performance counter specified in SIMM16[3:0] by 1.
741 void
743 {
745 } // execute
746 // --- Inst_SOPP__S_TTRACEDATA class methods ---
747
749 : Inst_SOPP(iFmt, "s_ttracedata")
750 {
751 } // Inst_SOPP__S_TTRACEDATA
752
754 {
755 } // ~Inst_SOPP__S_TTRACEDATA
756
757 // --- description from .arch file ---
758 // Send M0 as user data to the thread trace stream.
759 void
761 {
763 } // execute
764 // --- Inst_SOPP__S_CBRANCH_CDBGSYS class methods ---
765
767 InFmt_SOPP *iFmt)
768 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
769 {
771 } // Inst_SOPP__S_CBRANCH_CDBGSYS
772
774 {
775 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS
776
777 // --- description from .arch file ---
778 // if (conditional_debug_system != 0) then PC = PC + signext(SIMM16 * 4)
779 // + 4;
780 // else NOP.
781 void
786 // --- Inst_SOPP__S_CBRANCH_CDBGUSER class methods ---
787
789 InFmt_SOPP *iFmt)
790 : Inst_SOPP(iFmt, "s_cbranch_cdbguser")
791 {
793 } // Inst_SOPP__S_CBRANCH_CDBGUSER
794
796 {
797 } // ~Inst_SOPP__S_CBRANCH_CDBGUSER
798
799 // --- description from .arch file ---
800 // if (conditional_debug_user != 0) then PC = PC + signext(SIMM16 * 4) + 4;
801 // else NOP.
802 void
807 // --- Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER class methods ---
808
810 InFmt_SOPP *iFmt)
811 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
812 {
814 } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
815
818 {
819 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
820
821 // --- description from .arch file ---
822 // if (conditional_debug_system || conditional_debug_user) then PC = PC +
823 // signext(SIMM16 * 4) + 4;
824 // else NOP.
825 void
830 // --- Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER class methods ---
831
834 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
835 {
837 } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
838
841 {
842 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
843
844 // --- description from .arch file ---
845 // if (conditional_debug_system && conditional_debug_user) then PC = PC +
846 // signext(SIMM16 * 4) + 4;
847 // else NOP.
848 void
853 // --- Inst_SOPP__S_ENDPGM_SAVED class methods ---
854
856 : Inst_SOPP(iFmt, "s_endpgm_saved")
857 {
858 } // Inst_SOPP__S_ENDPGM_SAVED
859
861 {
862 } // ~Inst_SOPP__S_ENDPGM_SAVED
863
864 // --- description from .arch file ---
865 // End of program; signal that a wave has been saved by the context-switch
866 // trap handler and terminate wavefront.
867 // The hardware implicitly executes S_WAITCNT 0 before executing this
868 // instruction.
869 // Use S_ENDPGM in all cases unless you are executing the context-switch
870 // save handler.
871 void
873 {
875 } // execute
876 // --- Inst_SOPP__S_SET_GPR_IDX_OFF class methods ---
877
879 InFmt_SOPP *iFmt)
880 : Inst_SOPP(iFmt, "s_set_gpr_idx_off")
881 {
882 } // Inst_SOPP__S_SET_GPR_IDX_OFF
883
885 {
886 } // ~Inst_SOPP__S_SET_GPR_IDX_OFF
887
888 // --- description from .arch file ---
889 // MODE.gpr_idx_en = 0.
890 // Clear GPR indexing mode. Vector operations after this will not perform
891 // relative GPR addressing regardless of the contents of M0. This
892 // instruction does not modify M0.
893 void
898 // --- Inst_SOPP__S_SET_GPR_IDX_MODE class methods ---
899
901 InFmt_SOPP *iFmt)
902 : Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
903 {
904 } // Inst_SOPP__S_SET_GPR_IDX_MODE
905
907 {
908 } // ~Inst_SOPP__S_SET_GPR_IDX_MODE
909
910 // --- description from .arch file ---
911 // M0[15:12] = SIMM4.
912 // Modify the mode used for vector GPR indexing.
913 // The raw contents of the source field are read and used to set the enable
914 // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL
915 // and SIMM4[3] = VDST_REL.
916 void
921} // namespace VegaISA
922} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
void releaseBarrier(int bar_id)
int numYetToReachBarrier(int bar_id)
LdsState & getLds() const
int maxBarrierCnt(int bar_id)
int numAtBarrier(int bar_id)
void incNumAtBarrier(int bar_id)
RegisterManager * registerManager
void decMaxBarrierCnt(int bar_id)
FetchStage fetchStage
gem5::ComputeUnit::ComputeUnitStats stats
FetchUnit & fetchUnit(int simdId)
void flushBuf(int wfSlotId)
bool isReachingKernelEnd(Wavefront *wf)
void notifyWgCompl(Wavefront *wf)
When an end program instruction detects that the last WF in a WG has completed it will call this meth...
void setFlag(Flags flag)
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
decrease the reference count after making sure it is in the list give back this chunk if the ref coun...
Definition lds_state.hh:376
void freeRegisters(Wavefront *w)
GPUDispatcher & dispatcher()
Definition shader.cc:111
int impl_kern_end_rel
Definition shader.hh:239
void prepareFlush(GPUDynInstPtr gpuDynInst)
dispatcher/shader arranges flush requests to the CUs
Definition shader.cc:241
Base class for branch operations.
Definition branch.hh:49
Nop class.
Definition nop.hh:49
void execute(GPUDynInstPtr) override
Definition sopp.cc:466
void execute(GPUDynInstPtr) override
Definition sopp.cc:227
Inst_SOPP__S_BRANCH(InFmt_SOPP *)
Definition sopp.cc:213
void execute(GPUDynInstPtr) override
Definition sopp.cc:782
void execute(GPUDynInstPtr) override
Definition sopp.cc:803
void execute(GPUDynInstPtr) override
Definition sopp.cc:435
void execute(GPUDynInstPtr) override
Definition sopp.cc:407
void execute(GPUDynInstPtr) override
Definition sopp.cc:281
void execute(GPUDynInstPtr) override
Definition sopp.cc:312
void execute(GPUDynInstPtr) override
Definition sopp.cc:376
void execute(GPUDynInstPtr) override
Definition sopp.cc:344
void execute(GPUDynInstPtr) override
Definition sopp.cc:742
void execute(GPUDynInstPtr) override
Definition sopp.cc:872
Inst_SOPP__S_ENDPGM(InFmt_SOPP *)
Definition sopp.cc:65
void execute(GPUDynInstPtr) override
Definition sopp.cc:81
void execute(GPUDynInstPtr) override
Definition sopp.cc:687
void execute(GPUDynInstPtr) override
Definition sopp.cc:724
Inst_SOPP__S_NOP(InFmt_SOPP *)
Definition sopp.cc:43
void execute(GPUDynInstPtr) override
Definition sopp.cc:60
void execute(GPUDynInstPtr) override
Definition sopp.cc:636
void execute(GPUDynInstPtr) override
Definition sopp.cc:618
void execute(GPUDynInstPtr) override
Definition sopp.cc:549
void execute(GPUDynInstPtr) override
Definition sopp.cc:497
void execute(GPUDynInstPtr) override
Definition sopp.cc:595
void execute(GPUDynInstPtr) override
Definition sopp.cc:917
void execute(GPUDynInstPtr) override
Definition sopp.cc:894
void execute(GPUDynInstPtr) override
Definition sopp.cc:570
Inst_SOPP__S_SLEEP(InFmt_SOPP *)
Definition sopp.cc:555
Inst_SOPP__S_TRAP(InFmt_SOPP *)
Definition sopp.cc:642
void execute(GPUDynInstPtr) override
Definition sopp.cc:663
void execute(GPUDynInstPtr) override
Definition sopp.cc:760
void execute(GPUDynInstPtr) override
Definition sopp.cc:521
void execute(GPUDynInstPtr) override
Definition sopp.cc:261
Inst_SOPP__S_WAKEUP(InFmt_SOPP *)
Definition sopp.cc:239
void read() override
read from and write to the underlying register(s) that this operand is referring to.
Definition operand.hh:409
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
Definition operand.hh:392
static const int InvalidID
Addr pc() const
bool hasBarrier() const
void setStatus(status_e newStatus)
Definition wavefront.cc:573
const int simdId
Definition wavefront.hh:101
ComputeUnit * computeUnit
Definition wavefront.hh:108
std::vector< int > vecReads
Definition wavefront.hh:244
std::deque< GPUDynInstPtr > instructionBuffer
Definition wavefront.hh:111
void releaseBarrier()
uint32_t dispatchId
Definition wavefront.hh:173
status_e getStatus()
Definition wavefront.hh:141
const int wfSlotId
Definition wavefront.hh:98
std::unordered_map< int, uint64_t > rawDist
Definition wavefront.hh:240
void decLGKMInstsIssued()
void barrierId(int bar_id)
uint64_t lastInstExec
Definition wavefront.hh:236
@ S_BARRIER
WF is stalled at a barrier.
Definition wavefront.hh:92
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition wavefront.hh:88
gem5::Wavefront::WavefrontStats stats
VectorMask & execMask()
uint64_t wfDynId
Definition wavefront.hh:233
void sample(const U &v, int n=1)
Add a value to the distribution n times.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
#define warn_once(...)
Definition logging.hh:260
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 4 > pc
uint16_t ScalarRegU16
uint32_t ScalarRegU32
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
statistics::Distribution readsPerWrite
Definition wavefront.hh:383

Generated on Tue Jun 18 2024 16:23:48 for gem5 by doxygen 1.11.0