mubuf.cc
/*
 * Copyright (c) 2024 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "arch/amdgpu/vega/insts/instructions.hh"

namespace gem5
{

namespace VegaISA
{
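    // NOTE: every implemented MUBUF instruction below follows the same
    // three-phase pattern: execute() checks the exec mask, gathers the
    // address operands, and issues a request to the global memory pipeline;
    // initiateAcc() starts the actual memory access; and completeAcc()
    // moves returned data into destination VGPRs (loads and returning
    // atomics) or is a no-op (stores).
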
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_X class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X

    // --- description from .arch file ---
    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XY class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    // --- description from .arch file ---
    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    // --- description from .arch file ---
    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    // --- description from .arch file ---
    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_X class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X

    // --- description from .arch file ---
    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XY class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    // --- description from .arch file ---
    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    // --- description from .arch file ---
    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    // --- description from .arch file ---
    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    // --- description from .arch file ---
    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    // --- description from .arch file ---
    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    // --- description from .arch file ---
    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW

    // --- description from .arch file ---
    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
    {
        setFlag(Store);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X

    // --- description from .arch file ---
    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    // --- description from .arch file ---
    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    // --- description from .arch file ---
    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    // --- description from .arch file ---
    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_UBYTE class methods ---

    Inst_MUBUF__BUFFER_LOAD_UBYTE
        ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_UBYTE

    Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE

    // --- description from .arch file ---
    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

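        // If every lane is masked off, this instruction was still counted
        // as an issued vector memory op by the wavefront, so undo that
        // bookkeeping and return without sending anything to memory.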
        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

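        // The buffer resource descriptor is a 128-bit value held in four
        // consecutive SGPRs; SRSRC is encoded in units of four registers,
        // hence the multiply by 4.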
        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

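        // IDXEN/OFFEN select what the VADDR VGPRs supply: nothing, a
        // per-lane byte offset (OFFEN), a per-lane structure index (IDXEN),
        // or both (index in the first VGPR, offset in the second). From the
        // calls below, calcAddr() takes the offset-like operand first and
        // the index operand second, which is why the IDXEN cases swap
        // addr0/addr1.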
        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (instData.LDS) {
            ldsComplete<1>(gpuDynInst);

            return;
        }

        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

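        // Lanes whose computed address fell outside the buffer resource's
        // range were flagged in oobMask at address-calculation time;
        // out-of-bounds buffer loads read back as zero.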
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
                        gpuDynInst->d_data))[lane]);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    } // completeAcc

    // --- Inst_MUBUF__BUFFER_LOAD_SBYTE class methods ---

    Inst_MUBUF__BUFFER_LOAD_SBYTE
        ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_sbyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SBYTE

    Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE

    // --- description from .arch file ---
    // Untyped buffer load signed byte (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_USHORT class methods ---

    Inst_MUBUF__BUFFER_LOAD_USHORT
        ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ushort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_USHORT

    Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_USHORT

    // --- description from .arch file ---
    // Untyped buffer load unsigned short (zero extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (instData.LDS) {
            ldsComplete<1>(gpuDynInst);

            return;
        }

        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
                        gpuDynInst->d_data))[lane]);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    } // completeAcc

    // --- Inst_MUBUF__BUFFER_LOAD_SSHORT class methods ---

    Inst_MUBUF__BUFFER_LOAD_SSHORT
        ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_sshort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SSHORT

    Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT

    // --- description from .arch file ---
    // Untyped buffer load signed short (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_SHORT_D16 class methods ---

    Inst_MUBUF__BUFFER_LOAD_SHORT_D16
        ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_short_d16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
            warn("BUFFER.LDS not implemented!");
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_SHORT_D16

    Inst_MUBUF__BUFFER_LOAD_SHORT_D16::~Inst_MUBUF__BUFFER_LOAD_SHORT_D16()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16

    // --- description from .arch file ---
    // RETURN_DATA[15 : 0].u16 = MEM[ADDR].u16;
    // // RETURN_DATA[31:16] is preserved.
    void
    Inst_MUBUF__BUFFER_LOAD_SHORT_D16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        // For explanation of buffer addressing, see section 9.1.5 in:
        // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
        // instruction-set-architectures/
        // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
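        // Roughly, per that document:
        //   addr(lane) = rsrc.base + soffset + inst_offset
        //              + voffset[lane] + rsrc.stride * index[lane]
        // with each access range-checked against the descriptor's
        // num_records (a simplification: swizzling and other descriptor
        // fields are ignored in this sketch).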
        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_SHORT_D16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_SHORT_D16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

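        // D16 loads replace only the low 16 bits of the destination VGPR;
        // replaceBits() leaves bits 31:16 untouched, matching the
        // "RETURN_DATA[31:16] is preserved" semantics described above.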
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    VecElemU16 buf_val = (reinterpret_cast<VecElemU16*>(
                        gpuDynInst->d_data))[lane];
                    replaceBits(vdst[lane], 15, 0, buf_val);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI class methods ---

    Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
        ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_short_d16_hi")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
            warn("BUFFER.LDS not implemented!");
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI

    Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
        ::~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI

    // --- description from .arch file ---
    // VDATA[31 : 16].b16 = MEM[ADDR].b16;
    // // VDATA[15:0] is preserved.
    void
    Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        // For explanation of buffer addressing, see section 9.1.5 in:
        // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
        // instruction-set-architectures/
        // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    VecElemU16 buf_val = (reinterpret_cast<VecElemU16*>(
                        gpuDynInst->d_data))[lane];
                    replaceBits(vdst[lane], 31, 16, buf_val);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_DWORD class methods ---

    Inst_MUBUF__BUFFER_LOAD_DWORD
        ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORD

    Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORD

    // --- description from .arch file ---
    // Untyped buffer load dword.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        // For explanation of buffer addressing, see section 9.1.5 in:
        // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
        // instruction-set-architectures/
        // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (instData.LDS) {
            ldsComplete<1>(gpuDynInst);

            return;
        }

        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_DWORDX2 class methods ---

    Inst_MUBUF__BUFFER_LOAD_DWORDX2
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);

        panic_if(instData.LDS, "Return to LDS not supported for %s", _opcode);
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX2

    Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2

    // --- description from .arch file ---
    // Untyped buffer load 2 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 2];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 2 + 1];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_DWORDX3 class methods ---

    Inst_MUBUF__BUFFER_LOAD_DWORDX3
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX3

    Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3

    // --- description from .arch file ---
    // Untyped buffer load 3 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<3>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (instData.LDS) {
            ldsComplete<4>(gpuDynInst);

            return;
        }

        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3 + 1];
                    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3 + 2];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                    vdst2[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_DWORDX4 class methods ---

    Inst_MUBUF__BUFFER_LOAD_DWORDX4
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX4

    Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4

    // --- description from .arch file ---
    // Untyped buffer load 4 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (instData.LDS) {
            ldsComplete<4>(gpuDynInst);

            return;
        }

        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
        VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 1];
                    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 2];
                    vdst3[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 3];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                    vdst2[lane] = 0;
                    vdst3[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
        vdst3.write();
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_BYTE class methods ---

    Inst_MUBUF__BUFFER_STORE_BYTE
        ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_byte")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_BYTE

    Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_BYTE

    // --- description from .arch file ---
    // Untyped buffer store byte.
    void
    Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

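        // Unlike loads, stores are also tracked against the export counter
        // (which covers store data leaving the VGPRs), so an all-masked
        // store must roll back both the VMEM and EXP issue bookkeeping.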
        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            wf->decExpInstsIssued();
            wf->untrackExpInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandI8 data(gpuDynInst, extData.VDATA);

        rsrcDesc.read();
        offset.read();
        data.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);

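        // Stage each active lane's store data into the instruction's
        // d_data buffer; initiateAcc() writes it to memory from there.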
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<VecElemI8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_SHORT class methods ---

    Inst_MUBUF__BUFFER_STORE_SHORT
        ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_short")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_SHORT

    Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_SHORT

    // --- description from .arch file ---
    // Untyped buffer store short.
    void
    Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            wf->decExpInstsIssued();
            wf->untrackExpInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandI16 data(gpuDynInst, extData.VDATA);

        rsrcDesc.read();
        offset.read();
        data.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<VecElemI16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_DWORD class methods ---

    Inst_MUBUF__BUFFER_STORE_DWORD
        ::Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORD

    Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORD

    // --- description from .arch file ---
    // Untyped buffer store dword.
    void
    Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            wf->decExpInstsIssued();
            wf->untrackExpInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data(gpuDynInst, extData.VDATA);

        rsrcDesc.read();
        offset.read();
        data.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_DWORDX2 class methods ---

    Inst_MUBUF__BUFFER_STORE_DWORDX2
        ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX2

    Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2

    // --- description from .arch file ---
    // Untyped buffer store 2 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            wf->decExpInstsIssued();
            wf->untrackExpInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*2 + 1]
                    = data1[lane];
            }
        }
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_DWORDX3 class methods ---

    Inst_MUBUF__BUFFER_STORE_DWORDX3
        ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX3

    Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3

    // --- description from .arch file ---
    // Untyped buffer store 3 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            wf->decExpInstsIssued();
            wf->untrackExpInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();
        data2.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 3]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*3 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*3 + 2]
                    = data2[lane];
            }
        }
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<3>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_DWORDX4 class methods ---

    Inst_MUBUF__BUFFER_STORE_DWORDX4
        ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX4

    Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4

    // --- description from .arch file ---
    // Untyped buffer store 4 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            wf->decExpInstsIssued();
            wf->untrackExpInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
        ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();
        data2.read();
        data3.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2]
                    = data2[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 3]
                    = data3[lane];
            }
        }
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_LDS_DWORD class methods ---

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD
        ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
    {
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    // --- description from .arch file ---
    // Store one DWORD from LDS memory to system memory without utilizing
    // VGPRs.
    void
    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_WBINVL1 class methods ---

    Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1")
    {
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
        setFlag(MemSync);
    } // Inst_MUBUF__BUFFER_WBINVL1

    Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1

    // --- description from .arch file ---
    // Write back and invalidate the shader L1.
    // Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
        } else {
            fatal("Unsupported scope for flat instruction.\n");
        }
    } // execute

    void
    Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // TODO: Fix it for gfx10. Once we have the new gfx10 cache model, we
        // need to precisely communicate the writeback-invalidate operation to
        // the new gfx10 coalescer rather than sending AcquireRelease markers.
        // The SICoalescer would need to be updated appropriately as well.
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc
    void
    Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_WBINVL1_VOL class methods ---

    Inst_MUBUF__BUFFER_WBINVL1_VOL
        ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol")
    {
        // This instruction is the same as buffer_wbinvl1 except that it
        // only invalidates L1 shader lines with MTYPE SC and GC. Since the
        // Hermes L1 (TCP) does not differentiate between its cache lines,
        // this instruction currently behaves (and is implemented) exactly
        // like buffer_wbinvl1.
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
        setFlag(MemSync);
    } // Inst_MUBUF__BUFFER_WBINVL1_VOL

    Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL

    // --- description from .arch file ---
    // Write back and invalidate the shader L1 only for lines that are marked
    // volatile.
    // Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
        } else {
            fatal("Unsupported scope for flat instruction.\n");
        }
    } // execute
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_SWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap")
    {
        setFlag(AtomicExch);
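        // GLC selects between returning and non-returning atomics: with
        // GLC set, the pre-operation memory value is written back to the
        // destination VGPRs; otherwise the result is discarded.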
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP

    Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            wf->untrackVMemInst(gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 src(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1);

        rsrcDesc.read();
        offset.read();
        src.read();
        cmp.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

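        // Stage the CAS operands per lane: x_data carries the swap value
        // and a_data the compare value; the memory system applies the
        // compare-and-swap atomically.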
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
                    = src[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = cmp[lane];
            }
        }

        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDATA);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_ADD
        ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_ADD

    Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_SUB class methods ---
2355
2356 Inst_MUBUF__BUFFER_ATOMIC_SUB
2357 ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
2358 : Inst_MUBUF(iFmt, "buffer_atomic_sub")
2359 {
2360 setFlag(AtomicSub);
2361 if (instData.GLC) {
2362 setFlag(AtomicReturn);
2363 } else {
2364 setFlag(AtomicNoReturn);
2365 }
2366 setFlag(MemoryRef);
2367 setFlag(GlobalSegment);
2368 } // Inst_MUBUF__BUFFER_ATOMIC_SUB
2369
2371 {
2372 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB
2373
2374 // --- description from .arch file ---
2375 // 32b:
2376 // tmp = MEM[ADDR];
2377 // MEM[ADDR] -= DATA;
2378 // RETURN_DATA = tmp.
2379 void
2380 Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
2381 {
2382 panicUnimplemented();
2383 } // execute
2384 // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN class methods ---
2385
2386 Inst_MUBUF__BUFFER_ATOMIC_SMIN
2387 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
2388 : Inst_MUBUF(iFmt, "buffer_atomic_smin")
2389 {
2390 setFlag(AtomicMin);
2391 if (instData.GLC) {
2392 setFlag(AtomicReturn);
2393 } else {
2394 setFlag(AtomicNoReturn);
2395 }
2396 setFlag(MemoryRef);
2397 setFlag(GlobalSegment);
2398 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN
2399
2400 Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
2401 {
2402 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN
2403
2404 // --- description from .arch file ---
2405 // 32b:
2406 // tmp = MEM[ADDR];
2407 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
2408 // RETURN_DATA = tmp.
2409 void
2410 Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
2411 {
2412 panicUnimplemented();
2413 } // execute
2414 // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN class methods ---
2415
2416 Inst_MUBUF__BUFFER_ATOMIC_UMIN
2417 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
2418 : Inst_MUBUF(iFmt, "buffer_atomic_umin")
2419 {
2420 setFlag(AtomicMin);
2421 if (instData.GLC) {
2422 setFlag(AtomicReturn);
2423 } else {
2424 setFlag(AtomicNoReturn);
2425 }
2426 setFlag(MemoryRef);
2427 setFlag(GlobalSegment);
2428 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN
2429
2430 Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
2431 {
2432 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN
2433
2434 // --- description from .arch file ---
2435 // 32b:
2436 // tmp = MEM[ADDR];
2437 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
2438 // RETURN_DATA = tmp.
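// The signed/unsigned distinction matters at the bit-pattern level: for
// tmp = 0xFFFFFFFF and DATA = 1, buffer_atomic_smin keeps tmp (0xFFFFFFFF
// is -1 signed, already the minimum), while buffer_atomic_umin stores 1
// (0xFFFFFFFF is UINT32_MAX unsigned).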
2439 void
2440 Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
2441 {
2442 panicUnimplemented();
2443 } // execute
2444 // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX class methods ---
2445
2446 Inst_MUBUF__BUFFER_ATOMIC_SMAX
2447 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
2448 : Inst_MUBUF(iFmt, "buffer_atomic_smax")
2449 {
2450 setFlag(AtomicMax);
2451 if (instData.GLC) {
2452 setFlag(AtomicReturn);
2453 } else {
2454 setFlag(AtomicNoReturn);
2455 }
2456 setFlag(MemoryRef);
2457 setFlag(GlobalSegment);
2458 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX
2459
2460 Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
2461 {
2462 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX
2463
2464 // --- description from .arch file ---
2465 // 32b:
2466 // tmp = MEM[ADDR];
2467 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
2468 // RETURN_DATA = tmp.
2469 void
2470 Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
2471 {
2472 panicUnimplemented();
2473 } // execute
2474 // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX class methods ---
2475
2476 Inst_MUBUF__BUFFER_ATOMIC_UMAX
2477 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
2478 : Inst_MUBUF(iFmt, "buffer_atomic_umax")
2479 {
2480 setFlag(AtomicMax);
2481 if (instData.GLC) {
2482 setFlag(AtomicReturn);
2483 } else {
2484 setFlag(AtomicNoReturn);
2485 }
2486 setFlag(MemoryRef);
2487 setFlag(GlobalSegment);
2488 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX
2489
2490 Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
2491 {
2492 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX
2493
2494 // --- description from .arch file ---
2495 // 32b:
2496 // tmp = MEM[ADDR];
2497 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
2498 // RETURN_DATA = tmp.
2499 void
2500 Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
2501 {
2502 panicUnimplemented();
2503 } // execute
2504 // --- Inst_MUBUF__BUFFER_ATOMIC_AND class methods ---
2505
2506 Inst_MUBUF__BUFFER_ATOMIC_AND
2507 ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
2508 : Inst_MUBUF(iFmt, "buffer_atomic_and")
2509 {
2510 setFlag(AtomicAnd);
2511 if (instData.GLC) {
2512 setFlag(AtomicReturn);
2513 } else {
2514 setFlag(AtomicNoReturn);
2515 }
2516 setFlag(MemoryRef);
2517 setFlag(GlobalSegment);
2518 } // Inst_MUBUF__BUFFER_ATOMIC_AND
2519
2520 Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
2521 {
2522 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND
2523
2524 // --- description from .arch file ---
2525 // 32b:
2526 // tmp = MEM[ADDR];
2527 // MEM[ADDR] &= DATA;
2528 // RETURN_DATA = tmp.
2529 void
2530 Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
2531 {
2532 panicUnimplemented();
2533 } // execute
2534 // --- Inst_MUBUF__BUFFER_ATOMIC_OR class methods ---
2535
2536 Inst_MUBUF__BUFFER_ATOMIC_OR
2537 ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
2538 : Inst_MUBUF(iFmt, "buffer_atomic_or")
2539 {
2540 setFlag(AtomicOr);
2541 if (instData.GLC) {
2542 setFlag(AtomicReturn);
2543 } else {
2544 setFlag(AtomicNoReturn);
2545 }
2546 setFlag(MemoryRef);
2547 setFlag(GlobalSegment);
2548 } // Inst_MUBUF__BUFFER_ATOMIC_OR
2549
2550 Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
2551 {
2552 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR
2553
2554 // --- description from .arch file ---
2555 // 32b:
2556 // tmp = MEM[ADDR];
2557 // MEM[ADDR] |= DATA;
2558 // RETURN_DATA = tmp.
2559 void
2560 Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
2561 {
2562 panicUnimplemented();
2563 } // execute
2564 // --- Inst_MUBUF__BUFFER_ATOMIC_XOR class methods ---
2565
2566 Inst_MUBUF__BUFFER_ATOMIC_XOR
2567 ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
2568 : Inst_MUBUF(iFmt, "buffer_atomic_xor")
2569 {
2570 setFlag(AtomicXor);
2571 if (instData.GLC) {
2572 setFlag(AtomicReturn);
2573 } else {
2574 setFlag(AtomicNoReturn);
2575 }
2576 setFlag(MemoryRef);
2577 setFlag(GlobalSegment);
2578 } // Inst_MUBUF__BUFFER_ATOMIC_XOR
2579
2580 Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
2581 {
2582 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR
2583
2584 // --- description from .arch file ---
2585 // 32b:
2586 // tmp = MEM[ADDR];
2587 // MEM[ADDR] ^= DATA;
2588 // RETURN_DATA = tmp.
2589 void
2590 Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
2591 {
2592 panicUnimplemented();
2593 } // execute
2594 // --- Inst_MUBUF__BUFFER_ATOMIC_INC class methods ---
2595
2596 Inst_MUBUF__BUFFER_ATOMIC_INC
2597 ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
2598 : Inst_MUBUF(iFmt, "buffer_atomic_inc")
2599 {
2600 setFlag(AtomicInc);
2601 if (instData.GLC) {
2602 setFlag(AtomicReturn);
2603 } else {
2604 setFlag(AtomicNoReturn);
2605 }
2606 setFlag(MemoryRef);
2607 setFlag(GlobalSegment);
2608 } // Inst_MUBUF__BUFFER_ATOMIC_INC
2609
2610 Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
2611 {
2612 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC
2613
2614 // --- description from .arch file ---
2615 // 32b:
2616 // tmp = MEM[ADDR];
2617 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
2618 // RETURN_DATA = tmp.
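// This implements a wrapping counter modulo DATA+1: with DATA = 3 the
// stored value cycles 0 -> 1 -> 2 -> 3 -> 0, returning to zero once tmp
// reaches DATA.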
2619 void
2620 Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
2621 {
2622 panicUnimplemented();
2623 } // execute
2624 // --- Inst_MUBUF__BUFFER_ATOMIC_DEC class methods ---
2625
2626 Inst_MUBUF__BUFFER_ATOMIC_DEC
2627 ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
2628 : Inst_MUBUF(iFmt, "buffer_atomic_dec")
2629 {
2630 setFlag(AtomicDec);
2631 if (instData.GLC) {
2632 setFlag(AtomicReturn);
2633 } else {
2634 setFlag(AtomicNoReturn);
2635 }
2636 setFlag(MemoryRef);
2637 setFlag(GlobalSegment);
2638 } // Inst_MUBUF__BUFFER_ATOMIC_DEC
2639
2640 Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
2641 {
2642 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC
2643
2644 // --- description from .arch file ---
2645 // 32b:
2646 // tmp = MEM[ADDR];
2647 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
2648 // (unsigned compare); RETURN_DATA = tmp.
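// The inverse wrapping counter: with DATA = 3 the stored value cycles
// 3 -> 2 -> 1 -> 0 -> 3, reloading DATA when tmp is zero or already out
// of range (tmp > DATA).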
2649 void
2650 Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
2651 {
2652 panicUnimplemented();
2653 } // execute
2654 // --- Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16 class methods ---
2655
2656 Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16
2657 ::Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16(InFmt_MUBUF *iFmt)
2658 : Inst_MUBUF(iFmt, "buffer_atomic_pk_add_bf16")
2659 {
2660 setFlag(AtomicPkAddBF16);
2661
2662 // MI300 spec: "Float atomics must set SC[0]=0 (no return value)."
2663 panic_if(instData.GLC, "Saw float atomic with return set!");
2664
2665 setFlag(AtomicNoReturn);
2666 } // Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16
2667
2668 Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16
2669 ::~Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16()
2670 {
2671 } // ~Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16
2672
2673 void
2674 Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16::execute(GPUDynInstPtr gpuDynInst)
2675 {
2676 Wavefront *wf = gpuDynInst->wavefront();
2677
2678 if (gpuDynInst->exec_mask.none()) {
2679 wf->decVMemInstsIssued();
2680 return;
2681 }
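// Note: unlike the cmpswap early-out above, this path only decrements the
// issued count and does not call wf->untrackVMemInst().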
2682
2683 gpuDynInst->execUnitId = wf->execUnitId;
2684 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2685 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2686
2687 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
2688 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
2689 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
2690 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
2691 ConstVecOperandU32 src(gpuDynInst, extData.VDATA);
2692
2693 rsrcDesc.read();
2694 offset.read();
2695 src.read();
2696
2697 int inst_offset = instData.OFFSET;
2698
2699 if (!instData.IDXEN && !instData.OFFEN) {
2700 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2701 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2702 addr0, addr1, rsrcDesc, offset, inst_offset);
2703 } else if (!instData.IDXEN && instData.OFFEN) {
2704 addr0.read();
2705 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2706 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2707 addr0, addr1, rsrcDesc, offset, inst_offset);
2708 } else if (instData.IDXEN && !instData.OFFEN) {
2709 addr0.read();
2710 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2711 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2712 addr1, addr0, rsrcDesc, offset, inst_offset);
2713 } else {
2714 addr0.read();
2715 addr1.read();
2716 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2717 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2718 addr1, addr0, rsrcDesc, offset, inst_offset);
2719 }
2720
2721 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2722 if (gpuDynInst->exec_mask[lane]) {
2723 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
2724 = src[lane];
2725 }
2726 }
2727
2728 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
2729 } // execute
2730
2731 void
2732 Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16::
2733 initiateAcc(GPUDynInstPtr gpuDynInst)
2734 {
2735 initAtomicAccess<VecElemU32>(gpuDynInst);
2736 } // initiateAcc
2737
2738 void
2739 Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16::
2740 completeAcc(GPUDynInstPtr gpuDynInst)
2741 {
2742 } // completeAcc
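// Editorial sketch of the packed operation performed at memory (the lo/hi
// accessors are illustrative, not gem5 API): each 32-bit VDATA element
// packs two bfloat16 values, added half-by-half in place:
//
//     MEM[ADDR].lo_bf16 += DATA.lo_bf16;
//     MEM[ADDR].hi_bf16 += DATA.hi_bf16;
//
// completeAcc() is empty because the constructor's panic_if guarantees
// these float atomics are always AtomicNoReturn.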
2743 // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 class methods ---
2744
2745 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2746 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
2747 : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
2748 {
2749 setFlag(AtomicExch);
2750 if (instData.GLC) {
2751 setFlag(AtomicReturn);
2752 } else {
2753 setFlag(AtomicNoReturn);
2754 }
2755 setFlag(MemoryRef);
2756 setFlag(GlobalSegment);
2757 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2758
2759 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
2760 {
2761 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2762
2763 // --- description from .arch file ---
2764 // 64b:
2765 // tmp = MEM[ADDR];
2766 // MEM[ADDR] = DATA[0:1];
2767 // RETURN_DATA[0:1] = tmp.
2768 void
2769 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
2770 {
2771 panicUnimplemented();
2772 } // execute
2773 // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 class methods ---
2774
2775 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2776 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
2777 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
2778 {
2779 setFlag(AtomicCAS);
2780 if (instData.GLC) {
2781 setFlag(AtomicReturn);
2782 } else {
2783 setFlag(AtomicNoReturn);
2784 }
2785 setFlag(MemoryRef);
2786 setFlag(GlobalSegment);
2787 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2788
2789 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2790 ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
2791 {
2792 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2793
2794 // --- description from .arch file ---
2795 // 64b:
2796 // tmp = MEM[ADDR];
2797 // src = DATA[0:1];
2798 // cmp = DATA[2:3];
2799 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
2800 // RETURN_DATA[0:1] = tmp.
2801 void
2802 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
2803 {
2804 panicUnimplemented();
2805 } // execute
2806 // --- Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 class methods ---
2807
2808 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2809 ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
2810 : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
2811 {
2812 setFlag(AtomicAdd);
2813 if (instData.GLC) {
2814 setFlag(AtomicReturn);
2815 } else {
2816 setFlag(AtomicNoReturn);
2817 }
2818 setFlag(MemoryRef);
2819 setFlag(GlobalSegment);
2820 } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2821
2822 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
2823 {
2824 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2825
2826 // --- description from .arch file ---
2827 // 64b:
2828 // tmp = MEM[ADDR];
2829 // MEM[ADDR] += DATA[0:1];
2830 // RETURN_DATA[0:1] = tmp.
2831 void
2832 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
2833 {
2834 panicUnimplemented();
2835 } // execute
2836 // --- Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 class methods ---
2837
2838 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2839 ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
2840 : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
2841 {
2842 setFlag(AtomicSub);
2843 if (instData.GLC) {
2844 setFlag(AtomicReturn);
2845 } else {
2846 setFlag(AtomicNoReturn);
2847 }
2848 setFlag(MemoryRef);
2849 setFlag(GlobalSegment);
2850 } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2851
2852 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
2853 {
2854 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2855
2856 // --- description from .arch file ---
2857 // 64b:
2858 // tmp = MEM[ADDR];
2859 // MEM[ADDR] -= DATA[0:1];
2860 // RETURN_DATA[0:1] = tmp.
2861 void
2862 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
2863 {
2864 panicUnimplemented();
2865 } // execute
2866 // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 class methods ---
2867
2868 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2869 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
2870 : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
2871 {
2872 setFlag(AtomicMin);
2873 if (instData.GLC) {
2874 setFlag(AtomicReturn);
2875 } else {
2876 setFlag(AtomicNoReturn);
2877 }
2878 setFlag(MemoryRef);
2879 setFlag(GlobalSegment);
2880 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2881
2882 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
2883 {
2884 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2885
2886 // --- description from .arch file ---
2887 // 64b:
2888 // tmp = MEM[ADDR];
2889 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
2890 // RETURN_DATA[0:1] = tmp.
2891 void
2892 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
2893 {
2894 panicUnimplemented();
2895 } // execute
2896 // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 class methods ---
2897
2898 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2899 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
2900 : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
2901 {
2902 setFlag(AtomicMin);
2903 if (instData.GLC) {
2904 setFlag(AtomicReturn);
2905 } else {
2906 setFlag(AtomicNoReturn);
2907 }
2908 setFlag(MemoryRef);
2909 setFlag(GlobalSegment);
2910 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2911
2912 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
2913 {
2914 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2915
2916 // --- description from .arch file ---
2917 // 64b:
2918 // tmp = MEM[ADDR];
2919 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
2920 // RETURN_DATA[0:1] = tmp.
2921 void
2922 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
2923 {
2924 panicUnimplemented();
2925 } // execute
2926 // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 class methods ---
2927
2928 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2929 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
2930 : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
2931 {
2932 setFlag(AtomicMax);
2933 if (instData.GLC) {
2934 setFlag(AtomicReturn);
2935 } else {
2936 setFlag(AtomicNoReturn);
2937 }
2938 setFlag(MemoryRef);
2939 setFlag(GlobalSegment);
2940 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2941
2942 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
2943 {
2944 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2945
2946 // --- description from .arch file ---
2947 // 64b:
2948 // tmp = MEM[ADDR];
2949 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
2950 // RETURN_DATA[0:1] = tmp.
2951 void
2952 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
2953 {
2954 panicUnimplemented();
2955 } // execute
2956 // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 class methods ---
2957
2958 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2959 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
2960 : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
2961 {
2962 setFlag(AtomicMax);
2963 if (instData.GLC) {
2964 setFlag(AtomicReturn);
2965 } else {
2966 setFlag(AtomicNoReturn);
2967 }
2968 setFlag(MemoryRef);
2969 setFlag(GlobalSegment);
2970 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2971
2972 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
2973 {
2974 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2975
2976 // --- description from .arch file ---
2977 // 64b:
2978 // tmp = MEM[ADDR];
2979 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
2980 // RETURN_DATA[0:1] = tmp.
2981 void
2982 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
2983 {
2984 panicUnimplemented();
2985 } // execute
2986 // --- Inst_MUBUF__BUFFER_ATOMIC_AND_X2 class methods ---
2987
2988 Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2989 ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
2990 : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
2991 {
2992 setFlag(AtomicAnd);
2993 if (instData.GLC) {
2994 setFlag(AtomicReturn);
2995 } else {
2996 setFlag(AtomicNoReturn);
2997 }
2998 setFlag(MemoryRef);
2999 setFlag(GlobalSegment);
3000 } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2
3001
3002 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
3003 {
3004 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2
3005
3006 // --- description from .arch file ---
3007 // 64b:
3008 // tmp = MEM[ADDR];
3009 // MEM[ADDR] &= DATA[0:1];
3010 // RETURN_DATA[0:1] = tmp.
3011 void
3012 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
3013 {
3014 panicUnimplemented();
3015 } // execute
3016 // --- Inst_MUBUF__BUFFER_ATOMIC_OR_X2 class methods ---
3017
3018 Inst_MUBUF__BUFFER_ATOMIC_OR_X2
3019 ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
3020 : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
3021 {
3022 setFlag(AtomicOr);
3023 if (instData.GLC) {
3024 setFlag(AtomicReturn);
3025 } else {
3026 setFlag(AtomicNoReturn);
3027 }
3028 } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2
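// Note: unlike the other buffer atomics in this file, this constructor
// does not set MemoryRef or GlobalSegment.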
3029
3030 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
3031 {
3032 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2
3033
3034 // --- description from .arch file ---
3035 // 64b:
3036 // tmp = MEM[ADDR];
3037 // MEM[ADDR] |= DATA[0:1];
3038 // RETURN_DATA[0:1] = tmp.
3039 void
3040 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
3041 {
3042 panicUnimplemented();
3043 } // execute
3044 // --- Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 class methods ---
3045
3046 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
3047 ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
3048 : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
3049 {
3050 setFlag(AtomicXor);
3051 if (instData.GLC) {
3052 setFlag(AtomicReturn);
3053 } else {
3054 setFlag(AtomicNoReturn);
3055 }
3056 setFlag(MemoryRef);
3057 setFlag(GlobalSegment);
3058 } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
3059
3060 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
3061 {
3062 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
3063
3064 // --- description from .arch file ---
3065 // 64b:
3066 // tmp = MEM[ADDR];
3067 // MEM[ADDR] ^= DATA[0:1];
3068 // RETURN_DATA[0:1] = tmp.
3069 void
3070 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
3071 {
3072 panicUnimplemented();
3073 } // execute
3074 // --- Inst_MUBUF__BUFFER_ATOMIC_INC_X2 class methods ---
3075
3076 Inst_MUBUF__BUFFER_ATOMIC_INC_X2
3077 ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
3078 : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
3079 {
3080 setFlag(AtomicInc);
3081 if (instData.GLC) {
3082 setFlag(AtomicReturn);
3083 } else {
3084 setFlag(AtomicNoReturn);
3085 }
3086 setFlag(MemoryRef);
3087 setFlag(GlobalSegment);
3088 } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2
3089
3090 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
3091 {
3092 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2
3093
3094 // --- description from .arch file ---
3095 // 64b:
3096 // tmp = MEM[ADDR];
3097 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
3098 // RETURN_DATA[0:1] = tmp.
3099 void
3100 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
3101 {
3102 panicUnimplemented();
3103 } // execute
3104 // --- Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 class methods ---
3105
3106 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
3107 ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
3108 : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
3109 {
3110 setFlag(AtomicDec);
3111 if (instData.GLC) {
3112 setFlag(AtomicReturn);
3113 } else {
3114 setFlag(AtomicNoReturn);
3115 }
3116 setFlag(MemoryRef);
3117 setFlag(GlobalSegment);
3118 } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
3119
3120 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
3121 {
3122 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
3123
3124 // --- description from .arch file ---
3125 // 64b:
3126 // tmp = MEM[ADDR];
3127 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
3128 // (unsigned compare);
3129 // RETURN_DATA[0:1] = tmp.
3130 void
3131 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
3132 {
3133 panicUnimplemented();
3134 } // execute
3135} // namespace VegaISA
3136} // namespace gem5