gem5 v24.0.0.0
mubuf.cc
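Source listing from gem5's AMD GPU (VegaISA) model: the MUBUF (untyped vector buffer memory) instruction implementations, covering buffer format loads/stores, byte/short/dword loads and stores, L1 writeback-invalidate, and buffer atomics.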
1 /*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "arch/amdgpu/vega/insts/instructions.hh"
33
34 namespace gem5
35 {
36
37 namespace VegaISA
38 {
39 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_X class methods ---
40
41 Inst_MUBUF__BUFFER_LOAD_FORMAT_X
42 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
43 : Inst_MUBUF(iFmt, "buffer_load_format_x")
44 {
45 setFlag(MemoryRef);
46 setFlag(Load);
47 setFlag(GlobalSegment);
48 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X
49
50 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
51 {
52 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X
53
54 // --- description from .arch file ---
55 // Untyped buffer load 1 dword with format conversion.
56 void
57 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
58 {
59 panicUnimplemented();
60 } // execute
61
62 void
63 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
64 {
65 } // initiateAcc
66
67 void
68 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
69 {
70 } // completeAcc
71 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XY class methods ---
72
73 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
74 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
75 : Inst_MUBUF(iFmt, "buffer_load_format_xy")
76 {
77 setFlag(MemoryRef);
78 setFlag(Load);
79 setFlag(GlobalSegment);
80 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
81
82 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
83 {
84 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
85
86 // --- description from .arch file ---
87 // Untyped buffer load 2 dwords with format conversion.
88 void
89 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
90 {
91 panicUnimplemented();
92 } // execute
93
94 void
95 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
96 {
97 } // initiateAcc
98
99 void
100 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
101 {
102 } // completeAcc
103 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ class methods ---
104
105 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
106 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
107 : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
108 {
109 setFlag(MemoryRef);
110 setFlag(Load);
111 setFlag(GlobalSegment);
112 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
113
114 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
115 {
116 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
117
118 // --- description from .arch file ---
119 // Untyped buffer load 3 dwords with format conversion.
120 void
121 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
122 {
123 panicUnimplemented();
124 } // execute
125
126 void
127 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
128 {
129 } // initiateAcc
130
131 void
132 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
133 {
134 } // completeAcc
135 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW class methods ---
136
137 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
138 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
139 : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
140 {
141 setFlag(MemoryRef);
142 setFlag(Load);
143 setFlag(GlobalSegment);
144 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
145
146 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
147 {
148 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
149
150 // --- description from .arch file ---
151 // Untyped buffer load 4 dwords with format conversion.
152 void
153 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
154 {
155 panicUnimplemented();
156 } // execute
157
158 void
159 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
160 {
161 } // initiateAcc
162
163 void
164 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
165 {
166 } // completeAcc
167 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_X class methods ---
168
169 Inst_MUBUF__BUFFER_STORE_FORMAT_X
170 ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
171 : Inst_MUBUF(iFmt, "buffer_store_format_x")
172 {
173 setFlag(MemoryRef);
174 setFlag(Store);
175 setFlag(GlobalSegment);
176 } // Inst_MUBUF__BUFFER_STORE_FORMAT_X
177
178 Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
179 {
180 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X
181
182 // --- description from .arch file ---
183 // Untyped buffer store 1 dword with format conversion.
184 void
185 Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
186 {
187 panicUnimplemented();
188 } // execute
189
190 void
191 Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
192 {
193 } // initiateAcc
194
195 void
196 Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
197 {
198 } // completeAcc
199 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XY class methods ---
200
201 Inst_MUBUF__BUFFER_STORE_FORMAT_XY
202 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
203 : Inst_MUBUF(iFmt, "buffer_store_format_xy")
204 {
205 setFlag(MemoryRef);
206 setFlag(Store);
207 setFlag(GlobalSegment);
208 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY
209
210 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
211 {
212 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY
213
214 // --- description from .arch file ---
215 // Untyped buffer store 2 dwords with format conversion.
216 void
217 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
218 {
219 panicUnimplemented();
220 } // execute
221
222 void
223 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
224 {
225 } // initiateAcc
226
227 void
228 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
229 {
230 } // completeAcc
231 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ class methods ---
232
233 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
234 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
235 : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
236 {
237 setFlag(MemoryRef);
238 setFlag(Store);
239 setFlag(GlobalSegment);
240 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
241
242 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
243 {
244 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
245
246 // --- description from .arch file ---
247 // Untyped buffer store 3 dwords with format conversion.
248 void
249 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
250 {
251 panicUnimplemented();
252 } // execute
253
254 void
255 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
256 {
257 } // initiateAcc
258
259 void
260 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
261 {
262 } // completeAcc
263 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW class methods ---
264
265 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
266 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
267 : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
268 {
269 setFlag(MemoryRef);
270 setFlag(Store);
271 setFlag(GlobalSegment);
272 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
273
274 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
275 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
276 {
277 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
278
279 // --- description from .arch file ---
280 // Untyped buffer store 4 dwords with format conversion.
281 void
282 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
283 {
284 panicUnimplemented();
285 } // execute
286
287 void
288 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
289 {
290 } // initiateAcc
291
292 void
293 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
294 {
295 } // completeAcc
296 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X class methods ---
297
298 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
299 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
300 : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
301 {
302 setFlag(MemoryRef);
303 setFlag(Load);
304 setFlag(GlobalSegment);
305 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
306
307 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
308 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
309 {
310 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
311
312 // --- description from .arch file ---
313 // Untyped buffer load 1 dword with format conversion.
314 void
315 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
316 {
317 panicUnimplemented();
318 } // execute
319
320 void
321 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
322 {
323 } // initiateAcc
324
325 void
326 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
327 {
328 } // completeAcc
329 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY class methods ---
330
331 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
332 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
333 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
334 {
335 setFlag(MemoryRef);
336 setFlag(Load);
337 setFlag(GlobalSegment);
338 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
339
340 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
341 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
342 {
343 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
344
345 // --- description from .arch file ---
346 // Untyped buffer load 2 dwords with format conversion.
347 void
348 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
349 {
350 panicUnimplemented();
351 } // execute
352
353 void
354 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
355 GPUDynInstPtr gpuDynInst)
356 {
357 } // initiateAcc
358
359 void
360 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
361 GPUDynInstPtr gpuDynInst)
362 {
363 } // completeAcc
364 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ class methods ---
365
366 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
367 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
368 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
369 {
370 setFlag(MemoryRef);
371 setFlag(Load);
372 setFlag(GlobalSegment);
373 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
374
375 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
376 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
377 {
378 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
379
380 // --- description from .arch file ---
381 // Untyped buffer load 3 dwords with format conversion.
382 void
383 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
384 {
385 panicUnimplemented();
386 } // execute
387
388 void
389 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
390 GPUDynInstPtr gpuDynInst)
391 {
392 } // initiateAcc
393
394 void
395 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
396 GPUDynInstPtr gpuDynInst)
397 {
398 } // completeAcc
399 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW class methods ---
400
401 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
402 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
403 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
404 {
405 setFlag(MemoryRef);
406 setFlag(Load);
407 setFlag(GlobalSegment);
408 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
409
410 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
411 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
412 {
413 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
414
415 // --- description from .arch file ---
416 // Untyped buffer load 4 dwords with format conversion.
417 void
418 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
419 {
420 panicUnimplemented();
421 } // execute
422
423 void
424 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
425 GPUDynInstPtr gpuDynInst)
426 {
427 } // initiateAcc
428
429 void
430 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
431 GPUDynInstPtr gpuDynInst)
432 {
433 } // completeAcc
434 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X class methods ---
435
436 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
437 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
438 : Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
439 {
440 setFlag(Store);
441 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
442
443 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
444 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
445 {
446 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
447
448 // --- description from .arch file ---
449 // Untyped buffer store 1 dword with format conversion.
450 void
451 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
452 {
453 panicUnimplemented();
454 } // execute
455
456 void
457 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
458 GPUDynInstPtr gpuDynInst)
459 {
460 } // initiateAcc
461
462 void
463 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
464 GPUDynInstPtr gpuDynInst)
465 {
466 } // completeAcc
467 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY class methods ---
468
469 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
470 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
471 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
472 {
473 setFlag(MemoryRef);
474 setFlag(Store);
475 setFlag(GlobalSegment);
476 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
477
478 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
479 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
480 {
481 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
482
483 // --- description from .arch file ---
484 // Untyped buffer store 2 dwords with format conversion.
485 void
486 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
487 {
488 panicUnimplemented();
489 } // execute
490
491 void
492 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
493 GPUDynInstPtr gpuDynInst)
494 {
495 } // initiateAcc
496
497 void
498 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
499 GPUDynInstPtr gpuDynInst)
500 {
501 } // completeAcc
502 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ class methods ---
503
504 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
505 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
506 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
507 {
508 setFlag(MemoryRef);
509 setFlag(Store);
510 setFlag(GlobalSegment);
511 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
512
513 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
514 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
515 {
516 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
517
518 // --- description from .arch file ---
519 // Untyped buffer store 3 dwords with format conversion.
520 void
521 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
522 {
523 panicUnimplemented();
524 } // execute
525
526 void
527 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
528 GPUDynInstPtr gpuDynInst)
529 {
530 } // initiateAcc
531
532 void
533 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
534 GPUDynInstPtr gpuDynInst)
535 {
536 } // completeAcc
537 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW class methods ---
538
539 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
540 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
541 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
542 {
543 setFlag(MemoryRef);
544 setFlag(Store);
545 setFlag(GlobalSegment);
546 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
547
548 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
549 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
550 {
551 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
552
553 // --- description from .arch file ---
554 // Untyped buffer store 4 dwords with format conversion.
555 void
556 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
557 {
558 panicUnimplemented();
559 } // execute
560
561 void
562 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
563 GPUDynInstPtr gpuDynInst)
564 {
565 } // initiateAcc
566
567 void
568 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
569 GPUDynInstPtr gpuDynInst)
570 {
571 } // completeAcc
572 // --- Inst_MUBUF__BUFFER_LOAD_UBYTE class methods ---
573
574 Inst_MUBUF__BUFFER_LOAD_UBYTE
575 ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
576 : Inst_MUBUF(iFmt, "buffer_load_ubyte")
577 {
578 setFlag(MemoryRef);
579 setFlag(Load);
580 if (instData.LDS) {
581 setFlag(GroupSegment);
582 } else {
583 setFlag(GlobalSegment);
584 }
585 } // Inst_MUBUF__BUFFER_LOAD_UBYTE
586
587 Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
588 {
589 } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE
590
591 // --- description from .arch file ---
592 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
593 void
594 Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
595 {
596 Wavefront *wf = gpuDynInst->wavefront();
597
598 if (gpuDynInst->exec_mask.none()) {
599 wf->decVMemInstsIssued();
600 return;
601 }
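// Note: the wavefront counted this instruction as issued before
// execute() ran; with an all-zero EXEC mask nothing is sent to memory,
// so the VMEM issue count is rescinded here. The same early-out
// appears in every MUBUF execute() below.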
602
603 gpuDynInst->execUnitId = wf->execUnitId;
604 gpuDynInst->latency.init(gpuDynInst->computeUnit());
605 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
606
607 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
608 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
609 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
610 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
611
612 rsrcDesc.read();
613 offset.read();
614
615 int inst_offset = instData.OFFSET;
616
617 if (!instData.IDXEN && !instData.OFFEN) {
618 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
619 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
620 addr0, addr1, rsrcDesc, offset, inst_offset);
621 } else if (!instData.IDXEN && instData.OFFEN) {
622 addr0.read();
623 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
624 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
625 addr0, addr1, rsrcDesc, offset, inst_offset);
626 } else if (instData.IDXEN && !instData.OFFEN) {
627 addr0.read();
628 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
629 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
630 addr1, addr0, rsrcDesc, offset, inst_offset);
631 } else {
632 addr0.read();
633 addr1.read();
634 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
635 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
636 addr1, addr0, rsrcDesc, offset, inst_offset);
637 }
638
639 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
640 } // execute
641
642 void
643 Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
644 {
645 initMemRead<VecElemU8>(gpuDynInst);
646 } // initiateAcc
647
648 void
649 Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
650 {
651 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
652
653 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
654 if (gpuDynInst->exec_mask[lane]) {
655 if (!oobMask[lane]) {
656 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
657 gpuDynInst->d_data))[lane]);
658 } else {
659 vdst[lane] = 0;
660 }
661 }
662 }
663
664 vdst.write();
665 } // completeAcc
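// Note: buffer_load_ubyte above is the first fully implemented MUBUF op
// in this file and shows the model's three-phase split: execute()
// computes per-lane addresses and issues to globalMemoryPipe,
// initiateAcc() reads memory into gpuDynInst->d_data, and completeAcc()
// zero-extends each byte into the destination VGPR, forcing 0 for lanes
// the buffer resource descriptor marked out-of-bounds (oobMask).
// A minimal stand-alone sketch of that last step (plain C++ with
// hypothetical stand-in types, not the gem5 operand API):
//
//     #include <cstdint>
//     #include <vector>
//
//     std::vector<uint32_t>
//     zeroExtendUbyte(const std::vector<uint8_t> &d_data,
//                     const std::vector<bool> &exec_mask,
//                     const std::vector<bool> &oobMask)
//     {
//         std::vector<uint32_t> vdst(d_data.size(), 0);
//         for (size_t lane = 0; lane < d_data.size(); ++lane) {
//             if (exec_mask[lane])
//                 vdst[lane] = oobMask[lane] ? 0u : uint32_t(d_data[lane]);
//         }
//         return vdst;
//     }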
666
667 // --- Inst_MUBUF__BUFFER_LOAD_SBYTE class methods ---
668
669 Inst_MUBUF__BUFFER_LOAD_SBYTE
670 ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
671 : Inst_MUBUF(iFmt, "buffer_load_sbyte")
672 {
673 setFlag(MemoryRef);
674 setFlag(Load);
675 setFlag(GlobalSegment);
676 } // Inst_MUBUF__BUFFER_LOAD_SBYTE
677
678 Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
679 {
680 } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE
681
682 // --- description from .arch file ---
683 // Untyped buffer load signed byte (sign extend to VGPR destination).
684 void
685 Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
686 {
687 panicUnimplemented();
688 } // execute
689
690 void
691 Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
692 {
693 } // initiateAcc
694
695 void
696 Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
697 {
698 } // completeAcc
699 // --- Inst_MUBUF__BUFFER_LOAD_USHORT class methods ---
700
701 Inst_MUBUF__BUFFER_LOAD_USHORT
702 ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
703 : Inst_MUBUF(iFmt, "buffer_load_ushort")
704 {
705 setFlag(MemoryRef);
706 setFlag(Load);
707 if (instData.LDS) {
708 setFlag(GroupSegment);
709 } else {
710 setFlag(GlobalSegment);
711 }
712 } // Inst_MUBUF__BUFFER_LOAD_USHORT
713
714 Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
715 {
716 } // ~Inst_MUBUF__BUFFER_LOAD_USHORT
717
718 // --- description from .arch file ---
719 // Untyped buffer load unsigned short (zero extend to VGPR destination).
720 void
721 Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
722 {
723 Wavefront *wf = gpuDynInst->wavefront();
724
725 if (gpuDynInst->exec_mask.none()) {
726 wf->decVMemInstsIssued();
727 return;
728 }
729
730 gpuDynInst->execUnitId = wf->execUnitId;
731 gpuDynInst->latency.init(gpuDynInst->computeUnit());
732 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
733
734 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
735 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
736 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
737 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
738
739 rsrcDesc.read();
740 offset.read();
741
742 int inst_offset = instData.OFFSET;
743
744 if (!instData.IDXEN && !instData.OFFEN) {
745 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
746 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
747 addr0, addr1, rsrcDesc, offset, inst_offset);
748 } else if (!instData.IDXEN && instData.OFFEN) {
749 addr0.read();
750 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
751 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
752 addr0, addr1, rsrcDesc, offset, inst_offset);
753 } else if (instData.IDXEN && !instData.OFFEN) {
754 addr0.read();
755 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
756 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
757 addr1, addr0, rsrcDesc, offset, inst_offset);
758 } else {
759 addr0.read();
760 addr1.read();
761 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
762 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
763 addr1, addr0, rsrcDesc, offset, inst_offset);
764 }
765
766 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
767 } // execute
768
769 void
770 Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
771 {
772 initMemRead<VecElemU16>(gpuDynInst);
773 } // initiateAcc
774
775 void
776 Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
777 {
778 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
779
780 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
781 if (gpuDynInst->exec_mask[lane]) {
782 if (!oobMask[lane]) {
783 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
784 gpuDynInst->d_data))[lane]);
785 } else {
786 vdst[lane] = 0;
787 }
788 }
789 }
790
791 vdst.write();
792 } // completeAcc
793
794 // --- Inst_MUBUF__BUFFER_LOAD_SSHORT class methods ---
795
796 Inst_MUBUF__BUFFER_LOAD_SSHORT
797 ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
798 : Inst_MUBUF(iFmt, "buffer_load_sshort")
799 {
800 setFlag(MemoryRef);
801 setFlag(Load);
802 setFlag(GlobalSegment);
803 } // Inst_MUBUF__BUFFER_LOAD_SSHORT
804
805 Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
806 {
807 } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT
808
809 // --- description from .arch file ---
810 // Untyped buffer load signed short (sign extend to VGPR destination).
811 void
812 Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
813 {
814 panicUnimplemented();
815 } // execute
816
817 void
818 Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
819 {
820 } // initiateAcc
821
822 void
823 Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
824 {
825 } // completeAcc
826 // --- Inst_MUBUF__BUFFER_LOAD_SHORT_D16 class methods ---
827
828 Inst_MUBUF__BUFFER_LOAD_SHORT_D16
829 ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16(InFmt_MUBUF *iFmt)
830 : Inst_MUBUF(iFmt, "buffer_load_short_d16")
831 {
832 setFlag(MemoryRef);
833 setFlag(Load);
834 if (instData.LDS) {
835 setFlag(GroupSegment);
836 warn("BUFFER.LDS not implemented!");
837 } else {
838 setFlag(GlobalSegment);
839 }
840 } // Inst_MUBUF__BUFFER_LOAD_SHORT_D16
841
842 Inst_MUBUF__BUFFER_LOAD_SHORT_D16::~Inst_MUBUF__BUFFER_LOAD_SHORT_D16()
843 {
844 } // ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16
845
846 // --- description from .arch file ---
847 // RETURN_DATA[15 : 0].u16 = MEM[ADDR].u16;
848 // // RETURN_DATA[31:16] is preserved.
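// Note: e.g. if the destination VGPR lane holds 0x11112222 and MEM[ADDR]
// reads 0xBEEF, the result is 0x1111BEEF; only bits 15:0 change (see the
// replaceBits(vdst[lane], 15, 0, buf_val) call in completeAcc below).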
849 void
850 Inst_MUBUF__BUFFER_LOAD_SHORT_D16::execute(GPUDynInstPtr gpuDynInst)
851 {
852 Wavefront *wf = gpuDynInst->wavefront();
853
854 if (gpuDynInst->exec_mask.none()) {
855 wf->decVMemInstsIssued();
856 return;
857 }
858
859 gpuDynInst->execUnitId = wf->execUnitId;
860 gpuDynInst->latency.init(gpuDynInst->computeUnit());
861 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
862
863 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
864 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
865 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
866 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
867
868 rsrcDesc.read();
869 offset.read();
870
871 int inst_offset = instData.OFFSET;
872
873 // For explanation of buffer addressing, see section 9.1.5 in:
874 // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
875 // instruction-set-architectures/
876 // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
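// In short (a sketch of that section, not upstream text): the per-lane
// address that calcAddr() produces below is approximately
//   addr = base(rsrcDesc) + offset + inst_offset
//        + (OFFEN ? voffset[lane] : 0)
//        + (IDXEN ? vindex[lane] * stride(rsrcDesc) : 0)
// which is why addr0 and addr1 swap positions in the calcAddr() calls
// depending on which of IDXEN/OFFEN is set.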
877 if (!instData.IDXEN && !instData.OFFEN) {
878 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
879 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
880 addr0, addr1, rsrcDesc, offset, inst_offset);
881 } else if (!instData.IDXEN && instData.OFFEN) {
882 addr0.read();
883 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
884 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
885 addr0, addr1, rsrcDesc, offset, inst_offset);
886 } else if (instData.IDXEN && !instData.OFFEN) {
887 addr0.read();
888 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
889 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
890 addr1, addr0, rsrcDesc, offset, inst_offset);
891 } else {
892 addr0.read();
893 addr1.read();
894 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
895 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
896 addr1, addr0, rsrcDesc, offset, inst_offset);
897 }
898
899 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
900 } // execute
901
902 void
903 Inst_MUBUF__BUFFER_LOAD_SHORT_D16::initiateAcc(GPUDynInstPtr gpuDynInst)
904 {
905 initMemRead<VecElemU16>(gpuDynInst);
906 } // initiateAcc
907
908 void
909 Inst_MUBUF__BUFFER_LOAD_SHORT_D16::completeAcc(GPUDynInstPtr gpuDynInst)
910 {
911 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
912
913 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
914 if (gpuDynInst->exec_mask[lane]) {
915 if (!oobMask[lane]) {
916 VecElemU16 buf_val = (reinterpret_cast<VecElemU16*>(
917 gpuDynInst->d_data))[lane];
918 replaceBits(vdst[lane], 15, 0, buf_val);
919 } else {
920 vdst[lane] = 0;
921 }
922 }
923 }
924
925 vdst.write();
926 } // completeAcc
927 // --- Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI class methods ---
928
929 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
930 ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(InFmt_MUBUF *iFmt)
931 : Inst_MUBUF(iFmt, "buffer_load_short_d16_hi")
932 {
933 setFlag(MemoryRef);
934 setFlag(Load);
935 if (instData.LDS) {
936 setFlag(GroupSegment);
937 warn("BUFFER.LDS not implemented!");
938 } else {
939 setFlag(GlobalSegment);
940 }
941 } // Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
942
943 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
944 ::~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI()
945 {
946 } // ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
947
948 // --- description from .arch file ---
949 // VDATA[31 : 16].b16 = MEM[ADDR].b16;
950 // // VDATA[15:0] is preserved.
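// Note: the mirror image of buffer_load_short_d16: with the VGPR lane at
// 0x11112222 and MEM[ADDR] reading 0xBEEF, the result is 0xBEEF2222
// (replaceBits(vdst[lane], 31, 16, buf_val) preserves the low half).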
951 void
952 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::execute(GPUDynInstPtr gpuDynInst)
953 {
954 Wavefront *wf = gpuDynInst->wavefront();
955
956 if (gpuDynInst->exec_mask.none()) {
957 wf->decVMemInstsIssued();
958 return;
959 }
960
961 gpuDynInst->execUnitId = wf->execUnitId;
962 gpuDynInst->latency.init(gpuDynInst->computeUnit());
963 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
964
965 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
966 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
967 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
968 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
969
970 rsrcDesc.read();
971 offset.read();
972
973 int inst_offset = instData.OFFSET;
974
975 // For explanation of buffer addressing, see section 9.1.5 in:
976 // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
977 // instruction-set-architectures/
978 // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
979 if (!instData.IDXEN && !instData.OFFEN) {
980 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
981 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
982 addr0, addr1, rsrcDesc, offset, inst_offset);
983 } else if (!instData.IDXEN && instData.OFFEN) {
984 addr0.read();
985 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
986 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
987 addr0, addr1, rsrcDesc, offset, inst_offset);
988 } else if (instData.IDXEN && !instData.OFFEN) {
989 addr0.read();
990 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
991 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
992 addr1, addr0, rsrcDesc, offset, inst_offset);
993 } else {
994 addr0.read();
995 addr1.read();
996 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
997 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
998 addr1, addr0, rsrcDesc, offset, inst_offset);
999 }
1000
1001 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1002 } // execute
1003
1004 void
1005 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst)
1006 {
1007 initMemRead<VecElemU16>(gpuDynInst);
1008 } // initiateAcc
1009
1010 void
1011 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst)
1012 {
1013 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
1014
1015 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1016 if (gpuDynInst->exec_mask[lane]) {
1017 if (!oobMask[lane]) {
1018 VecElemU16 buf_val = (reinterpret_cast<VecElemU16*>(
1019 gpuDynInst->d_data))[lane];
1020 replaceBits(vdst[lane], 31, 16, buf_val);
1021 } else {
1022 vdst[lane] = 0;
1023 }
1024 }
1025 }
1026
1027 vdst.write();
1028 } // completeAcc
1029 // --- Inst_MUBUF__BUFFER_LOAD_DWORD class methods ---
1030
1031 Inst_MUBUF__BUFFER_LOAD_DWORD
1032 ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
1033 : Inst_MUBUF(iFmt, "buffer_load_dword")
1034 {
1035 setFlag(MemoryRef);
1036 setFlag(Load);
1037 if (instData.LDS) {
1038 setFlag(GroupSegment);
1039 } else {
1040 setFlag(GlobalSegment);
1041 }
1042 } // Inst_MUBUF__BUFFER_LOAD_DWORD
1043
1044 Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
1045 {
1046 } // ~Inst_MUBUF__BUFFER_LOAD_DWORD
1047
1048 // --- description from .arch file ---
1049 // Untyped buffer load dword.
1050 void
1051 Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
1052 {
1053 Wavefront *wf = gpuDynInst->wavefront();
1054
1055 if (gpuDynInst->exec_mask.none()) {
1056 wf->decVMemInstsIssued();
1057 return;
1058 }
1059
1060 gpuDynInst->execUnitId = wf->execUnitId;
1061 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1062 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1063
1064 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1065 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1066 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1067 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1068
1069 rsrcDesc.read();
1070 offset.read();
1071
1072 int inst_offset = instData.OFFSET;
1073
1074 // For explanation of buffer addressing, see section 9.1.5 in:
1075 // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
1076 // instruction-set-architectures/
1077 // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
1078 if (!instData.IDXEN && !instData.OFFEN) {
1079 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1080 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1081 addr0, addr1, rsrcDesc, offset, inst_offset);
1082 } else if (!instData.IDXEN && instData.OFFEN) {
1083 addr0.read();
1084 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1085 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1086 addr0, addr1, rsrcDesc, offset, inst_offset);
1087 } else if (instData.IDXEN && !instData.OFFEN) {
1088 addr0.read();
1089 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1090 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1091 addr1, addr0, rsrcDesc, offset, inst_offset);
1092 } else {
1093 addr0.read();
1094 addr1.read();
1095 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1096 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1097 addr1, addr0, rsrcDesc, offset, inst_offset);
1098 }
1099
1100 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1101 } // execute
1102
1103 void
1104 Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
1105 {
1106 initMemRead<VecElemU32>(gpuDynInst);
1107 } // initiateAcc
1108
1109 void
1110 Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
1111 {
1112 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
1113
1114 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1115 if (gpuDynInst->exec_mask[lane]) {
1116 if (!oobMask[lane]) {
1117 vdst[lane] = (reinterpret_cast<VecElemU32*>(
1118 gpuDynInst->d_data))[lane];
1119 } else {
1120 vdst[lane] = 0;
1121 }
1122 }
1123 }
1124
1125 vdst.write();
1126 } // completeAcc
1127 // --- Inst_MUBUF__BUFFER_LOAD_DWORDX2 class methods ---
1128
1129 Inst_MUBUF__BUFFER_LOAD_DWORDX2
1130 ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
1131 : Inst_MUBUF(iFmt, "buffer_load_dwordx2")
1132 {
1133 setFlag(MemoryRef);
1134 setFlag(Load);
1135 if (instData.LDS) {
1136 setFlag(GroupSegment);
1137 } else {
1138 setFlag(GlobalSegment);
1139 }
1140 } // Inst_MUBUF__BUFFER_LOAD_DWORDX2
1141
1142 Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
1143 {
1144 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2
1145
1146 // --- description from .arch file ---
1147 // Untyped buffer load 2 dwords.
1148 void
1149 Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
1150 {
1151 Wavefront *wf = gpuDynInst->wavefront();
1152
1153 if (gpuDynInst->exec_mask.none()) {
1154 wf->decVMemInstsIssued();
1155 return;
1156 }
1157
1158 gpuDynInst->execUnitId = wf->execUnitId;
1159 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1160 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1161
1162 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1163 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1164 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1165 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1166
1167 rsrcDesc.read();
1168 offset.read();
1169
1170 int inst_offset = instData.OFFSET;
1171
1172 if (!instData.IDXEN && !instData.OFFEN) {
1173 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1174 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1175 addr0, addr1, rsrcDesc, offset, inst_offset);
1176 } else if (!instData.IDXEN && instData.OFFEN) {
1177 addr0.read();
1178 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1179 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1180 addr0, addr1, rsrcDesc, offset, inst_offset);
1181 } else if (instData.IDXEN && !instData.OFFEN) {
1182 addr0.read();
1183 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1184 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1185 addr1, addr0, rsrcDesc, offset, inst_offset);
1186 } else {
1187 addr0.read();
1188 addr1.read();
1189 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1190 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1191 addr1, addr0, rsrcDesc, offset, inst_offset);
1192 }
1193
1194 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1195 } // execute
1196
1197 void
1198 Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
1199 {
1200 initMemRead<2>(gpuDynInst);
1201 } // initiateAcc
1202
1203 void
1204 Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
1205 {
1206 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
1207 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
1208
1209 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1210 if (gpuDynInst->exec_mask[lane]) {
1211 if (!oobMask[lane]) {
1212 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1213 gpuDynInst->d_data))[lane * 2];
1214 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1215 gpuDynInst->d_data))[lane * 2 + 1];
1216 } else {
1217 vdst0[lane] = 0;
1218 vdst1[lane] = 0;
1219 }
1220 }
1221 }
1222
1223 vdst0.write();
1224 vdst1.write();
1225 } // completeAcc
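// Note: initMemRead<N> returns N consecutive dwords per lane, so d_data
// is indexed [lane * N + i] here; the dwordx3/dwordx4 variants below
// extend the same per-lane packing with strides 3 and 4.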
1226 // --- Inst_MUBUF__BUFFER_LOAD_DWORDX3 class methods ---
1227
1228 Inst_MUBUF__BUFFER_LOAD_DWORDX3
1229 ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
1230 : Inst_MUBUF(iFmt, "buffer_load_dwordx3")
1231 {
1232 setFlag(MemoryRef);
1233 setFlag(Load);
1234 if (instData.LDS) {
1235 setFlag(GroupSegment);
1236 } else {
1237 setFlag(GlobalSegment);
1238 }
1239 } // Inst_MUBUF__BUFFER_LOAD_DWORDX3
1240
1241 Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
1242 {
1243 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3
1244
1245 // --- description from .arch file ---
1246 // Untyped buffer load 3 dwords.
1247 void
1248 Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
1249 {
1250 Wavefront *wf = gpuDynInst->wavefront();
1251
1252 if (gpuDynInst->exec_mask.none()) {
1253 wf->decVMemInstsIssued();
1254 return;
1255 }
1256
1257 gpuDynInst->execUnitId = wf->execUnitId;
1258 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1259 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1260
1261 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1262 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1263 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1264 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1265
1266 rsrcDesc.read();
1267 offset.read();
1268
1269 int inst_offset = instData.OFFSET;
1270
1271 if (!instData.IDXEN && !instData.OFFEN) {
1272 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1273 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1274 addr0, addr1, rsrcDesc, offset, inst_offset);
1275 } else if (!instData.IDXEN && instData.OFFEN) {
1276 addr0.read();
1277 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1278 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1279 addr0, addr1, rsrcDesc, offset, inst_offset);
1280 } else if (instData.IDXEN && !instData.OFFEN) {
1281 addr0.read();
1282 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1283 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1284 addr1, addr0, rsrcDesc, offset, inst_offset);
1285 } else {
1286 addr0.read();
1287 addr1.read();
1288 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1289 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1290 addr1, addr0, rsrcDesc, offset, inst_offset);
1291 }
1292
1293 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1294 } // execute
1295
1296 void
1297 Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
1298 {
1299 initMemRead<3>(gpuDynInst);
1300 } // initiateAcc
1301
1302 void
1303 Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
1304 {
1305 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
1306 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
1307 VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
1308
1309 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1310 if (gpuDynInst->exec_mask[lane]) {
1311 if (!oobMask[lane]) {
1312 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1313 gpuDynInst->d_data))[lane * 3];
1314 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1315 gpuDynInst->d_data))[lane * 3 + 1];
1316 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
1317 gpuDynInst->d_data))[lane * 3 + 2];
1318 } else {
1319 vdst0[lane] = 0;
1320 vdst1[lane] = 0;
1321 vdst2[lane] = 0;
1322 }
1323 }
1324 }
1325
1326 vdst0.write();
1327 vdst1.write();
1328 vdst2.write();
1329 } // completeAcc
1330 // --- Inst_MUBUF__BUFFER_LOAD_DWORDX4 class methods ---
1331
1332 Inst_MUBUF__BUFFER_LOAD_DWORDX4
1333 ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
1334 : Inst_MUBUF(iFmt, "buffer_load_dwordx4")
1335 {
1336 setFlag(MemoryRef);
1337 setFlag(Load);
1338 if (instData.LDS) {
1339 setFlag(GroupSegment);
1340 } else {
1341 setFlag(GlobalSegment);
1342 }
1343 } // Inst_MUBUF__BUFFER_LOAD_DWORDX4
1344
1345 Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
1346 {
1347 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4
1348
1349 // --- description from .arch file ---
1350 // Untyped buffer load 4 dwords.
1351 void
1352 Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
1353 {
1354 Wavefront *wf = gpuDynInst->wavefront();
1355
1356 if (gpuDynInst->exec_mask.none()) {
1357 wf->decVMemInstsIssued();
1358 return;
1359 }
1360
1361 gpuDynInst->execUnitId = wf->execUnitId;
1362 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1363 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1364
1365 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1366 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1367 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1368 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1369
1370 rsrcDesc.read();
1371 offset.read();
1372
1373 int inst_offset = instData.OFFSET;
1374
1375 if (!instData.IDXEN && !instData.OFFEN) {
1376 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1377 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1378 addr0, addr1, rsrcDesc, offset, inst_offset);
1379 } else if (!instData.IDXEN && instData.OFFEN) {
1380 addr0.read();
1381 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1382 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1383 addr0, addr1, rsrcDesc, offset, inst_offset);
1384 } else if (instData.IDXEN && !instData.OFFEN) {
1385 addr0.read();
1386 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1387 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1388 addr1, addr0, rsrcDesc, offset, inst_offset);
1389 } else {
1390 addr0.read();
1391 addr1.read();
1392 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1393 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1394 addr1, addr0, rsrcDesc, offset, inst_offset);
1395 }
1396
1397 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1398 } // execute
1399
1400 void
1401 Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
1402 {
1403 initMemRead<4>(gpuDynInst);
1404 } // initiateAcc
1405
1406 void
1407 Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
1408 {
1409 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
1410 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
1411 VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
1412 VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);
1413
1414 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1415 if (gpuDynInst->exec_mask[lane]) {
1416 if (!oobMask[lane]) {
1417 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1418 gpuDynInst->d_data))[lane * 4];
1419 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1420 gpuDynInst->d_data))[lane * 4 + 1];
1421 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
1422 gpuDynInst->d_data))[lane * 4 + 2];
1423 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
1424 gpuDynInst->d_data))[lane * 4 + 3];
1425 } else {
1426 vdst0[lane] = 0;
1427 vdst1[lane] = 0;
1428 vdst2[lane] = 0;
1429 vdst3[lane] = 0;
1430 }
1431 }
1432 }
1433
1434 vdst0.write();
1435 vdst1.write();
1436 vdst2.write();
1437 vdst3.write();
1438 } // completeAcc
1439 // --- Inst_MUBUF__BUFFER_STORE_BYTE class methods ---
1440
1441 Inst_MUBUF__BUFFER_STORE_BYTE
1442 ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
1443 : Inst_MUBUF(iFmt, "buffer_store_byte")
1444 {
1445 setFlag(MemoryRef);
1446 setFlag(Store);
1447 if (instData.LDS) {
1448 setFlag(GroupSegment);
1449 } else {
1450 setFlag(GlobalSegment);
1451 }
1452 } // Inst_MUBUF__BUFFER_STORE_BYTE
1453
1454 Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
1455 {
1456 } // ~Inst_MUBUF__BUFFER_STORE_BYTE
1457
1458 // --- description from .arch file ---
1459 // Untyped buffer store byte.
1460 void
1461 Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
1462 {
1463 Wavefront *wf = gpuDynInst->wavefront();
1464
1465 if (gpuDynInst->exec_mask.none()) {
1466 wf->decVMemInstsIssued();
1467 wf->decExpInstsIssued();
1468 return;
1469 }
1470
1471 gpuDynInst->execUnitId = wf->execUnitId;
1472 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1473 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1474
1475 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1476 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1477 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1478 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1479 ConstVecOperandI8 data(gpuDynInst, extData.VDATA);
1480
1481 rsrcDesc.read();
1482 offset.read();
1483 data.read();
1484
1485 int inst_offset = instData.OFFSET;
1486
1487 if (!instData.IDXEN && !instData.OFFEN) {
1488 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1489 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1490 addr0, addr1, rsrcDesc, offset, inst_offset);
1491 } else if (!instData.IDXEN && instData.OFFEN) {
1492 addr0.read();
1493 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1494 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1495 addr0, addr1, rsrcDesc, offset, inst_offset);
1496 } else if (instData.IDXEN && !instData.OFFEN) {
1497 addr0.read();
1498 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1499 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1500 addr1, addr0, rsrcDesc, offset, inst_offset);
1501 } else {
1502 addr0.read();
1503 addr1.read();
1504 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1505 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1506 addr1, addr0, rsrcDesc, offset, inst_offset);
1507 }
1508
1509 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1510
1511 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1512 if (gpuDynInst->exec_mask[lane]) {
1513 (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
1514 = data[lane];
1515 }
1516 }
1517 } // execute
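// Note: stores stage their source VGPR values into gpuDynInst->d_data at
// the end of execute(); initMemWrite<T>() in initiateAcc() then pushes
// the staged buffer out to memory, leaving completeAcc() empty.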
1518
1519 void
1520 Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
1521 {
1522 initMemWrite<VecElemI8>(gpuDynInst);
1523 } // initiateAcc
1524
1525 void
1526 Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
1527 {
1528 } // completeAcc
1529 // --- Inst_MUBUF__BUFFER_STORE_SHORT class methods ---
1530
1531 Inst_MUBUF__BUFFER_STORE_SHORT
1532 ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
1533 : Inst_MUBUF(iFmt, "buffer_store_short")
1534 {
1535 setFlag(MemoryRef);
1536 setFlag(Store);
1537 if (instData.LDS) {
1538 setFlag(GroupSegment);
1539 } else {
1540 setFlag(GlobalSegment);
1541 }
1542 } // Inst_MUBUF__BUFFER_STORE_SHORT
1543
1544 Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
1545 {
1546 } // ~Inst_MUBUF__BUFFER_STORE_SHORT
1547
1548 // --- description from .arch file ---
1549 // Untyped buffer store short.
1550 void
1551 Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
1552 {
1553 Wavefront *wf = gpuDynInst->wavefront();
1554
1555 if (gpuDynInst->exec_mask.none()) {
1556 wf->decVMemInstsIssued();
1557 wf->decExpInstsIssued();
1558 return;
1559 }
1560
1561 gpuDynInst->execUnitId = wf->execUnitId;
1562 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1563 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1564
1565 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1566 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1567 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1568 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1569 ConstVecOperandI16 data(gpuDynInst, extData.VDATA);
1570
1571 rsrcDesc.read();
1572 offset.read();
1573 data.read();
1574
1575 int inst_offset = instData.OFFSET;
1576
1577 if (!instData.IDXEN && !instData.OFFEN) {
1578 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1579 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1580 addr0, addr1, rsrcDesc, offset, inst_offset);
1581 } else if (!instData.IDXEN && instData.OFFEN) {
1582 addr0.read();
1583 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1584 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1585 addr0, addr1, rsrcDesc, offset, inst_offset);
1586 } else if (instData.IDXEN && !instData.OFFEN) {
1587 addr0.read();
1588 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1589 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1590 addr1, addr0, rsrcDesc, offset, inst_offset);
1591 } else {
1592 addr0.read();
1593 addr1.read();
1594 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1595 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1596 addr1, addr0, rsrcDesc, offset, inst_offset);
1597 }
1598
1599 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1600
1601 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1602 if (gpuDynInst->exec_mask[lane]) {
1603 (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
1604 = data[lane];
1605 }
1606 }
1607 } // execute
1608
1609 void
1610 Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
1611 {
1612 initMemWrite<VecElemI16>(gpuDynInst);
1613 } // initiateAcc
1614
1615 void
1616 Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
1617 {
1618 } // completeAcc
1619 // --- Inst_MUBUF__BUFFER_STORE_DWORD class methods ---
1620
1621 Inst_MUBUF__BUFFER_STORE_DWORD
1622 ::Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt)
1623 : Inst_MUBUF(iFmt, "buffer_store_dword")
1624 {
1625 setFlag(MemoryRef);
1626 setFlag(Store);
1627 if (instData.LDS) {
1628 setFlag(GroupSegment);
1629 } else {
1630 setFlag(GlobalSegment);
1631 }
1632 } // Inst_MUBUF__BUFFER_STORE_DWORD
1633
1634 Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
1635 {
1636 } // ~Inst_MUBUF__BUFFER_STORE_DWORD
1637
1638 // --- description from .arch file ---
1639 // Untyped buffer store dword.
1640 void
1641 Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
1642 {
1643 Wavefront *wf = gpuDynInst->wavefront();
1644
1645 if (gpuDynInst->exec_mask.none()) {
1646 wf->decVMemInstsIssued();
1647 wf->decExpInstsIssued();
1648 return;
1649 }
1650
1651 gpuDynInst->execUnitId = wf->execUnitId;
1652 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1653 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1654
1655 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1656 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1657 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1658 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1659 ConstVecOperandU32 data(gpuDynInst, extData.VDATA);
1660
1661 rsrcDesc.read();
1662 offset.read();
1663 data.read();
1664
1665 int inst_offset = instData.OFFSET;
1666
1667 if (!instData.IDXEN && !instData.OFFEN) {
1668 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1669 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1670 addr0, addr1, rsrcDesc, offset, inst_offset);
1671 } else if (!instData.IDXEN && instData.OFFEN) {
1672 addr0.read();
1673 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1674 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1675 addr0, addr1, rsrcDesc, offset, inst_offset);
1676 } else if (instData.IDXEN && !instData.OFFEN) {
1677 addr0.read();
1678 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1679 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1680 addr1, addr0, rsrcDesc, offset, inst_offset);
1681 } else {
1682 addr0.read();
1683 addr1.read();
1684 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1685 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1686 addr1, addr0, rsrcDesc, offset, inst_offset);
1687 }
1688
1689 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1690
1691 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1692 if (gpuDynInst->exec_mask[lane]) {
1693 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
1694 = data[lane];
1695 }
1696 }
1697 } // execute
1698
1699 void
1700 Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
1701 {
1702 initMemWrite<VecElemU32>(gpuDynInst);
1703 } // initiateAcc
1704
1705 void
1706 Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
1707 {
1708 } // completeAcc
1709 // --- Inst_MUBUF__BUFFER_STORE_DWORDX2 class methods ---
1710
1711 Inst_MUBUF__BUFFER_STORE_DWORDX2
1712 ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
1713 : Inst_MUBUF(iFmt, "buffer_store_dwordx2")
1714 {
1715 setFlag(MemoryRef);
1716 setFlag(Store);
1717 if (instData.LDS) {
1718 setFlag(GroupSegment);
1719 } else {
1720 setFlag(GlobalSegment);
1721 }
1722 } // Inst_MUBUF__BUFFER_STORE_DWORDX2
1723
1724 Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
1725 {
1726 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2
1727
1728 // --- description from .arch file ---
1729 // Untyped buffer store 2 dwords.
1730 void
1731 Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
1732 {
1733 Wavefront *wf = gpuDynInst->wavefront();
1734
1735 if (gpuDynInst->exec_mask.none()) {
1736 wf->decVMemInstsIssued();
1737 wf->decExpInstsIssued();
1738 return;
1739 }
1740
1741 gpuDynInst->execUnitId = wf->execUnitId;
1742 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1743 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1744
1745 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1746 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1747 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1748 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1749 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
1750 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
1751
1752 rsrcDesc.read();
1753 offset.read();
1754 data0.read();
1755 data1.read();
1756
1757 int inst_offset = instData.OFFSET;
1758
1759 if (!instData.IDXEN && !instData.OFFEN) {
1760 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1761 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1762 addr0, addr1, rsrcDesc, offset, inst_offset);
1763 } else if (!instData.IDXEN && instData.OFFEN) {
1764 addr0.read();
1765 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1766 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1767 addr0, addr1, rsrcDesc, offset, inst_offset);
1768 } else if (instData.IDXEN && !instData.OFFEN) {
1769 addr0.read();
1770 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1771 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1772 addr1, addr0, rsrcDesc, offset, inst_offset);
1773 } else {
1774 addr0.read();
1775 addr1.read();
1776 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1777 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1778 addr1, addr0, rsrcDesc, offset, inst_offset);
1779 }
1780
1781 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1782
1783 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1784 if (gpuDynInst->exec_mask[lane]) {
1785 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
1786 = data0[lane];
1787 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
1788 = data1[lane];
1789 }
1790 }
1791 } // execute
1792
1793 void
1794 Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
1795 {
1796 initMemWrite<2>(gpuDynInst);
1797 } // initiateAcc
1798
1799 void
1800 Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
1801 {
1802 } // completeAcc
1803 // --- Inst_MUBUF__BUFFER_STORE_DWORDX3 class methods ---
1804
1805 Inst_MUBUF__BUFFER_STORE_DWORDX3
1806 ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
1807 : Inst_MUBUF(iFmt, "buffer_store_dwordx3")
1808 {
1809 setFlag(MemoryRef);
1810 setFlag(Store);
1811 if (instData.LDS) {
1812 setFlag(GroupSegment);
1813 } else {
1814 setFlag(GlobalSegment);
1815 }
1816 } // Inst_MUBUF__BUFFER_STORE_DWORDX3
1817
1818 Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
1819 {
1820 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3
1821
1822 // --- description from .arch file ---
1823 // Untyped buffer store 3 dwords.
1824 void
1825 Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
1826 {
1827 Wavefront *wf = gpuDynInst->wavefront();
1828
1829 if (gpuDynInst->exec_mask.none()) {
1830 wf->decVMemInstsIssued();
1831 wf->decExpInstsIssued();
1832 return;
1833 }
1834
1835 gpuDynInst->execUnitId = wf->execUnitId;
1836 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1837 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1838
1839 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1840 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1841 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1842 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1843 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
1844 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
1845 ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
1846
1847 rsrcDesc.read();
1848 offset.read();
1849 data0.read();
1850 data1.read();
1851 data2.read();
1852
1853 int inst_offset = instData.OFFSET;
1854
1855 if (!instData.IDXEN && !instData.OFFEN) {
1856 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1857 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1858 addr0, addr1, rsrcDesc, offset, inst_offset);
1859 } else if (!instData.IDXEN && instData.OFFEN) {
1860 addr0.read();
1861 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1862 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1863 addr0, addr1, rsrcDesc, offset, inst_offset);
1864 } else if (instData.IDXEN && !instData.OFFEN) {
1865 addr0.read();
1866 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1867 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1868 addr1, addr0, rsrcDesc, offset, inst_offset);
1869 } else {
1870 addr0.read();
1871 addr1.read();
1872 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1873 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1874 addr1, addr0, rsrcDesc, offset, inst_offset);
1875 }
1876
1877 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1878
1879 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1880 if (gpuDynInst->exec_mask[lane]) {
1881 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
1882 = data0[lane];
1883 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
1884 = data1[lane];
1885 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2]
1886 = data2[lane];
1887 }
1888 }
1889 } // execute
1890
1891 void
1892 Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
1893 {
1894 initMemWrite<3>(gpuDynInst);
1895 } // initiateAcc
1896
1897 void
1898 Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
1899 {
1900 } // completeAcc
1901 // --- Inst_MUBUF__BUFFER_STORE_DWORDX4 class methods ---
1902
1903 Inst_MUBUF__BUFFER_STORE_DWORDX4
1904 ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
1905 : Inst_MUBUF(iFmt, "buffer_store_dwordx4")
1906 {
1907 setFlag(MemoryRef);
1908 setFlag(Store);
1909 if (instData.LDS) {
1910 setFlag(GroupSegment);
1911 } else {
1912 setFlag(GlobalSegment);
1913 }
1914 } // Inst_MUBUF__BUFFER_STORE_DWORDX4
1915
1916 Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
1917 {
1918 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4
1919
1920 // --- description from .arch file ---
1921 // Untyped buffer store 4 dwords.
1922 void
1923 Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
1924 {
1925 Wavefront *wf = gpuDynInst->wavefront();
1926
1927 if (gpuDynInst->exec_mask.none()) {
1928 wf->decVMemInstsIssued();
1929 wf->decExpInstsIssued();
1930 return;
1931 }
1932
1933 gpuDynInst->execUnitId = wf->execUnitId;
1934 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1935 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1936
1937 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1938 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1939 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1940 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1941 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
1942 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
1943 ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
1944 ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);
1945
1946 rsrcDesc.read();
1947 offset.read();
1948 data0.read();
1949 data1.read();
1950 data2.read();
1951 data3.read();
1952
1953 int inst_offset = instData.OFFSET;
1954
1955 if (!instData.IDXEN && !instData.OFFEN) {
1956 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1957 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1958 addr0, addr1, rsrcDesc, offset, inst_offset);
1959 } else if (!instData.IDXEN && instData.OFFEN) {
1960 addr0.read();
1961 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1962 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1963 addr0, addr1, rsrcDesc, offset, inst_offset);
1964 } else if (instData.IDXEN && !instData.OFFEN) {
1965 addr0.read();
1966 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1967 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1968 addr1, addr0, rsrcDesc, offset, inst_offset);
1969 } else {
1970 addr0.read();
1971 addr1.read();
1972 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1973 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1974 addr1, addr0, rsrcDesc, offset, inst_offset);
1975 }
1976
1977 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1978
1979 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1980 if (gpuDynInst->exec_mask[lane]) {
1981 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
1982 = data0[lane];
1983 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
1984 = data1[lane];
1985 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2]
1986 = data2[lane];
1987 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 3]
1988 = data3[lane];
1989 }
1990 }
1991 } // execute
1992
1993 void
1994 Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
1995 {
1996 initMemWrite<4>(gpuDynInst);
1997 } // initiateAcc
1998
1999 void
2000 Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
2001 {
2002 } // completeAcc
2003 // --- Inst_MUBUF__BUFFER_STORE_LDS_DWORD class methods ---
2004
2005 Inst_MUBUF__BUFFER_STORE_LDS_DWORD
2006 ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
2007 : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
2008 {
2009 setFlag(Store);
2010 setFlag(GlobalSegment);
2011 } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD
2012
2013 Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
2014 {
2015 } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD
2016
2017 // --- description from .arch file ---
2018 // Store one DWORD from LDS memory to system memory without utilizing
2019 // VGPRs.
2020 void
2021 Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
2022 {
2023 panicUnimplemented();
2024 } // execute
2025 // --- Inst_MUBUF__BUFFER_WBINVL1 class methods ---
2026
2027 Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
2028 : Inst_MUBUF(iFmt, "buffer_wbinvl1")
2029 {
2030 setFlag(MemoryRef);
2031 setFlag(GPUStaticInst::MemSync);
2032 setFlag(GlobalSegment);
2033 setFlag(MemSync);
2034 } // Inst_MUBUF__BUFFER_WBINVL1
2035
2036 Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
2037 {
2038 } // ~Inst_MUBUF__BUFFER_WBINVL1
2039
2040 // --- description from .arch file ---
2041 // Write back and invalidate the shader L1.
2042 // Always returns ACK to shader.
2043 void
2044 Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
2045 {
2046 Wavefront *wf = gpuDynInst->wavefront();
2047
2048 if (gpuDynInst->exec_mask.none()) {
2049 wf->decVMemInstsIssued();
2050 return;
2051 }
2052
2053 gpuDynInst->execUnitId = wf->execUnitId;
2054 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2055 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2056
2057 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
2058 gpuDynInst->computeUnit()->globalMemoryPipe.
2059 issueRequest(gpuDynInst);
2060 } else {
2061 fatal("Unsupported scope for flat instruction.\n");
2062 }
2063 } // execute
2064
2065 void
2066 Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
2067 {
2068 // TODO: Fix it for gfx10. Once we have the new gfx10 cache model, we
2069 // need to precisely communicate the writeback-invalidate operation to
2070 // the new gfx10 coalescer rather than sending AcquireRelease markers.
2071 // The SICoalescer would need to be updated appropriately as well.
2072 injectGlobalMemFence(gpuDynInst);
2073 } // initiateAcc
2074 void
2075 Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
2076 {
2077 } // completeAcc
2078 // --- Inst_MUBUF__BUFFER_WBINVL1_VOL class methods ---
2079
2080 Inst_MUBUF__BUFFER_WBINVL1_VOL
2081 ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
2082 : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") {
2083 // This instruction is the same as the buffer_wbinvl1 instruction,
2084 // except that it only invalidates L1 shader lines with MTYPE SC
2085 // and GC. Since the Hermes L1 (TCP) does not differentiate between
2086 // its cache lines, this instruction currently behaves (and is
2087 // implemented) exactly like the buffer_wbinvl1 instruction.
2088 setFlag(MemoryRef);
2089 setFlag(GPUStaticInst::MemSync);
2090 setFlag(GlobalSegment);
2091 setFlag(MemSync);
2092 } // Inst_MUBUF__BUFFER_WBINVL1_VOL
2093
2094 Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
2095 {
2096 } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL
2097
2098 // --- description from .arch file ---
2099 // Write back and invalidate the shader L1 only for lines that are marked
2100 // --- volatile.
2101 // Always returns ACK to shader.
2102 void
2103 Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
2104 {
2105 Wavefront *wf = gpuDynInst->wavefront();
2106
2107 if (gpuDynInst->exec_mask.none()) {
2108 wf->decVMemInstsIssued();
2109 return;
2110 }
2111
2112 gpuDynInst->execUnitId = wf->execUnitId;
2113 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2114 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2115
2116 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
2117 gpuDynInst->computeUnit()->globalMemoryPipe.
2118 issueRequest(gpuDynInst);
2119 } else {
2120 fatal("Unsupported scope for flat instruction.\n");
2121 }
2122 } // execute
2123 void
2124 Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
2125 {
2126 injectGlobalMemFence(gpuDynInst);
2127 } // initiateAcc
2128 void
2129 Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
2130 {
2131 } // completeAcc
2132 // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP class methods ---
2133
2134 Inst_MUBUF__BUFFER_ATOMIC_SWAP
2135 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
2136 : Inst_MUBUF(iFmt, "buffer_atomic_swap")
2137 {
2138 setFlag(AtomicExch);
2139 if (instData.GLC) {
2140 setFlag(AtomicReturn);
2141 } else {
2142 setFlag(AtomicNoReturn);
2143 }
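// Note: GLC on a buffer atomic selects the returning form, in which
// the pre-operation memory value is written back to the VDATA VGPRs
// (see the isAtomicRet() path in buffer_atomic_cmpswap::completeAcc).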
2144 setFlag(MemoryRef);
2145 setFlag(GlobalSegment);
2146 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP
2147
2148 Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
2149 {
2150 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP
2151
2152 // --- description from .arch file ---
2153 // 32b:
2154 // tmp = MEM[ADDR];
2155 // MEM[ADDR] = DATA;
2156 // RETURN_DATA = tmp.
2157 void
2158 Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
2159 {
2160 panicUnimplemented();
2161 } // execute
2162 // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class methods ---
2163
2164 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
2165 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
2166 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
2167 {
2168 setFlag(AtomicCAS);
2169 if (instData.GLC) {
2170 setFlag(AtomicReturn);
2171 } else {
2172 setFlag(AtomicNoReturn);
2173 }
2174 setFlag(MemoryRef);
2175 setFlag(GlobalSegment);
2176 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
2177
2178 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
2179 {
2180 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
2181
2182 // --- description from .arch file ---
2183 // 32b:
2184 // tmp = MEM[ADDR];
2185 // src = DATA[0];
2186 // cmp = DATA[1];
2187 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
2188 // RETURN_DATA[0] = tmp.
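// A scalar model of one lane of this operation (a sketch with a
// hypothetical helper, not gem5 code):
//
//     #include <cstdint>
//
//     uint32_t
//     bufferAtomicCmpswap(uint32_t &mem, uint32_t src, uint32_t cmp)
//     {
//         uint32_t tmp = mem;        // tmp = MEM[ADDR]
//         if (tmp == cmp)
//             mem = src;             // MEM[ADDR] = (tmp == cmp) ? src : tmp
//         return tmp;                // RETURN_DATA[0] = tmp
//     }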
2189 void
2190 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
2191 {
2192 Wavefront *wf = gpuDynInst->wavefront();
2193
2194 if (gpuDynInst->exec_mask.none()) {
2195 wf->decVMemInstsIssued();
2196 return;
2197 }
2198
2199 gpuDynInst->execUnitId = wf->execUnitId;
2200 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2201 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2202
2203 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
2204 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
2205 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
2206 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
2207 ConstVecOperandU32 src(gpuDynInst, extData.VDATA);
2208 ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1);
2209
2210 rsrcDesc.read();
2211 offset.read();
2212 src.read();
2213 cmp.read();
2214
2215 int inst_offset = instData.OFFSET;
2216
2217 if (!instData.IDXEN && !instData.OFFEN) {
2218 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2219 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2220 addr0, addr1, rsrcDesc, offset, inst_offset);
2221 } else if (!instData.IDXEN && instData.OFFEN) {
2222 addr0.read();
2223 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2224 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2225 addr0, addr1, rsrcDesc, offset, inst_offset);
2226 } else if (instData.IDXEN && !instData.OFFEN) {
2227 addr0.read();
2228 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2229 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2230 addr1, addr0, rsrcDesc, offset, inst_offset);
2231 } else {
2232 addr0.read();
2233 addr1.read();
2234 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2235 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2236 addr1, addr0, rsrcDesc, offset, inst_offset);
2237 }
2238
2239 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2240 if (gpuDynInst->exec_mask[lane]) {
2241 (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
2242 = src[lane];
2243 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
2244 = cmp[lane];
2245 }
2246 }
2247
2248 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
2249 } // execute
2250
2251 void
2252 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
2253 {
2254 initAtomicAccess<VecElemU32>(gpuDynInst);
2255 } // initiateAcc
2256
2257 void
2258 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
2259 {
2260 if (isAtomicRet()) {
2261 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
2262
2263 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2264 if (gpuDynInst->exec_mask[lane]) {
2265 vdst[lane] = (reinterpret_cast<VecElemU32*>(
2266 gpuDynInst->d_data))[lane];
2267 }
2268 }
2269
2270 vdst.write();
2271 }
2272 } // completeAcc
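// Note: per the staging loop in execute() above, the CAS operands travel
// in separate buffers: x_data carries the swap value (src) and a_data the
// compare value (cmp), while d_data returns the original memory value for
// the GLC/returning case.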
2273 // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods ---
2274
2275 Inst_MUBUF__BUFFER_ATOMIC_ADD
2276 ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
2277 : Inst_MUBUF(iFmt, "buffer_atomic_add")
2278 {
2279 setFlag(AtomicAdd);
2280 if (instData.GLC) {
2281 setFlag(AtomicReturn);
2282 } else {
2283 setFlag(AtomicNoReturn);
2284 }
2285 setFlag(MemoryRef);
2286 setFlag(GlobalSegment);
2287 } // Inst_MUBUF__BUFFER_ATOMIC_ADD
2288
2289 Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
2290 {
2291 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD
2292
2293 // --- description from .arch file ---
2294 // 32b:
2295 // tmp = MEM[ADDR];
2296 // MEM[ADDR] += DATA;
2297 // RETURN_DATA = tmp.
2298 void
2299 Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
2300 {
2301 panicUnimplemented();
2302 } // execute
2303 // --- Inst_MUBUF__BUFFER_ATOMIC_SUB class methods ---
2304
2305 Inst_MUBUF__BUFFER_ATOMIC_SUB
2306 ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
2307 : Inst_MUBUF(iFmt, "buffer_atomic_sub")
2308 {
2309 setFlag(AtomicSub);
2310 if (instData.GLC) {
2311 setFlag(AtomicReturn);
2312 } else {
2313 setFlag(AtomicNoReturn);
2314 }
2315 setFlag(MemoryRef);
2316 setFlag(GlobalSegment);
2317 } // Inst_MUBUF__BUFFER_ATOMIC_SUB
2318
2319 Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
2320 {
2321 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB
2322
2323 // --- description from .arch file ---
2324 // 32b:
2325 // tmp = MEM[ADDR];
2326 // MEM[ADDR] -= DATA;
2327 // RETURN_DATA = tmp.
2328 void
2329 Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
2330 {
2331 panicUnimplemented();
2332 } // execute
2333 // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN class methods ---
2334
2335 Inst_MUBUF__BUFFER_ATOMIC_SMIN
2336 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
2337 : Inst_MUBUF(iFmt, "buffer_atomic_smin")
2338 {
2339 setFlag(AtomicMin);
2340 if (instData.GLC) {
2341 setFlag(AtomicReturn);
2342 } else {
2343 setFlag(AtomicNoReturn);
2344 }
2345 setFlag(MemoryRef);
2346 setFlag(GlobalSegment);
2347 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN
2348
2349 Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
2350 {
2351 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN
2352
2353 // --- description from .arch file ---
2354 // 32b:
2355 // tmp = MEM[ADDR];
2356 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
2357 // RETURN_DATA = tmp.
2358 void
2359 Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
2360 {
2361 panicUnimplemented();
2362 } // execute
2363 // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN class methods ---
2364
2365 Inst_MUBUF__BUFFER_ATOMIC_UMIN
2366 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
2367 : Inst_MUBUF(iFmt, "buffer_atomic_umin")
2368 {
2369 setFlag(AtomicMin);
2370 if (instData.GLC) {
2371 setFlag(AtomicReturn);
2372 } else {
2373 setFlag(AtomicNoReturn);
2374 }
2375 setFlag(MemoryRef);
2376 setFlag(GlobalSegment);
2377 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN
2378
2379 Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
2380 {
2381 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN
2382
2383 // --- description from .arch file ---
2384 // 32b:
2385 // tmp = MEM[ADDR];
2386 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
2387 // RETURN_DATA = tmp.
2388 void
2389 Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
2390 {
2391 panicUnimplemented();
2392 } // execute
2393 // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX class methods ---
2394
2395 Inst_MUBUF__BUFFER_ATOMIC_SMAX
2396 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
2397 : Inst_MUBUF(iFmt, "buffer_atomic_smax")
2398 {
2399 setFlag(AtomicMax);
2400 if (instData.GLC) {
2401 setFlag(AtomicReturn);
2402 } else {
2403 setFlag(AtomicNoReturn);
2404 }
2405 setFlag(MemoryRef);
2406 setFlag(GlobalSegment);
2407 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX
2408
2409 Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
2410 {
2411 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX
2412
2413 // --- description from .arch file ---
2414 // 32b:
2415 // tmp = MEM[ADDR];
2416 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
2417 // RETURN_DATA = tmp.
2418 void
2419 Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
2420 {
2421 panicUnimplemented();
2422 } // execute
2423 // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX class methods ---
2424
2425 Inst_MUBUF__BUFFER_ATOMIC_UMAX
2426 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
2427 : Inst_MUBUF(iFmt, "buffer_atomic_umax")
2428 {
2429 setFlag(AtomicMax);
2430 if (instData.GLC) {
2431 setFlag(AtomicReturn);
2432 } else {
2433 setFlag(AtomicNoReturn);
2434 }
2435 setFlag(MemoryRef);
2436 setFlag(GlobalSegment);
2437 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX
2438
2439 Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
2440 {
2441 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX
2442
2443 // --- description from .arch file ---
2444 // 32b:
2445 // tmp = MEM[ADDR];
2446 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
2447 // RETURN_DATA = tmp.
2448 void
2449 Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
2450 {
2451 panicUnimplemented();
2452 } // execute
2453 // --- Inst_MUBUF__BUFFER_ATOMIC_AND class methods ---
2454
2455 Inst_MUBUF__BUFFER_ATOMIC_AND
2456 ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
2457 : Inst_MUBUF(iFmt, "buffer_atomic_and")
2458 {
2459 setFlag(AtomicAnd);
2460 if (instData.GLC) {
2461 setFlag(AtomicReturn);
2462 } else {
2463 setFlag(AtomicNoReturn);
2464 }
2465 setFlag(MemoryRef);
2466 setFlag(GlobalSegment);
2467 } // Inst_MUBUF__BUFFER_ATOMIC_AND
2468
2469 Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
2470 {
2471 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND
2472
2473 // --- description from .arch file ---
2474 // 32b:
2475 // tmp = MEM[ADDR];
2476 // MEM[ADDR] &= DATA;
2477 // RETURN_DATA = tmp.
2478 void
2479 Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
2480 {
2481 panicUnimplemented();
2482 } // execute
2483 // --- Inst_MUBUF__BUFFER_ATOMIC_OR class methods ---
2484
2485 Inst_MUBUF__BUFFER_ATOMIC_OR
2486 ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
2487 : Inst_MUBUF(iFmt, "buffer_atomic_or")
2488 {
2489 setFlag(AtomicOr);
2490 if (instData.GLC) {
2491 setFlag(AtomicReturn);
2492 } else {
2493 setFlag(AtomicNoReturn);
2494 }
2495 setFlag(MemoryRef);
2496 setFlag(GlobalSegment);
2497 } // Inst_MUBUF__BUFFER_ATOMIC_OR
2498
2499 Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
2500 {
2501 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR
2502
2503 // --- description from .arch file ---
2504 // 32b:
2505 // tmp = MEM[ADDR];
2506 // MEM[ADDR] |= DATA;
2507 // RETURN_DATA = tmp.
2508 void
2509 Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
2510 {
2511 panicUnimplemented();
2512 } // execute
2513 // --- Inst_MUBUF__BUFFER_ATOMIC_XOR class methods ---
2514
2515 Inst_MUBUF__BUFFER_ATOMIC_XOR
2516 ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
2517 : Inst_MUBUF(iFmt, "buffer_atomic_xor")
2518 {
2519 setFlag(AtomicXor);
2520 if (instData.GLC) {
2521 setFlag(AtomicReturn);
2522 } else {
2523 setFlag(AtomicNoReturn);
2524 }
2525 setFlag(MemoryRef);
2526 setFlag(GlobalSegment);
2527 } // Inst_MUBUF__BUFFER_ATOMIC_XOR
2528
2529 Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
2530 {
2531 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR
2532
2533 // --- description from .arch file ---
2534 // 32b:
2535 // tmp = MEM[ADDR];
2536 // MEM[ADDR] ^= DATA;
2537 // RETURN_DATA = tmp.
2538 void
2539 Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
2540 {
2541 panicUnimplemented();
2542 } // execute
2543 // --- Inst_MUBUF__BUFFER_ATOMIC_INC class methods ---
2544
2545 Inst_MUBUF__BUFFER_ATOMIC_INC
2546 ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
2547 : Inst_MUBUF(iFmt, "buffer_atomic_inc")
2548 {
2549 setFlag(AtomicInc);
2550 if (instData.GLC) {
2551 setFlag(AtomicReturn);
2552 } else {
2553 setFlag(AtomicNoReturn);
2554 }
2555 setFlag(MemoryRef);
2556 setFlag(GlobalSegment);
2557 } // Inst_MUBUF__BUFFER_ATOMIC_INC
2558
2559 Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
2560 {
2561 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC
2562
2563 // --- description from .arch file ---
2564 // 32b:
2565 // tmp = MEM[ADDR];
2566 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
2567 // RETURN_DATA = tmp.
2568 void
2569 Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
2570 {
2571 panicUnimplemented();
2572 } // execute
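// Illustration (editor's sketch, not gem5 source): buffer_atomic_inc is a
// wrapping increment bounded by DATA, not a plain add. In host C++ terms:
//
//     uint32_t tmp = mem;                    // tmp = MEM[ADDR]
//     mem = (tmp >= data) ? 0 : tmp + 1;     // unsigned compare
//
// e.g. with data == 4 the memory value cycles 0, 1, 2, 3, 4, 0, 1, ...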
2573 // --- Inst_MUBUF__BUFFER_ATOMIC_DEC class methods ---
2574
2575 Inst_MUBUF__BUFFER_ATOMIC_DEC
2576 ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
2577 : Inst_MUBUF(iFmt, "buffer_atomic_dec")
2578 {
2579 setFlag(AtomicDec);
2580 if (instData.GLC) {
2581 setFlag(AtomicReturn);
2582 } else {
2583 setFlag(AtomicNoReturn);
2584 }
2585 setFlag(MemoryRef);
2586 setFlag(GlobalSegment);
2587 } // Inst_MUBUF__BUFFER_ATOMIC_DEC
2588
2589 Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
2590 {
2591 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC
2592
2593 // --- description from .arch file ---
2594 // 32b:
2595 // tmp = MEM[ADDR];
2596 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
2597 // (unsigned compare); RETURN_DATA = tmp.
2598 void
2599 Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
2600 {
2601 panicUnimplemented();
2602 } // execute
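// Illustration (editor's sketch, not gem5 source): buffer_atomic_dec is the
// wrapping counterpart of inc; the counter reloads DATA when it reaches 0 or
// is already out of range:
//
//     uint32_t tmp = mem;                               // tmp = MEM[ADDR]
//     mem = (tmp == 0 || tmp > data) ? data : tmp - 1;  // unsigned compare
//
// e.g. with data == 4 the memory value cycles 4, 3, 2, 1, 0, 4, 3, ...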
2603 // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 class methods ---
2604
2605 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2606 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
2607 : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
2608 {
2609 setFlag(AtomicExch);
2610 if (instData.GLC) {
2611 setFlag(AtomicReturn);
2612 } else {
2613 setFlag(AtomicNoReturn);
2614 }
2615 setFlag(MemoryRef);
2616 setFlag(GlobalSegment);
2617 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2618
2619 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
2620 {
2621 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2622
2623 // --- description from .arch file ---
2624 // 64b:
2625 // tmp = MEM[ADDR];
2626 // MEM[ADDR] = DATA[0:1];
2627 // RETURN_DATA[0:1] = tmp.
2628 void
2629 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
2630 {
2631 panicUnimplemented();
2632 } // execute
2633 // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 class methods ---
2634
2635 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2636 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
2637 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
2638 {
2639 setFlag(AtomicCAS);
2640 if (instData.GLC) {
2641 setFlag(AtomicReturn);
2642 } else {
2643 setFlag(AtomicNoReturn);
2644 }
2645 setFlag(MemoryRef);
2646 setFlag(GlobalSegment);
2647 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2648
2649 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2650 ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
2651 {
2652 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2653
2654 // --- description from .arch file ---
2655 // 64b:
2656 // tmp = MEM[ADDR];
2657 // src = DATA[0:1];
2658 // cmp = DATA[2:3];
2659 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
2660 // RETURN_DATA[0:1] = tmp.
2661 void
2662 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
2663 {
2664 panicUnimplemented();
2665 } // execute
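// Illustration (editor's sketch, not gem5 source): the 64-bit compare-and-
// swap packs both the store value and the expected value into one four-dword
// DATA operand. Sketched with the .arch notation for the dword slices:
//
//     uint64_t src = DATA[0:1];     // dwords 0-1: value to store on a match
//     uint64_t cmp = DATA[2:3];     // dwords 2-3: expected memory value
//     uint64_t tmp = mem;
//     if (tmp == cmp)
//         mem = src;
//     RETURN_DATA[0:1] = tmp;       // pre-op value, returned when GLC is set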
2666 // --- Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 class methods ---
2667
2668 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2669 ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
2670 : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
2671 {
2672 setFlag(AtomicAdd);
2673 if (instData.GLC) {
2674 setFlag(AtomicReturn);
2675 } else {
2676 setFlag(AtomicNoReturn);
2677 }
2678 setFlag(MemoryRef);
2679 setFlag(GlobalSegment);
2680 } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2681
2682 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
2683 {
2684 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2685
2686 // --- description from .arch file ---
2687 // 64b:
2688 // tmp = MEM[ADDR];
2689 // MEM[ADDR] += DATA[0:1];
2690 // RETURN_DATA[0:1] = tmp.
2691 void
2692 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
2693 {
2694 panicUnimplemented();
2695 } // execute
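// Illustration (editor's sketch, not gem5 source): the _x2 atomics operate
// on a 64-bit value held in a pair of consecutive dwords, with DATA[0] the
// low dword and DATA[1] the high dword. For add_x2 in host terms:
//
//     uint64_t tmp = mem64;                          // tmp = MEM[ADDR]
//     mem64 += ((uint64_t)DATA[1] << 32) | DATA[0];  // MEM[ADDR] += DATA[0:1]
//     // RETURN_DATA[0:1] = tmp, when GLC is set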
2696 // --- Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 class methods ---
2697
2698 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2699 ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
2700 : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
2701 {
2702 setFlag(AtomicSub);
2703 if (instData.GLC) {
2704 setFlag(AtomicReturn);
2705 } else {
2706 setFlag(AtomicNoReturn);
2707 }
2708 setFlag(MemoryRef);
2709 setFlag(GlobalSegment);
2710 } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2711
2712 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
2713 {
2714 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2715
2716 // --- description from .arch file ---
2717 // 64b:
2718 // tmp = MEM[ADDR];
2719 // MEM[ADDR] -= DATA[0:1];
2720 // RETURN_DATA[0:1] = tmp.
2721 void
2722 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
2723 {
2724 panicUnimplemented();
2725 } // execute
2726 // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 class methods ---
2727
2728 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2729 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
2730 : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
2731 {
2732 setFlag(AtomicMin);
2733 if (instData.GLC) {
2734 setFlag(AtomicReturn);
2735 } else {
2736 setFlag(AtomicNoReturn);
2737 }
2738 setFlag(MemoryRef);
2739 setFlag(GlobalSegment);
2740 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2741
2742 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
2743 {
2744 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2745
2746 // --- description from .arch file ---
2747 // 64b:
2748 // tmp = MEM[ADDR];
2749 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
2750 // RETURN_DATA[0:1] = tmp.
2751 void
2752 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
2753 {
2754 panicUnimplemented();
2755 } // execute
2756 // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 class methods ---
2757
2758 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2759 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
2760 : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
2761 {
2762 setFlag(AtomicMin);
2763 if (instData.GLC) {
2764 setFlag(AtomicReturn);
2765 } else {
2766 setFlag(AtomicNoReturn);
2767 }
2768 setFlag(MemoryRef);
2769 setFlag(GlobalSegment);
2770 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2771
2772 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
2773 {
2774 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2775
2776 // --- description from .arch file ---
2777 // 64b:
2778 // tmp = MEM[ADDR];
2779 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
2780 // RETURN_DATA[0:1] = tmp.
2781 void
2782 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
2783 {
2784 panicUnimplemented();
2785 } // execute
2786 // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 class methods ---
2787
2788 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2789 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
2790 : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
2791 {
2792 setFlag(AtomicMax);
2793 if (instData.GLC) {
2794 setFlag(AtomicReturn);
2795 } else {
2796 setFlag(AtomicNoReturn);
2797 }
2798 setFlag(MemoryRef);
2799 setFlag(GlobalSegment);
2800 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2801
2802 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
2803 {
2804 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2805
2806 // --- description from .arch file ---
2807 // 64b:
2808 // tmp = MEM[ADDR];
2809 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
2810 // RETURN_DATA[0:1] = tmp.
2811 void
2812 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
2813 {
2814 panicUnimplemented();
2815 } // execute
2816 // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 class methods ---
2817
2818 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2819 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
2820 : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
2821 {
2822 setFlag(AtomicMax);
2823 if (instData.GLC) {
2824 setFlag(AtomicReturn);
2825 } else {
2826 setFlag(AtomicNoReturn);
2827 }
2828 setFlag(MemoryRef);
2829 setFlag(GlobalSegment);
2830 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2831
2832 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
2833 {
2834 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2835
2836 // --- description from .arch file ---
2837 // 64b:
2838 // tmp = MEM[ADDR];
2839 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
2840 // RETURN_DATA[0:1] = tmp.
2841 void
2842 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
2843 {
2844 panicUnimplemented();
2845 } // execute
2846 // --- Inst_MUBUF__BUFFER_ATOMIC_AND_X2 class methods ---
2847
2848 Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2849 ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
2850 : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
2851 {
2852 setFlag(AtomicAnd);
2853 if (instData.GLC) {
2854 setFlag(AtomicReturn);
2855 } else {
2856 setFlag(AtomicNoReturn);
2857 }
2858 setFlag(MemoryRef);
2859 setFlag(GlobalSegment);
2860 } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2861
2862 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
2863 {
2864 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2865
2866 // --- description from .arch file ---
2867 // 64b:
2868 // tmp = MEM[ADDR];
2869 // MEM[ADDR] &= DATA[0:1];
2870 // RETURN_DATA[0:1] = tmp.
2871 void
2872 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
2873 {
2874 panicUnimplemented();
2875 } // execute
2876 // --- Inst_MUBUF__BUFFER_ATOMIC_OR_X2 class methods ---
2877
2878 Inst_MUBUF__BUFFER_ATOMIC_OR_X2
2879 ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
2880 : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
2881 {
2882 setFlag(AtomicOr);
2883 if (instData.GLC) {
2884 setFlag(AtomicReturn);
2885 } else {
2886 setFlag(AtomicNoReturn);
2887 }
setFlag(MemoryRef);
setFlag(GlobalSegment);
2888 } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2
2889
2890 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
2891 {
2892 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2
2893
2894 // --- description from .arch file ---
2895 // 64b:
2896 // tmp = MEM[ADDR];
2897 // MEM[ADDR] |= DATA[0:1];
2898 // RETURN_DATA[0:1] = tmp.
2899 void
2900 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
2901 {
2902 panicUnimplemented();
2903 } // execute
2904 // --- Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 class methods ---
2905
2906 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
2907 ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
2908 : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
2909 {
2910 setFlag(AtomicXor);
2911 if (instData.GLC) {
2912 setFlag(AtomicReturn);
2913 } else {
2914 setFlag(AtomicNoReturn);
2915 }
2916 setFlag(MemoryRef);
2917 setFlag(GlobalSegment);
2918 } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
2919
2920 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
2921 {
2922 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
2923
2924 // --- description from .arch file ---
2925 // 64b:
2926 // tmp = MEM[ADDR];
2927 // MEM[ADDR] ^= DATA[0:1];
2928 // RETURN_DATA[0:1] = tmp.
2929 void
2930 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
2931 {
2932 panicUnimplemented();
2933 } // execute
2934 // --- Inst_MUBUF__BUFFER_ATOMIC_INC_X2 class methods ---
2935
2936 Inst_MUBUF__BUFFER_ATOMIC_INC_X2
2937 ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
2938 : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
2939 {
2940 setFlag(AtomicInc);
2941 if (instData.GLC) {
2942 setFlag(AtomicReturn);
2943 } else {
2944 setFlag(AtomicNoReturn);
2945 }
2946 setFlag(MemoryRef);
2947 setFlag(GlobalSegment);
2948 } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2
2949
2950 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
2951 {
2952 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2
2953
2954 // --- description from .arch file ---
2955 // 64b:
2956 // tmp = MEM[ADDR];
2957 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
2958 // RETURN_DATA[0:1] = tmp.
2959 void
2960 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
2961 {
2962 panicUnimplemented();
2963 } // execute
2964 // --- Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 class methods ---
2965
2966 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
2967 ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
2968 : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
2969 {
2970 setFlag(AtomicDec);
2971 if (instData.GLC) {
2972 setFlag(AtomicReturn);
2973 } else {
2974 setFlag(AtomicNoReturn);
2975 }
2976 setFlag(MemoryRef);
2977 setFlag(GlobalSegment);
2978 } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
2979
2980 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
2981 {
2982 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
2983
2984 // --- description from .arch file ---
2985 // 64b:
2986 // tmp = MEM[ADDR];
2987 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
2988 // (unsigned compare);
2989 // RETURN_DATA[0:1] = tmp.
2990 void
2991 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
2992 {
2993 panicUnimplemented();
2994 } // execute
2995 } // namespace VegaISA
2996 } // namespace gem5