gem5 [DEVELOP-FOR-25.0]
mubuf.cc
1 /*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "arch/amdgpu/vega/insts/instructions.hh"
33
34 namespace gem5
35 {
36
37 namespace VegaISA
38 {
39 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_X class methods ---
40
41 Inst_MUBUF__BUFFER_LOAD_FORMAT_X
42 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
43 : Inst_MUBUF(iFmt, "buffer_load_format_x")
44 {
45 setFlag(MemoryRef);
46 setFlag(Load);
47 setFlag(GlobalSegment);
48 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X
49
50 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
51 {
52 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X
53
54 // --- description from .arch file ---
55 // Untyped buffer load 1 dword with format conversion.
56 void
57 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
58 {
59 panicUnimplemented();
60 } // execute
61
62 void
63 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
64 {
65 } // initiateAcc
66
67 void
68 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
69 {
70 } // completeAcc
71 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XY class methods ---
72
73 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
74 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
75 : Inst_MUBUF(iFmt, "buffer_load_format_xy")
76 {
77 setFlag(MemoryRef);
78 setFlag(Load);
79 setFlag(GlobalSegment);
80 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
81
82 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
83 {
84 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
85
86 // --- description from .arch file ---
87 // Untyped buffer load 2 dwords with format conversion.
88 void
89 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
90 {
91 panicUnimplemented();
92 } // execute
93
94 void
95 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
96 {
97 } // initiateAcc
98
99 void
100 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
101 {
102 } // completeAcc
103 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ class methods ---
104
105 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
106 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
107 : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
108 {
109 setFlag(MemoryRef);
110 setFlag(Load);
111 setFlag(GlobalSegment);
112 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
113
114 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
115 {
116 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
117
118 // --- description from .arch file ---
119 // Untyped buffer load 3 dwords with format conversion.
120 void
121 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
122 {
123 panicUnimplemented();
124 } // execute
125
126 void
127 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
128 {
129 } // initiateAcc
130
131 void
132 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
133 {
134 } // completeAcc
135 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW class methods ---
136
137 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
138 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
139 : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
140 {
141 setFlag(MemoryRef);
142 setFlag(Load);
143 setFlag(GlobalSegment);
144 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
145
146 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
147 {
148 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
149
150 // --- description from .arch file ---
151 // Untyped buffer load 4 dwords with format conversion.
152 void
153 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
154 {
155 panicUnimplemented();
156 } // execute
157
158 void
159 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
160 {
161 } // initiateAcc
162
163 void
164 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
165 {
166 } // completeAcc
167 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_X class methods ---
168
169 Inst_MUBUF__BUFFER_STORE_FORMAT_X
170 ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
171 : Inst_MUBUF(iFmt, "buffer_store_format_x")
172 {
173 setFlag(MemoryRef);
174 setFlag(Store);
175 setFlag(GlobalSegment);
176 } // Inst_MUBUF__BUFFER_STORE_FORMAT_X
177
178 Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
179 {
180 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X
181
182 // --- description from .arch file ---
183 // Untyped buffer store 1 dword with format conversion.
184 void
185 Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
186 {
187 panicUnimplemented();
188 } // execute
189
190 void
191 Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
192 {
193 } // initiateAcc
194
195 void
196 Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
197 {
198 } // completeAcc
199 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XY class methods ---
200
201 Inst_MUBUF__BUFFER_STORE_FORMAT_XY
202 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
203 : Inst_MUBUF(iFmt, "buffer_store_format_xy")
204 {
205 setFlag(MemoryRef);
206 setFlag(Store);
207 setFlag(GlobalSegment);
208 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY
209
210 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
211 {
212 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY
213
214 // --- description from .arch file ---
215 // Untyped buffer store 2 dwords with format conversion.
216 void
217 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
218 {
219 panicUnimplemented();
220 } // execute
221
222 void
223 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
224 {
225 } // initiateAcc
226
227 void
228 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
229 {
230 } // completeAcc
231 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ class methods ---
232
233 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
234 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
235 : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
236 {
237 setFlag(MemoryRef);
238 setFlag(Store);
239 setFlag(GlobalSegment);
240 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
241
242 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
243 {
244 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
245
246 // --- description from .arch file ---
247 // Untyped buffer store 3 dwords with format conversion.
248 void
249 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
250 {
251 panicUnimplemented();
252 } // execute
253
254 void
255 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
256 {
257 } // initiateAcc
258
259 void
260 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
261 {
262 } // completeAcc
263 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW class methods ---
264
265 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
266 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
267 : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
268 {
269 setFlag(MemoryRef);
270 setFlag(Store);
271 setFlag(GlobalSegment);
272 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
273
274 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
275 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
276 {
277 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
278
279 // --- description from .arch file ---
280 // Untyped buffer store 4 dwords with format conversion.
281 void
282 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
283 {
284 panicUnimplemented();
285 } // execute
286
287 void
288 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
289 {
290 } // initiateAcc
291
292 void
293 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
294 {
295 } // completeAcc
296 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X class methods ---
297
298 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
299 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
300 : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
301 {
302 setFlag(MemoryRef);
303 setFlag(Load);
304 setFlag(GlobalSegment);
305 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
306
307 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
308 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
309 {
310 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
311
312 // --- description from .arch file ---
313 // Untyped buffer load 1 dword with format conversion.
314 void
315 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
316 {
317 panicUnimplemented();
318 } // execute
319
320 void
321 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
322 {
323 } // initiateAcc
324
325 void
326 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
327 {
328 } // completeAcc
329 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY class methods ---
330
331 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
332 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
333 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
334 {
335 setFlag(MemoryRef);
336 setFlag(Load);
337 setFlag(GlobalSegment);
338 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
339
340 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
341 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
342 {
343 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
344
345 // --- description from .arch file ---
346 // Untyped buffer load 2 dwords with format conversion.
347 void
348 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
349 {
350 panicUnimplemented();
351 } // execute
352
353 void
354 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
355 GPUDynInstPtr gpuDynInst)
356 {
357 } // initiateAcc
358
359 void
360 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
361 GPUDynInstPtr gpuDynInst)
362 {
363 } // completeAcc
364 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ class methods ---
365
366 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
367 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
368 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
369 {
370 setFlag(MemoryRef);
371 setFlag(Load);
372 setFlag(GlobalSegment);
373 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
374
375 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
376 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
377 {
378 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
379
380 // --- description from .arch file ---
381 // Untyped buffer load 3 dwords with format conversion.
382 void
383 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
384 {
385 panicUnimplemented();
386 } // execute
387
388 void
389 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
390 GPUDynInstPtr gpuDynInst)
391 {
392 } // initiateAcc
393
394 void
395 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
396 GPUDynInstPtr gpuDynInst)
397 {
398 } // completeAcc
399 // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW class methods ---
400
401 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
402 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
403 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
404 {
405 setFlag(MemoryRef);
406 setFlag(Load);
407 setFlag(GlobalSegment);
408 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
409
410 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
411 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
412 {
413 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
414
415 // --- description from .arch file ---
416 // Untyped buffer load 4 dwords with format conversion.
417 void
418 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
419 {
420 panicUnimplemented();
421 } // execute
422
423 void
424 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
425 GPUDynInstPtr gpuDynInst)
426 {
427 } // initiateAcc
428
429 void
430 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
431 GPUDynInstPtr gpuDynInst)
432 {
433 } // completeAcc
434 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X class methods ---
435
436 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
437 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
438 : Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
439 {
440 setFlag(Store);
441 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
442
443 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
444 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
445 {
446 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
447
448 // --- description from .arch file ---
449 // Untyped buffer store 1 dword with format conversion.
450 void
451 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
452 {
453 panicUnimplemented();
454 } // execute
455
456 void
457 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
458 GPUDynInstPtr gpuDynInst)
459 {
460 } // initiateAcc
461
462 void
463 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
464 GPUDynInstPtr gpuDynInst)
465 {
466 } // completeAcc
467 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY class methods ---
468
469 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
470 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
471 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
472 {
473 setFlag(MemoryRef);
474 setFlag(Store);
475 setFlag(GlobalSegment);
476 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
477
478 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
479 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
480 {
481 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
482
483 // --- description from .arch file ---
484 // Untyped buffer store 2 dwords with format conversion.
485 void
486 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
487 {
488 panicUnimplemented();
489 } // execute
490
491 void
492 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
493 GPUDynInstPtr gpuDynInst)
494 {
495 } // initiateAcc
496
497 void
498 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
499 GPUDynInstPtr gpuDynInst)
500 {
501 } // completeAcc
502 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ class methods ---
503
504 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
505 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
506 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
507 {
508 setFlag(MemoryRef);
509 setFlag(Store);
510 setFlag(GlobalSegment);
511 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
512
513 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
514 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
515 {
516 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
517
518 // --- description from .arch file ---
519 // Untyped buffer store 3 dwords with format conversion.
520 void
521 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
522 {
523 panicUnimplemented();
524 } // execute
525
526 void
527 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
528 GPUDynInstPtr gpuDynInst)
529 {
530 } // initiateAcc
531
532 void
533 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
534 GPUDynInstPtr gpuDynInst)
535 {
536 } // completeAcc
537 // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW class methods ---
538
539 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
540 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
541 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
542 {
543 setFlag(MemoryRef);
544 setFlag(Store);
545 setFlag(GlobalSegment);
546 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
547
548 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
549 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
550 {
551 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
552
553 // --- description from .arch file ---
554 // Untyped buffer store 4 dwords with format conversion.
555 void
556 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
557 {
558 panicUnimplemented();
559 } // execute
560
561 void
562 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
563 GPUDynInstPtr gpuDynInst)
564 {
565 } // initiateAcc
566
567 void
568 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
569 GPUDynInstPtr gpuDynInst)
570 {
571 } // completeAcc
572 // --- Inst_MUBUF__BUFFER_LOAD_UBYTE class methods ---
573
574 Inst_MUBUF__BUFFER_LOAD_UBYTE
575 ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
576 : Inst_MUBUF(iFmt, "buffer_load_ubyte")
577 {
578 setFlag(MemoryRef);
579 setFlag(Load);
580 if (instData.LDS) {
581 setFlag(GroupSegment);
582 } else {
583 setFlag(GlobalSegment);
584 }
585 } // Inst_MUBUF__BUFFER_LOAD_UBYTE
586
587 Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
588 {
589 } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE
590
591 // --- description from .arch file ---
592 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
593 void
594 Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
595 {
596 Wavefront *wf = gpuDynInst->wavefront();
597
598 if (gpuDynInst->exec_mask.none()) {
599 wf->decVMemInstsIssued();
600 wf->untrackVMemInst(gpuDynInst);
601 return;
602 }
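// [Editorial note] The early-out above undoes issue-stage bookkeeping:
// when every lane is masked off, the wavefront has already counted this
// instruction as an issued vector-memory op, so the counter and the
// outstanding-instruction tracking are rolled back and no memory request
// is ever created; the instruction completes as a no-op.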
603
604 gpuDynInst->execUnitId = wf->execUnitId;
605 gpuDynInst->latency.init(gpuDynInst->computeUnit());
606 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
607
608 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
609 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
610 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
611 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
612
613 rsrcDesc.read();
614 offset.read();
615
616 int inst_offset = instData.OFFSET;
617
618 if (!instData.IDXEN && !instData.OFFEN) {
619 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
620 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
621 addr0, addr1, rsrcDesc, offset, inst_offset);
622 } else if (!instData.IDXEN && instData.OFFEN) {
623 addr0.read();
624 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
625 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
626 addr0, addr1, rsrcDesc, offset, inst_offset);
627 } else if (instData.IDXEN && !instData.OFFEN) {
628 addr0.read();
629 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
630 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
631 addr1, addr0, rsrcDesc, offset, inst_offset);
632 } else {
633 addr0.read();
634 addr1.read();
635 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
636 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
637 addr1, addr0, rsrcDesc, offset, inst_offset);
638 }
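// [Editorial note] The four branches above select which VGPRs feed the
// address calculation. A sketch of the intended semantics, following the
// CDNA/Vega buffer-addressing rules (names here are illustrative only):
//
//     vindex  = IDXEN ? VGPR[VADDR] : 0;
//     voffset = OFFEN ? VGPR[VADDR + (IDXEN ? 1 : 0)] : 0;
//     addr    = SRD.base + SOFFSET + inst_offset
//             + vindex * SRD.stride + voffset;
//
// which is why addr0 and addr1 swap argument positions when IDXEN is set.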
639
640 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
641 } // execute
642
643 void
644 Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
645 {
646 initMemRead<VecElemU8>(gpuDynInst);
647 } // initiateAcc
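// [Editorial note] initMemRead<VecElemU8> issues one byte-sized read per
// active lane into the instruction's d_data staging buffer; the zero
// extension to the 32-bit VGPR destination happens later, in
// completeAcc() below.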
648
649 void
650 Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
651 {
652 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
653
654 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
655 if (gpuDynInst->exec_mask[lane]) {
656 if (!oobMask[lane]) {
657 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
658 gpuDynInst->d_data))[lane]);
659 } else {
660 vdst[lane] = 0;
661 }
662 }
663 }
664
665 vdst.write();
666 } // completeAcc
667
668 // --- Inst_MUBUF__BUFFER_LOAD_SBYTE class methods ---
669
670 Inst_MUBUF__BUFFER_LOAD_SBYTE
671 ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
672 : Inst_MUBUF(iFmt, "buffer_load_sbyte")
673 {
674 setFlag(MemoryRef);
675 setFlag(Load);
676 setFlag(GlobalSegment);
677 } // Inst_MUBUF__BUFFER_LOAD_SBYTE
678
679 Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
680 {
681 } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE
682
683 // --- description from .arch file ---
684 // Untyped buffer load signed byte (sign extend to VGPR destination).
685 void
686 Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
687 {
688 panicUnimplemented();
689 } // execute
690
691 void
692 Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
693 {
694 } // initiateAcc
695
696 void
697 Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
698 {
699 } // completeAcc
700 // --- Inst_MUBUF__BUFFER_LOAD_USHORT class methods ---
701
702 Inst_MUBUF__BUFFER_LOAD_USHORT
703 ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
704 : Inst_MUBUF(iFmt, "buffer_load_ushort")
705 {
706 setFlag(MemoryRef);
707 setFlag(Load);
708 if (instData.LDS) {
709 setFlag(GroupSegment);
710 } else {
711 setFlag(GlobalSegment);
712 }
713 } // Inst_MUBUF__BUFFER_LOAD_USHORT
714
715 Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
716 {
717 } // ~Inst_MUBUF__BUFFER_LOAD_USHORT
718
719 // --- description from .arch file ---
720 // Untyped buffer load unsigned short (zero extend to VGPR destination).
721 void
722 Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
723 {
724 Wavefront *wf = gpuDynInst->wavefront();
725
726 if (gpuDynInst->exec_mask.none()) {
727 wf->decVMemInstsIssued();
728 wf->untrackVMemInst(gpuDynInst);
729 return;
730 }
731
732 gpuDynInst->execUnitId = wf->execUnitId;
733 gpuDynInst->latency.init(gpuDynInst->computeUnit());
734 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
735
736 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
737 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
738 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
739 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
740
741 rsrcDesc.read();
742 offset.read();
743
744 int inst_offset = instData.OFFSET;
745
746 if (!instData.IDXEN && !instData.OFFEN) {
747 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
748 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
749 addr0, addr1, rsrcDesc, offset, inst_offset);
750 } else if (!instData.IDXEN && instData.OFFEN) {
751 addr0.read();
752 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
753 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
754 addr0, addr1, rsrcDesc, offset, inst_offset);
755 } else if (instData.IDXEN && !instData.OFFEN) {
756 addr0.read();
757 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
758 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
759 addr1, addr0, rsrcDesc, offset, inst_offset);
760 } else {
761 addr0.read();
762 addr1.read();
763 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
764 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
765 addr1, addr0, rsrcDesc, offset, inst_offset);
766 }
767
768 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
769 } // execute
770
771 void
772 Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
773 {
774 initMemRead<VecElemU16>(gpuDynInst);
775 } // initiateAcc
776
777 void
778 Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
779 {
780 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
781
782 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
783 if (gpuDynInst->exec_mask[lane]) {
784 if (!oobMask[lane]) {
785 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
786 gpuDynInst->d_data))[lane]);
787 } else {
788 vdst[lane] = 0;
789 }
790 }
791 }
792
793 vdst.write();
794 } // completeAcc
795
796 // --- Inst_MUBUF__BUFFER_LOAD_SSHORT class methods ---
797
798 Inst_MUBUF__BUFFER_LOAD_SSHORT
799 ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
800 : Inst_MUBUF(iFmt, "buffer_load_sshort")
801 {
802 setFlag(MemoryRef);
803 setFlag(Load);
804 setFlag(GlobalSegment);
805 } // Inst_MUBUF__BUFFER_LOAD_SSHORT
806
807 Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
808 {
809 } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT
810
811 // --- description from .arch file ---
812 // Untyped buffer load signed short (sign extend to VGPR destination).
813 void
814 Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
815 {
816 panicUnimplemented();
817 } // execute
818
819 void
820 Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
821 {
822 } // initiateAcc
823
824 void
825 Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
826 {
827 } // completeAcc
828 // --- Inst_MUBUF__BUFFER_LOAD_SHORT_D16 class methods ---
829
830 Inst_MUBUF__BUFFER_LOAD_SHORT_D16
831 ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16(InFmt_MUBUF *iFmt)
832 : Inst_MUBUF(iFmt, "buffer_load_short_d16")
833 {
834 setFlag(MemoryRef);
835 setFlag(Load);
836 if (instData.LDS) {
837 setFlag(GroupSegment);
838 warn("BUFFER.LDS not implemented!");
839 } else {
840 setFlag(GlobalSegment);
841 }
842 } // Inst_MUBUF__BUFFER_LOAD_SHORT_D16
843
844 Inst_MUBUF__BUFFER_LOAD_SHORT_D16::~Inst_MUBUF__BUFFER_LOAD_SHORT_D16()
845 {
846 } // ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16
847
848 // --- description from .arch file ---
849 // RETURN_DATA[15 : 0].u16 = MEM[ADDR].u16;
850 // // RETURN_DATA[31:16] is preserved.
851 void
852 Inst_MUBUF__BUFFER_LOAD_SHORT_D16::execute(GPUDynInstPtr gpuDynInst)
853 {
854 Wavefront *wf = gpuDynInst->wavefront();
855
856 if (gpuDynInst->exec_mask.none()) {
857 wf->decVMemInstsIssued();
858 wf->untrackVMemInst(gpuDynInst);
859 return;
860 }
861
862 gpuDynInst->execUnitId = wf->execUnitId;
863 gpuDynInst->latency.init(gpuDynInst->computeUnit());
864 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
865
866 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
867 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
868 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
869 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
870
871 rsrcDesc.read();
872 offset.read();
873
874 int inst_offset = instData.OFFSET;
875
876 // For explanation of buffer addressing, see section 9.1.5 in:
877 // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
878 // instruction-set-architectures/
879 // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
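// [Editorial note] The V# (buffer resource descriptor) read into rsrcDesc
// above occupies four consecutive SGPRs (hence SRSRC * 4). Schematically,
// per the ISA manual cited above (field layout shown for illustration
// only, not gem5 code):
//
//     struct BufferRsrc {
//         uint64_t baseAddress : 48; // plus swizzle-control bits
//         uint64_t stride      : 14; // bytes between array elements
//         uint32_t numRecords;       // in-bounds limit behind oobMask
//         uint32_t flags;            // data format, add-tid, etc.
//     };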
880 if (!instData.IDXEN && !instData.OFFEN) {
881 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
882 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
883 addr0, addr1, rsrcDesc, offset, inst_offset);
884 } else if (!instData.IDXEN && instData.OFFEN) {
885 addr0.read();
886 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
887 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
888 addr0, addr1, rsrcDesc, offset, inst_offset);
889 } else if (instData.IDXEN && !instData.OFFEN) {
890 addr0.read();
891 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
892 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
893 addr1, addr0, rsrcDesc, offset, inst_offset);
894 } else {
895 addr0.read();
896 addr1.read();
897 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
898 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
899 addr1, addr0, rsrcDesc, offset, inst_offset);
900 }
901
902 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
903 } // execute
904
905 void
906 Inst_MUBUF__BUFFER_LOAD_SHORT_D16::initiateAcc(GPUDynInstPtr gpuDynInst)
907 {
908 initMemRead<VecElemU16>(gpuDynInst);
909 } // initiateAcc
910
911 void
912 Inst_MUBUF__BUFFER_LOAD_SHORT_D16::completeAcc(GPUDynInstPtr gpuDynInst)
913 {
914 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
915
916 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
917 if (gpuDynInst->exec_mask[lane]) {
918 if (!oobMask[lane]) {
919 VecElemU16 buf_val = (reinterpret_cast<VecElemU16*>(
920 gpuDynInst->d_data))[lane];
921 replaceBits(vdst[lane], 15, 0, buf_val);
922 } else {
923 vdst[lane] = 0;
924 }
925 }
926 }
927
928 vdst.write();
929 } // completeAcc
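// [Editorial note] replaceBits(vdst[lane], 15, 0, buf_val) above writes
// only the low half of the destination VGPR, matching the .arch
// pseudo-code "RETURN_DATA[31:16] is preserved". The _D16_HI variant
// below is the mirror image: it fills bits 31:16 and preserves 15:0.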
930 // --- Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI class methods ---
931
932 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
933 ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(InFmt_MUBUF *iFmt)
934 : Inst_MUBUF(iFmt, "buffer_load_short_d16_hi")
935 {
936 setFlag(MemoryRef);
937 setFlag(Load);
938 if (instData.LDS) {
939 setFlag(GroupSegment);
940 warn("BUFFER.LDS not implemented!");
941 } else {
942 setFlag(GlobalSegment);
943 }
944 } // Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
945
946 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
947 ::~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI()
948 {
949 } // ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
950
951 // --- description from .arch file ---
952 // VDATA[31 : 16].b16 = MEM[ADDR].b16;
953 // // VDATA[15:0] is preserved.
954 void
955 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::execute(GPUDynInstPtr gpuDynInst)
956 {
957 Wavefront *wf = gpuDynInst->wavefront();
958
959 if (gpuDynInst->exec_mask.none()) {
960 wf->decVMemInstsIssued();
961 wf->untrackVMemInst(gpuDynInst);
962 return;
963 }
964
965 gpuDynInst->execUnitId = wf->execUnitId;
966 gpuDynInst->latency.init(gpuDynInst->computeUnit());
967 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
968
969 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
970 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
971 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
972 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
973
974 rsrcDesc.read();
975 offset.read();
976
977 int inst_offset = instData.OFFSET;
978
979 // For explanation of buffer addressing, see section 9.1.5 in:
980 // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
981 // instruction-set-architectures/
982 // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
983 if (!instData.IDXEN && !instData.OFFEN) {
984 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
985 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
986 addr0, addr1, rsrcDesc, offset, inst_offset);
987 } else if (!instData.IDXEN && instData.OFFEN) {
988 addr0.read();
989 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
990 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
991 addr0, addr1, rsrcDesc, offset, inst_offset);
992 } else if (instData.IDXEN && !instData.OFFEN) {
993 addr0.read();
994 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
995 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
996 addr1, addr0, rsrcDesc, offset, inst_offset);
997 } else {
998 addr0.read();
999 addr1.read();
1000 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1001 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1002 addr1, addr0, rsrcDesc, offset, inst_offset);
1003 }
1004
1005 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1006 } // execute
1007
1008 void
1009 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst)
1010 {
1011 initMemRead<VecElemU16>(gpuDynInst);
1012 } // initiateAcc
1013
1014 void
1015 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst)
1016 {
1017 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
1018
1019 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1020 if (gpuDynInst->exec_mask[lane]) {
1021 if (!oobMask[lane]) {
1022 VecElemU16 buf_val = (reinterpret_cast<VecElemU16*>(
1023 gpuDynInst->d_data))[lane];
1024 replaceBits(vdst[lane], 31, 16, buf_val);
1025 } else {
1026 vdst[lane] = 0;
1027 }
1028 }
1029 }
1030
1031 vdst.write();
1032 } // completeAcc
1033 // --- Inst_MUBUF__BUFFER_LOAD_DWORD class methods ---
1034
1035 Inst_MUBUF__BUFFER_LOAD_DWORD
1036 ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
1037 : Inst_MUBUF(iFmt, "buffer_load_dword")
1038 {
1039 setFlag(MemoryRef);
1040 setFlag(Load);
1041 if (instData.LDS) {
1042 setFlag(GroupSegment);
1043 } else {
1044 setFlag(GlobalSegment);
1045 }
1046 } // Inst_MUBUF__BUFFER_LOAD_DWORD
1047
1048 Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
1049 {
1050 } // ~Inst_MUBUF__BUFFER_LOAD_DWORD
1051
1052 // --- description from .arch file ---
1053 // Untyped buffer load dword.
1054 void
1055 Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
1056 {
1057 Wavefront *wf = gpuDynInst->wavefront();
1058
1059 if (gpuDynInst->exec_mask.none()) {
1060 wf->decVMemInstsIssued();
1061 wf->untrackVMemInst(gpuDynInst);
1062 return;
1063 }
1064
1065 gpuDynInst->execUnitId = wf->execUnitId;
1066 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1067 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1068
1069 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1070 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1071 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1072 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1073
1074 rsrcDesc.read();
1075 offset.read();
1076
1077 int inst_offset = instData.OFFSET;
1078
1079 // For explanation of buffer addressing, see section 9.1.5 in:
1080 // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/
1081 // instruction-set-architectures/
1082 // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf
1083 if (!instData.IDXEN && !instData.OFFEN) {
1084 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1085 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1086 addr0, addr1, rsrcDesc, offset, inst_offset);
1087 } else if (!instData.IDXEN && instData.OFFEN) {
1088 addr0.read();
1089 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1090 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1091 addr0, addr1, rsrcDesc, offset, inst_offset);
1092 } else if (instData.IDXEN && !instData.OFFEN) {
1093 addr0.read();
1094 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1095 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1096 addr1, addr0, rsrcDesc, offset, inst_offset);
1097 } else {
1098 addr0.read();
1099 addr1.read();
1100 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1101 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1102 addr1, addr0, rsrcDesc, offset, inst_offset);
1103 }
1104
1105 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1106 } // execute
1107
1108 void
1109 Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
1110 {
1111 initMemRead<VecElemU32>(gpuDynInst);
1112 } // initiateAcc
1113
1114 void
1115 Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
1116 {
1117 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
1118
1119 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1120 if (gpuDynInst->exec_mask[lane]) {
1121 if (!oobMask[lane]) {
1122 vdst[lane] = (reinterpret_cast<VecElemU32*>(
1123 gpuDynInst->d_data))[lane];
1124 } else {
1125 vdst[lane] = 0;
1126 }
1127 }
1128 }
1129
1130 vdst.write();
1131 } // completeAcc
1132 // --- Inst_MUBUF__BUFFER_LOAD_DWORDX2 class methods ---
1133
1134 Inst_MUBUF__BUFFER_LOAD_DWORDX2
1135 ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
1136 : Inst_MUBUF(iFmt, "buffer_load_dwordx2")
1137 {
1138 setFlag(MemoryRef);
1139 setFlag(Load);
1140 if (instData.LDS) {
1141 setFlag(GroupSegment);
1142 } else {
1143 setFlag(GlobalSegment);
1144 }
1145 } // Inst_MUBUF__BUFFER_LOAD_DWORDX2
1146
1147 Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
1148 {
1149 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2
1150
1151 // --- description from .arch file ---
1152 // Untyped buffer load 2 dwords.
1153 void
1154 Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
1155 {
1156 Wavefront *wf = gpuDynInst->wavefront();
1157
1158 if (gpuDynInst->exec_mask.none()) {
1159 wf->decVMemInstsIssued();
1160 wf->untrackVMemInst(gpuDynInst);
1161 return;
1162 }
1163
1164 gpuDynInst->execUnitId = wf->execUnitId;
1165 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1166 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1167
1168 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1169 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1170 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1171 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1172
1173 rsrcDesc.read();
1174 offset.read();
1175
1176 int inst_offset = instData.OFFSET;
1177
1178 if (!instData.IDXEN && !instData.OFFEN) {
1179 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1180 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1181 addr0, addr1, rsrcDesc, offset, inst_offset);
1182 } else if (!instData.IDXEN && instData.OFFEN) {
1183 addr0.read();
1184 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1185 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1186 addr0, addr1, rsrcDesc, offset, inst_offset);
1187 } else if (instData.IDXEN && !instData.OFFEN) {
1188 addr0.read();
1189 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1190 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1191 addr1, addr0, rsrcDesc, offset, inst_offset);
1192 } else {
1193 addr0.read();
1194 addr1.read();
1195 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1196 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1197 addr1, addr0, rsrcDesc, offset, inst_offset);
1198 }
1199
1200 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1201 } // execute
1202
1203 void
1204 Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
1205 {
1206 initMemRead<2>(gpuDynInst);
1207 } // initiateAcc
1208
1209 void
1210 Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
1211 {
1212 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
1213 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
1214
1215 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1216 if (gpuDynInst->exec_mask[lane]) {
1217 if (!oobMask[lane]) {
1218 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1219 gpuDynInst->d_data))[lane * 2];
1220 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1221 gpuDynInst->d_data))[lane * 2 + 1];
1222 } else {
1223 vdst0[lane] = 0;
1224 vdst1[lane] = 0;
1225 }
1226 }
1227 }
1228
1229 vdst0.write();
1230 vdst1.write();
1231 } // completeAcc
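// [Editorial note] For multi-dword accesses d_data is laid out with each
// lane's dwords contiguous, e.g. for dwordx2 with 64 lanes per wavefront:
//
//     d_data = { l0.d0, l0.d1, l1.d0, l1.d1, ..., l63.d0, l63.d1 }
//
// hence the [lane * 2] / [lane * 2 + 1] indexing above; the dwordx3/x4
// variants below follow the same pattern with strides 3 and 4.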
1232 // --- Inst_MUBUF__BUFFER_LOAD_DWORDX3 class methods ---
1233
1234 Inst_MUBUF__BUFFER_LOAD_DWORDX3
1235 ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
1236 : Inst_MUBUF(iFmt, "buffer_load_dwordx3")
1237 {
1238 setFlag(MemoryRef);
1239 setFlag(Load);
1240 if (instData.LDS) {
1241 setFlag(GroupSegment);
1242 } else {
1243 setFlag(GlobalSegment);
1244 }
1245 } // Inst_MUBUF__BUFFER_LOAD_DWORDX3
1246
1247 Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
1248 {
1249 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3
1250
1251 // --- description from .arch file ---
1252 // Untyped buffer load 3 dwords.
1253 void
1254 Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
1255 {
1256 Wavefront *wf = gpuDynInst->wavefront();
1257
1258 if (gpuDynInst->exec_mask.none()) {
1259 wf->decVMemInstsIssued();
1260 wf->untrackVMemInst(gpuDynInst);
1261 return;
1262 }
1263
1264 gpuDynInst->execUnitId = wf->execUnitId;
1265 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1266 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1267
1268 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1269 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1270 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1271 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1272
1273 rsrcDesc.read();
1274 offset.read();
1275
1276 int inst_offset = instData.OFFSET;
1277
1278 if (!instData.IDXEN && !instData.OFFEN) {
1279 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1280 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1281 addr0, addr1, rsrcDesc, offset, inst_offset);
1282 } else if (!instData.IDXEN && instData.OFFEN) {
1283 addr0.read();
1284 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1285 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1286 addr0, addr1, rsrcDesc, offset, inst_offset);
1287 } else if (instData.IDXEN && !instData.OFFEN) {
1288 addr0.read();
1289 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1290 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1291 addr1, addr0, rsrcDesc, offset, inst_offset);
1292 } else {
1293 addr0.read();
1294 addr1.read();
1295 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1296 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1297 addr1, addr0, rsrcDesc, offset, inst_offset);
1298 }
1299
1300 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1301 } // execute
1302
1303 void
1304 Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
1305 {
1306 initMemRead<3>(gpuDynInst);
1307 } // initiateAcc
1308
1309 void
1310 Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
1311 {
1312 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
1313 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
1314 VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
1315
1316 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1317 if (gpuDynInst->exec_mask[lane]) {
1318 if (!oobMask[lane]) {
1319 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1320 gpuDynInst->d_data))[lane * 3];
1321 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1322 gpuDynInst->d_data))[lane * 3 + 1];
1323 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
1324 gpuDynInst->d_data))[lane * 3 + 2];
1325 } else {
1326 vdst0[lane] = 0;
1327 vdst1[lane] = 0;
1328 vdst2[lane] = 0;
1329 }
1330 }
1331 }
1332
1333 vdst0.write();
1334 vdst1.write();
1335 vdst2.write();
1336 } // completeAcc
1337 // --- Inst_MUBUF__BUFFER_LOAD_DWORDX4 class methods ---
1338
1339 Inst_MUBUF__BUFFER_LOAD_DWORDX4
1340 ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
1341 : Inst_MUBUF(iFmt, "buffer_load_dwordx4")
1342 {
1343 setFlag(MemoryRef);
1344 setFlag(Load);
1345 if (instData.LDS) {
1346 setFlag(GroupSegment);
1347 } else {
1348 setFlag(GlobalSegment);
1349 }
1350 } // Inst_MUBUF__BUFFER_LOAD_DWORDX4
1351
1352 Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
1353 {
1354 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4
1355
1356 // --- description from .arch file ---
1357 // Untyped buffer load 4 dwords.
1358 void
1359 Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
1360 {
1361 Wavefront *wf = gpuDynInst->wavefront();
1362
1363 if (gpuDynInst->exec_mask.none()) {
1364 wf->decVMemInstsIssued();
1365 wf->untrackVMemInst(gpuDynInst);
1366 return;
1367 }
1368
1369 gpuDynInst->execUnitId = wf->execUnitId;
1370 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1371 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1372
1373 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1374 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1375 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1376 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1377
1378 rsrcDesc.read();
1379 offset.read();
1380
1381 int inst_offset = instData.OFFSET;
1382
1383 if (!instData.IDXEN && !instData.OFFEN) {
1384 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1385 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1386 addr0, addr1, rsrcDesc, offset, inst_offset);
1387 } else if (!instData.IDXEN && instData.OFFEN) {
1388 addr0.read();
1389 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1390 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1391 addr0, addr1, rsrcDesc, offset, inst_offset);
1392 } else if (instData.IDXEN && !instData.OFFEN) {
1393 addr0.read();
1394 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1395 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1396 addr1, addr0, rsrcDesc, offset, inst_offset);
1397 } else {
1398 addr0.read();
1399 addr1.read();
1400 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1401 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1402 addr1, addr0, rsrcDesc, offset, inst_offset);
1403 }
1404
1405 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1406 } // execute
1407
1408 void
1409 Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
1410 {
1411 initMemRead<4>(gpuDynInst);
1412 } // initiateAcc
1413
1414 void
1415 Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
1416 {
1417 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
1418 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
1419 VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
1420 VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);
1421
1422 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1423 if (gpuDynInst->exec_mask[lane]) {
1424 if (!oobMask[lane]) {
1425 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1426 gpuDynInst->d_data))[lane * 4];
1427 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1428 gpuDynInst->d_data))[lane * 4 + 1];
1429 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
1430 gpuDynInst->d_data))[lane * 4 + 2];
1431 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
1432 gpuDynInst->d_data))[lane * 4 + 3];
1433 } else {
1434 vdst0[lane] = 0;
1435 vdst1[lane] = 0;
1436 vdst2[lane] = 0;
1437 vdst3[lane] = 0;
1438 }
1439 }
1440 }
1441
1442 vdst0.write();
1443 vdst1.write();
1444 vdst2.write();
1445 vdst3.write();
1446 } // completeAcc
1447 // --- Inst_MUBUF__BUFFER_STORE_BYTE class methods ---
1448
1449 Inst_MUBUF__BUFFER_STORE_BYTE
1450 ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
1451 : Inst_MUBUF(iFmt, "buffer_store_byte")
1452 {
1453 setFlag(MemoryRef);
1454 setFlag(Store);
1455 if (instData.LDS) {
1456 setFlag(GroupSegment);
1457 } else {
1458 setFlag(GlobalSegment);
1459 }
1460 } // Inst_MUBUF__BUFFER_STORE_BYTE
1461
1462 Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
1463 {
1464 } // ~Inst_MUBUF__BUFFER_STORE_BYTE
1465
1466 // --- description from .arch file ---
1467 // Untyped buffer store byte.
1468 void
1469 Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
1470 {
1471 Wavefront *wf = gpuDynInst->wavefront();
1472
1473 if (gpuDynInst->exec_mask.none()) {
1474 wf->decVMemInstsIssued();
1475 wf->untrackVMemInst(gpuDynInst);
1476 wf->decExpInstsIssued();
1477 wf->untrackExpInst(gpuDynInst);
1478 return;
1479 }
1480
1481 gpuDynInst->execUnitId = wf->execUnitId;
1482 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1483 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1484
1485 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1486 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1487 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1488 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1489 ConstVecOperandI8 data(gpuDynInst, extData.VDATA);
1490
1491 rsrcDesc.read();
1492 offset.read();
1493 data.read();
1494
1495 int inst_offset = instData.OFFSET;
1496
1497 if (!instData.IDXEN && !instData.OFFEN) {
1498 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1499 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1500 addr0, addr1, rsrcDesc, offset, inst_offset);
1501 } else if (!instData.IDXEN && instData.OFFEN) {
1502 addr0.read();
1503 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1504 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1505 addr0, addr1, rsrcDesc, offset, inst_offset);
1506 } else if (instData.IDXEN && !instData.OFFEN) {
1507 addr0.read();
1508 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1509 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1510 addr1, addr0, rsrcDesc, offset, inst_offset);
1511 } else {
1512 addr0.read();
1513 addr1.read();
1514 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1515 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1516 addr1, addr0, rsrcDesc, offset, inst_offset);
1517 }
1518
1519 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1520
1521 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1522 if (gpuDynInst->exec_mask[lane]) {
1523 (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
1524 = data[lane];
1525 }
1526 }
1527 } // execute
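// [Editorial note] Note the ordering above: issueRequest() only enqueues
// the access in the global-memory pipeline; the per-lane store data is
// then copied into the d_data staging buffer. This appears safe because
// initiateAcc() (via initMemWrite<VecElemI8>) runs later in the pipeline
// and is what actually moves d_data into the memory system.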
1528
1529 void
1530 Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
1531 {
1532 initMemWrite<VecElemI8>(gpuDynInst);
1533 } // initiateAcc
1534
1535 void
1536 Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
1537 {
1538 } // completeAcc
1539 // --- Inst_MUBUF__BUFFER_STORE_SHORT class methods ---
1540
1541 Inst_MUBUF__BUFFER_STORE_SHORT
1542 ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
1543 : Inst_MUBUF(iFmt, "buffer_store_short")
1544 {
1545 setFlag(MemoryRef);
1546 setFlag(Store);
1547 if (instData.LDS) {
1548 setFlag(GroupSegment);
1549 } else {
1550 setFlag(GlobalSegment);
1551 }
1552 } // Inst_MUBUF__BUFFER_STORE_SHORT
1553
1554 Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
1555 {
1556 } // ~Inst_MUBUF__BUFFER_STORE_SHORT
1557
1558 // --- description from .arch file ---
1559 // Untyped buffer store short.
1560 void
1561 Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
1562 {
1563 Wavefront *wf = gpuDynInst->wavefront();
1564
1565 if (gpuDynInst->exec_mask.none()) {
1566 wf->decVMemInstsIssued();
1567 wf->untrackVMemInst(gpuDynInst);
1568 wf->decExpInstsIssued();
1569 wf->untrackExpInst(gpuDynInst);
1570 return;
1571 }
1572
1573 gpuDynInst->execUnitId = wf->execUnitId;
1574 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1575 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1576
1577 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1578 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1579 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1580 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1581 ConstVecOperandI16 data(gpuDynInst, extData.VDATA);
1582
1583 rsrcDesc.read();
1584 offset.read();
1585 data.read();
1586
1587 int inst_offset = instData.OFFSET;
1588
1589 if (!instData.IDXEN && !instData.OFFEN) {
1590 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1591 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1592 addr0, addr1, rsrcDesc, offset, inst_offset);
1593 } else if (!instData.IDXEN && instData.OFFEN) {
1594 addr0.read();
1595 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1596 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1597 addr0, addr1, rsrcDesc, offset, inst_offset);
1598 } else if (instData.IDXEN && !instData.OFFEN) {
1599 addr0.read();
1600 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1601 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1602 addr1, addr0, rsrcDesc, offset, inst_offset);
1603 } else {
1604 addr0.read();
1605 addr1.read();
1606 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1607 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1608 addr1, addr0, rsrcDesc, offset, inst_offset);
1609 }
1610
1611 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1612
1613 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1614 if (gpuDynInst->exec_mask[lane]) {
1615 (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
1616 = data[lane];
1617 }
1618 }
1619 } // execute
1620
1621 void
1622 Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
1623 {
1624 initMemWrite<VecElemI16>(gpuDynInst);
1625 } // initiateAcc
1626
1627 void
1628 Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
1629 {
1630 } // completeAcc
1631 // --- Inst_MUBUF__BUFFER_STORE_DWORD class methods ---
1632
1633 Inst_MUBUF__BUFFER_STORE_DWORD
1634 ::Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt)
1635 : Inst_MUBUF(iFmt, "buffer_store_dword")
1636 {
1637 setFlag(MemoryRef);
1638 setFlag(Store);
1639 if (instData.LDS) {
1640 setFlag(GroupSegment);
1641 } else {
1642 setFlag(GlobalSegment);
1643 }
1644 } // Inst_MUBUF__BUFFER_STORE_DWORD
1645
1646 Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
1647 {
1648 } // ~Inst_MUBUF__BUFFER_STORE_DWORD
1649
1650 // --- description from .arch file ---
1651 // Untyped buffer store dword.
1652 void
1653 Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
1654 {
1655 Wavefront *wf = gpuDynInst->wavefront();
1656
1657 if (gpuDynInst->exec_mask.none()) {
1658 wf->decVMemInstsIssued();
1659 wf->untrackVMemInst(gpuDynInst);
1660 wf->decExpInstsIssued();
1661 wf->untrackExpInst(gpuDynInst);
1662 return;
1663 }
1664
1665 gpuDynInst->execUnitId = wf->execUnitId;
1666 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1667 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1668
1669 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1670 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1671 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1672 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1673 ConstVecOperandU32 data(gpuDynInst, extData.VDATA);
1674
1675 rsrcDesc.read();
1676 offset.read();
1677 data.read();
1678
1679 int inst_offset = instData.OFFSET;
1680
1681 if (!instData.IDXEN && !instData.OFFEN) {
1682 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1683 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1684 addr0, addr1, rsrcDesc, offset, inst_offset);
1685 } else if (!instData.IDXEN && instData.OFFEN) {
1686 addr0.read();
1687 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1688 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1689 addr0, addr1, rsrcDesc, offset, inst_offset);
1690 } else if (instData.IDXEN && !instData.OFFEN) {
1691 addr0.read();
1692 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1693 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1694 addr1, addr0, rsrcDesc, offset, inst_offset);
1695 } else {
1696 addr0.read();
1697 addr1.read();
1698 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1699 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1700 addr1, addr0, rsrcDesc, offset, inst_offset);
1701 }
1702
1703 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1704
1705 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1706 if (gpuDynInst->exec_mask[lane]) {
1707 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
1708 = data[lane];
1709 }
1710 }
1711 } // execute
1712
1713 void
1714 Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
1715 {
1716 initMemWrite<VecElemU32>(gpuDynInst);
1717 } // initiateAcc
1718
1719 void
1720 Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
1721 {
1722 } // completeAcc
1723 // --- Inst_MUBUF__BUFFER_STORE_DWORDX2 class methods ---
1724
1725 Inst_MUBUF__BUFFER_STORE_DWORDX2
1726 ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
1727 : Inst_MUBUF(iFmt, "buffer_store_dwordx2")
1728 {
1729 setFlag(MemoryRef);
1730 setFlag(Store);
1731 if (instData.LDS) {
1732 setFlag(GroupSegment);
1733 } else {
1734 setFlag(GlobalSegment);
1735 }
1736 } // Inst_MUBUF__BUFFER_STORE_DWORDX2
1737
1738 Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
1739 {
1740 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2
1741
1742 // --- description from .arch file ---
1743 // Untyped buffer store 2 dwords.
1744 void
1745 Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
1746 {
1747 Wavefront *wf = gpuDynInst->wavefront();
1748
1749 if (gpuDynInst->exec_mask.none()) {
1750 wf->decVMemInstsIssued();
1751 wf->untrackVMemInst(gpuDynInst);
1752 wf->decExpInstsIssued();
1753 wf->untrackExpInst(gpuDynInst);
1754 return;
1755 }
1756
1757 gpuDynInst->execUnitId = wf->execUnitId;
1758 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1759 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1760
1761 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1762 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1763 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1764 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1765 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
1766 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
1767
1768 rsrcDesc.read();
1769 offset.read();
1770 data0.read();
1771 data1.read();
1772
1773 int inst_offset = instData.OFFSET;
1774
1775 if (!instData.IDXEN && !instData.OFFEN) {
1776 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1777 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1778 addr0, addr1, rsrcDesc, offset, inst_offset);
1779 } else if (!instData.IDXEN && instData.OFFEN) {
1780 addr0.read();
1781 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1782 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1783 addr0, addr1, rsrcDesc, offset, inst_offset);
1784 } else if (instData.IDXEN && !instData.OFFEN) {
1785 addr0.read();
1786 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1787 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1788 addr1, addr0, rsrcDesc, offset, inst_offset);
1789 } else {
1790 addr0.read();
1791 addr1.read();
1792 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1793 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1794 addr1, addr0, rsrcDesc, offset, inst_offset);
1795 }
1796
1797 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1798
1799 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1800 if (gpuDynInst->exec_mask[lane]) {
1801 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
1802 = data0[lane];
1803 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*2 + 1]
1804 = data1[lane];
1805 }
1806 }
1807 } // execute
1808
1809 void
1810 Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
1811 {
1812 initMemWrite<2>(gpuDynInst);
1813 } // initiateAcc
1814
1815 void
1816 Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
1817 {
1818 } // completeAcc
1819 // --- Inst_MUBUF__BUFFER_STORE_DWORDX3 class methods ---
1820
1821 Inst_MUBUF__BUFFER_STORE_DWORDX3
1822 ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
1823 : Inst_MUBUF(iFmt, "buffer_store_dwordx3")
1824 {
1825 setFlag(MemoryRef);
1826 setFlag(Store);
1827 if (instData.LDS) {
1828 setFlag(GroupSegment);
1829 } else {
1830 setFlag(GlobalSegment);
1831 }
1832 } // Inst_MUBUF__BUFFER_STORE_DWORDX3
1833
1834 Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
1835 {
1836 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3
1837
1838 // --- description from .arch file ---
1839 // Untyped buffer store 3 dwords.
1840 void
1841 Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
1842 {
1843 Wavefront *wf = gpuDynInst->wavefront();
1844
1845 if (gpuDynInst->exec_mask.none()) {
1846 wf->decVMemInstsIssued();
1847 wf->untrackVMemInst(gpuDynInst);
1848 wf->decExpInstsIssued();
1849 wf->untrackExpInst(gpuDynInst);
1850 return;
1851 }
1852
1853 gpuDynInst->execUnitId = wf->execUnitId;
1854 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1855 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1856
1857 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1858 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1859 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1860 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1861 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
1862 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
1863 ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
1864
1865 rsrcDesc.read();
1866 offset.read();
1867 data0.read();
1868 data1.read();
1869 data2.read();
1870
1871 int inst_offset = instData.OFFSET;
1872
1873 if (!instData.IDXEN && !instData.OFFEN) {
1874 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1875 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1876 addr0, addr1, rsrcDesc, offset, inst_offset);
1877 } else if (!instData.IDXEN && instData.OFFEN) {
1878 addr0.read();
1879 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1880 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1881 addr0, addr1, rsrcDesc, offset, inst_offset);
1882 } else if (instData.IDXEN && !instData.OFFEN) {
1883 addr0.read();
1884 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1885 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1886 addr1, addr0, rsrcDesc, offset, inst_offset);
1887 } else {
1888 addr0.read();
1889 addr1.read();
1890 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1891 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1892 addr1, addr0, rsrcDesc, offset, inst_offset);
1893 }
1894
1895 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1896
1897 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1898 if (gpuDynInst->exec_mask[lane]) {
1899 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 3]
1900 = data0[lane];
1901 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*3 + 1]
1902 = data1[lane];
1903 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*3 + 2]
1904 = data2[lane];
1905 }
1906 }
1907 } // execute
1908
1909 void
1910 Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
1911 {
1912 initMemWrite<3>(gpuDynInst);
1913 } // initiateAcc
1914
1915 void
1916 Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
1917 {
1918 } // completeAcc
1919 // --- Inst_MUBUF__BUFFER_STORE_DWORDX4 class methods ---
1920
1921 Inst_MUBUF__BUFFER_STORE_DWORDX4
1922 ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
1923 : Inst_MUBUF(iFmt, "buffer_store_dwordx4")
1924 {
1925 setFlag(MemoryRef);
1926 setFlag(Store);
1927 if (instData.LDS) {
1928 setFlag(GroupSegment);
1929 } else {
1930 setFlag(GlobalSegment);
1931 }
1932 } // Inst_MUBUF__BUFFER_STORE_DWORDX4
1933
1934 Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
1935 {
1936 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4
1937
1938 // --- description from .arch file ---
1939 // Untyped buffer store 4 dwords.
1940 void
1941 Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
1942 {
1943 Wavefront *wf = gpuDynInst->wavefront();
1944
1945 if (gpuDynInst->exec_mask.none()) {
1946 wf->decVMemInstsIssued();
1947 wf->untrackVMemInst(gpuDynInst);
1948 wf->decExpInstsIssued();
1949 wf->untrackExpInst(gpuDynInst);
1950 return;
1951 }
1952
1953 gpuDynInst->execUnitId = wf->execUnitId;
1954 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1955 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1956
1957 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
1958 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
1959 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
1960 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
1961 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
1962 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
1963 ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
1964 ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);
1965
1966 rsrcDesc.read();
1967 offset.read();
1968 data0.read();
1969 data1.read();
1970 data2.read();
1971 data3.read();
1972
1973 int inst_offset = instData.OFFSET;
1974
1975 if (!instData.IDXEN && !instData.OFFEN) {
1976 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1977 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1978 addr0, addr1, rsrcDesc, offset, inst_offset);
1979 } else if (!instData.IDXEN && instData.OFFEN) {
1980 addr0.read();
1981 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1982 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1983 addr0, addr1, rsrcDesc, offset, inst_offset);
1984 } else if (instData.IDXEN && !instData.OFFEN) {
1985 addr0.read();
1986 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1987 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1988 addr1, addr0, rsrcDesc, offset, inst_offset);
1989 } else {
1990 addr0.read();
1991 addr1.read();
1992 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
1993 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
1994 addr1, addr0, rsrcDesc, offset, inst_offset);
1995 }
1996
1997 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1998
1999 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2000 if (gpuDynInst->exec_mask[lane]) {
2001 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
2002 = data0[lane];
2003 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
2004 = data1[lane];
2005 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2]
2006 = data2[lane];
2007 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 3]
2008 = data3[lane];
2009 }
2010 }
2011 } // execute
2012
2013 void
2014 Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
2015 {
2016 initMemWrite<4>(gpuDynInst);
2017 } // initiateAcc
2018
2019 void
2020 Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
2021 {
2022 } // completeAcc
2023 // --- Inst_MUBUF__BUFFER_STORE_LDS_DWORD class methods ---
2024
2025 Inst_MUBUF__BUFFER_STORE_LDS_DWORD
2026 ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
2027 : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
2028 {
2029 setFlag(Store);
2030 setFlag(GlobalSegment);
2031 } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD
2032
2033 Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
2034 {
2035 } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD
2036
2037 // --- description from .arch file ---
2038 // Store one DWORD from LDS memory to system memory without utilizing
2039 // VGPRs.
2040 void
2041 Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
2042 {
2043 panicUnimplemented();
2044 } // execute
2045 // --- Inst_MUBUF__BUFFER_WBINVL1 class methods ---
2046
2047 Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
2048 : Inst_MUBUF(iFmt, "buffer_wbinvl1")
2049 {
2050 setFlag(MemoryRef);
2051 setFlag(GPUStaticInst::MemSync);
2052 setFlag(GlobalSegment);
2053 setFlag(MemSync);
2054 } // Inst_MUBUF__BUFFER_WBINVL1
2055
2056 Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
2057 {
2058 } // ~Inst_MUBUF__BUFFER_WBINVL1
2059
2060 // --- description from .arch file ---
2061 // Write back and invalidate the shader L1.
2062 // Always returns ACK to shader.
2063 void
2064 Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
2065 {
2066 Wavefront *wf = gpuDynInst->wavefront();
2067
2068 if (gpuDynInst->exec_mask.none()) {
2069 wf->decVMemInstsIssued();
2070 wf->untrackVMemInst(gpuDynInst);
2071 return;
2072 }
2073
2074 gpuDynInst->execUnitId = wf->execUnitId;
2075 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2076 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2077
2078 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
2079 gpuDynInst->computeUnit()->globalMemoryPipe.
2080 issueRequest(gpuDynInst);
2081 } else {
2082 fatal("Unsupported scope for flat instruction.\n");
2083 }
2084 } // execute
2085
2086 void
2087 Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
2088 {
2089 // TODO: Fix it for gfx10. Once we have the new gfx10 cache model, we
2090 // need to precisely communicate the writeback-invalidate operation to
2091 // the new gfx10 coalescer rather than sending AcquireRelease markers.
2092 // The SICoalescer would need to be updated appropriately as well.
2093 injectGlobalMemFence(gpuDynInst);
2094 } // initiateAcc
2095 void
2096 Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
2097 {
2098 } // completeAcc
2099 // --- Inst_MUBUF__BUFFER_WBINVL1_VOL class methods ---
2100
2101 Inst_MUBUF__BUFFER_WBINVL1_VOL
2102 ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
2103 : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") {
2104 // This instruction is the same as buffer_wbinvl1 except that it only
2105 // invalidates L1 shader lines with MTYPE SC and GC.
2106 // Since the Hermes L1 (TCP) does not differentiate between its cache
2107 // lines, this instruction currently behaves (and is implemented)
2108 // exactly like buffer_wbinvl1.
2109 setFlag(MemoryRef);
2110 setFlag(GPUStaticInst::MemSync);
2111 setFlag(GlobalSegment);
2112 setFlag(MemSync);
2113 } // Inst_MUBUF__BUFFER_WBINVL1_VOL
2114
2115 Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
2116 {
2117 } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL
2118
2119 // --- description from .arch file ---
2120 // Write back and invalidate the shader L1 only for lines that are marked
2121 // --- volatile.
2122 // Always returns ACK to shader.
2123 void
2124 Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
2125 {
2126 Wavefront *wf = gpuDynInst->wavefront();
2127
2128 if (gpuDynInst->exec_mask.none()) {
2129 wf->decVMemInstsIssued();
2130 wf->untrackVMemInst(gpuDynInst);
2131 return;
2132 }
2133
2134 gpuDynInst->execUnitId = wf->execUnitId;
2135 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2136 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2137
2138 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
2139 gpuDynInst->computeUnit()->globalMemoryPipe.
2140 issueRequest(gpuDynInst);
2141 } else {
2142 fatal("Unsupported scope for flat instruction.\n");
2143 }
2144 } // execute
2145 void
2146 Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
2147 {
2148 injectGlobalMemFence(gpuDynInst);
2149 } // initiateAcc
2150 void
2151 Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
2152 {
2153 } // completeAcc
2154 // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP class methods ---
2155
2156 Inst_MUBUF__BUFFER_ATOMIC_SWAP
2157 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
2158 : Inst_MUBUF(iFmt, "buffer_atomic_swap")
2159 {
2160 setFlag(AtomicExch);
2161 if (instData.GLC) {
2162 setFlag(AtomicReturn);
2163 } else {
2164 setFlag(AtomicNoReturn);
2165 }
2166 setFlag(MemoryRef);
2167 setFlag(GlobalSegment);
2168 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP
2169
2170 Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
2171 {
2172 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP
2173
2174 // --- description from .arch file ---
2175 // 32b:
2176 // tmp = MEM[ADDR];
2177 // MEM[ADDR] = DATA;
2178 // RETURN_DATA = tmp.
2179 void
2180 Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
2181 {
2182 panicUnimplemented();
2183 } // execute
2184 // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class methods ---
2185
2186 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
2187 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
2188 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
2189 {
2190 setFlag(AtomicCAS);
2191 if (instData.GLC) {
2192 setFlag(AtomicReturn);
2193 } else {
2194 setFlag(AtomicNoReturn);
2195 }
2196 setFlag(MemoryRef);
2197 setFlag(GlobalSegment);
2198 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
2199
2200 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
2201 {
2202 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
2203
2204 // --- description from .arch file ---
2205 // 32b:
2206 // tmp = MEM[ADDR];
2207 // src = DATA[0];
2208 // cmp = DATA[1];
2209 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
2210 // RETURN_DATA[0] = tmp.
2211 void
2212 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
2213 {
2214 Wavefront *wf = gpuDynInst->wavefront();
2215
2216 if (gpuDynInst->exec_mask.none()) {
2217 wf->decVMemInstsIssued();
2218 wf->untrackVMemInst(gpuDynInst);
2219 return;
2220 }
2221
2222 gpuDynInst->execUnitId = wf->execUnitId;
2223 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2224 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2225
2226 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
2227 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
2228 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
2229 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
2230 ConstVecOperandU32 src(gpuDynInst, extData.VDATA);
2231 ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1);
2232
2233 rsrcDesc.read();
2234 offset.read();
2235 src.read();
2236 cmp.read();
2237
2238 int inst_offset = instData.OFFSET;
2239
2240 if (!instData.IDXEN && !instData.OFFEN) {
2241 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2242 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2243 addr0, addr1, rsrcDesc, offset, inst_offset);
2244 } else if (!instData.IDXEN && instData.OFFEN) {
2245 addr0.read();
2246 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2247 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2248 addr0, addr1, rsrcDesc, offset, inst_offset);
2249 } else if (instData.IDXEN && !instData.OFFEN) {
2250 addr0.read();
2251 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2252 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2253 addr1, addr0, rsrcDesc, offset, inst_offset);
2254 } else {
2255 addr0.read();
2256 addr1.read();
2257 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
2258 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
2259 addr1, addr0, rsrcDesc, offset, inst_offset);
2260 }
2261
2262 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2263 if (gpuDynInst->exec_mask[lane]) {
2264 (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
2265 = src[lane];
2266 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
2267 = cmp[lane];
2268 }
2269 }
2270
2271 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
2272 } // execute
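// [Editorial note] For the compare-and-swap the two operands are staged
// in separate per-lane buffers: x_data holds the swap value (DATA[0])
// and a_data the compare value (DATA[1]), mirroring the .arch pseudo-code
// "MEM[ADDR] = (tmp == cmp) ? src : tmp". initAtomicAccess<VecElemU32>
// below then builds one 32-bit atomic request per active lane.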
2273
2274 void
2275 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
2276 {
2277 initAtomicAccess<VecElemU32>(gpuDynInst);
2278 } // initiateAcc
2279
2280 void
2281 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
2282 {
2283 if (isAtomicRet()) {
2284 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
2285
2286 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2287 if (gpuDynInst->exec_mask[lane]) {
2288 vdst[lane] = (reinterpret_cast<VecElemU32*>(
2289 gpuDynInst->d_data))[lane];
2290 }
2291 }
2292
2293 vdst.write();
2294 }
2295 } // completeAcc
2296 // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods ---
2297
2298 Inst_MUBUF__BUFFER_ATOMIC_ADD
2299 ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
2300 : Inst_MUBUF(iFmt, "buffer_atomic_add")
2301 {
2302 setFlag(AtomicAdd);
2303 if (instData.GLC) {
2304 setFlag(AtomicReturn);
2305 } else {
2306 setFlag(AtomicNoReturn);
2307 }
2308 setFlag(MemoryRef);
2309 setFlag(GlobalSegment);
2310 } // Inst_MUBUF__BUFFER_ATOMIC_ADD
2311
2312 Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
2313 {
2314 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD
2315
2316 // --- description from .arch file ---
2317 // 32b:
2318 // tmp = MEM[ADDR];
2319 // MEM[ADDR] += DATA;
2320 // RETURN_DATA = tmp.
2321 void
2322 Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
2323 {
2324 panicUnimplemented();
2325 } // execute
2326 // --- Inst_MUBUF__BUFFER_ATOMIC_SUB class methods ---
2327
2328 Inst_MUBUF__BUFFER_ATOMIC_SUB
2329 ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
2330 : Inst_MUBUF(iFmt, "buffer_atomic_sub")
2331 {
2332 setFlag(AtomicSub);
2333 if (instData.GLC) {
2334 setFlag(AtomicReturn);
2335 } else {
2336 setFlag(AtomicNoReturn);
2337 }
2338 setFlag(MemoryRef);
2339 setFlag(GlobalSegment);
2340 } // Inst_MUBUF__BUFFER_ATOMIC_SUB
2341
2342 Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
2343 {
2344 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB
2345
2346 // --- description from .arch file ---
2347 // 32b:
2348 // tmp = MEM[ADDR];
2349 // MEM[ADDR] -= DATA;
2350 // RETURN_DATA = tmp.
2351 void
2352 Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
2353 {
2354 panicUnimplemented();
2355 } // execute
2356 // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN class methods ---
2357
2358 Inst_MUBUF__BUFFER_ATOMIC_SMIN
2359 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
2360 : Inst_MUBUF(iFmt, "buffer_atomic_smin")
2361 {
2362 setFlag(AtomicMin);
2363 if (instData.GLC) {
2364 setFlag(AtomicReturn);
2365 } else {
2366 setFlag(AtomicNoReturn);
2367 }
2368 setFlag(MemoryRef);
2369 setFlag(GlobalSegment);
2370 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN
2371
2372 Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
2373 {
2374 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN
2375
2376 // --- description from .arch file ---
2377 // 32b:
2378 // tmp = MEM[ADDR];
2379 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
2380 // RETURN_DATA = tmp.
2381 void
2382 Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
2383 {
2384 panicUnimplemented();
2385 } // execute
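// smin and umin (below) apply the same min; they differ only in whether
// the 32-bit pattern is compared as int32_t or as uint32_t. Illustrative
// scalar forms (assumption, not upstream code):
//
//   int32_t smin32(int32_t tmp, int32_t data)
//   { return (data < tmp) ? data : tmp; }
//
//   uint32_t umin32(uint32_t tmp, uint32_t data)
//   { return (data < tmp) ? data : tmp; }
//
// For example, 0xFFFFFFFF is -1 (a strong min candidate) under smin but
// UINT32_MAX (the largest value) under umin.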
2386 // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN class methods ---
2387
2388 Inst_MUBUF__BUFFER_ATOMIC_UMIN
2389 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
2390 : Inst_MUBUF(iFmt, "buffer_atomic_umin")
2391 {
2392 setFlag(AtomicMin);
2393 if (instData.GLC) {
2394 setFlag(AtomicReturn);
2395 } else {
2396 setFlag(AtomicNoReturn);
2397 }
2398 setFlag(MemoryRef);
2399 setFlag(GlobalSegment);
2400 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN
2401
2402 Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
2403 {
2404 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN
2405
2406 // --- description from .arch file ---
2407 // 32b:
2408 // tmp = MEM[ADDR];
2409 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
2410 // RETURN_DATA = tmp.
2411 void
2412 Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
2413 {
2414 panicUnimplemented();
2415 } // execute
2416 // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX class methods ---
2417
2418 Inst_MUBUF__BUFFER_ATOMIC_SMAX
2419 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
2420 : Inst_MUBUF(iFmt, "buffer_atomic_smax")
2421 {
2422 setFlag(AtomicMax);
2423 if (instData.GLC) {
2424 setFlag(AtomicReturn);
2425 } else {
2426 setFlag(AtomicNoReturn);
2427 }
2428 setFlag(MemoryRef);
2429 setFlag(GlobalSegment);
2430 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX
2431
2432 Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
2433 {
2434 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX
2435
2436 // --- description from .arch file ---
2437 // 32b:
2438 // tmp = MEM[ADDR];
2439 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
2440 // RETURN_DATA = tmp.
2441 void
2442 Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
2443 {
2444 panicUnimplemented();
2445 } // execute
2446 // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX class methods ---
2447
2448 Inst_MUBUF__BUFFER_ATOMIC_UMAX
2449 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
2450 : Inst_MUBUF(iFmt, "buffer_atomic_umax")
2451 {
2452 setFlag(AtomicMax);
2453 if (instData.GLC) {
2454 setFlag(AtomicReturn);
2455 } else {
2456 setFlag(AtomicNoReturn);
2457 }
2458 setFlag(MemoryRef);
2459 setFlag(GlobalSegment);
2460 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX
2461
2462 Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
2463 {
2464 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX
2465
2466 // --- description from .arch file ---
2467 // 32b:
2468 // tmp = MEM[ADDR];
2469 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
2470 // RETURN_DATA = tmp.
2471 void
2472 Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
2473 {
2474 panicUnimplemented();
2475 } // execute
2476 // --- Inst_MUBUF__BUFFER_ATOMIC_AND class methods ---
2477
2478 Inst_MUBUF__BUFFER_ATOMIC_AND
2479 ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
2480 : Inst_MUBUF(iFmt, "buffer_atomic_and")
2481 {
2482 setFlag(AtomicAnd);
2483 if (instData.GLC) {
2484 setFlag(AtomicReturn);
2485 } else {
2486 setFlag(AtomicNoReturn);
2487 }
2488 setFlag(MemoryRef);
2489 setFlag(GlobalSegment);
2490 } // Inst_MUBUF__BUFFER_ATOMIC_AND
2491
2492 Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
2493 {
2494 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND
2495
2496 // --- description from .arch file ---
2497 // 32b:
2498 // tmp = MEM[ADDR];
2499 // MEM[ADDR] &= DATA;
2500 // RETURN_DATA = tmp.
2501 void
2502 Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
2503 {
2504 panicUnimplemented();
2505 } // execute
2506 // --- Inst_MUBUF__BUFFER_ATOMIC_OR class methods ---
2507
2508 Inst_MUBUF__BUFFER_ATOMIC_OR
2509 ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
2510 : Inst_MUBUF(iFmt, "buffer_atomic_or")
2511 {
2512 setFlag(AtomicOr);
2513 if (instData.GLC) {
2514 setFlag(AtomicReturn);
2515 } else {
2516 setFlag(AtomicNoReturn);
2517 }
2518 setFlag(MemoryRef);
2519 setFlag(GlobalSegment);
2520 } // Inst_MUBUF__BUFFER_ATOMIC_OR
2521
2522 Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
2523 {
2524 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR
2525
2526 // --- description from .arch file ---
2527 // 32b:
2528 // tmp = MEM[ADDR];
2529 // MEM[ADDR] |= DATA;
2530 // RETURN_DATA = tmp.
2531 void
2532 Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
2533 {
2534 panicUnimplemented();
2535 } // execute
2536 // --- Inst_MUBUF__BUFFER_ATOMIC_XOR class methods ---
2537
2538 Inst_MUBUF__BUFFER_ATOMIC_XOR
2539 ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
2540 : Inst_MUBUF(iFmt, "buffer_atomic_xor")
2541 {
2542 setFlag(AtomicXor);
2543 if (instData.GLC) {
2544 setFlag(AtomicReturn);
2545 } else {
2546 setFlag(AtomicNoReturn);
2547 }
2548 setFlag(MemoryRef);
2549 setFlag(GlobalSegment);
2550 } // Inst_MUBUF__BUFFER_ATOMIC_XOR
2551
2552 Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
2553 {
2554 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR
2555
2556 // --- description from .arch file ---
2557 // 32b:
2558 // tmp = MEM[ADDR];
2559 // MEM[ADDR] ^= DATA;
2560 // RETURN_DATA = tmp.
2561 void
2562 Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
2563 {
2564 panicUnimplemented();
2565 } // execute
2566 // --- Inst_MUBUF__BUFFER_ATOMIC_INC class methods ---
2567
2568 Inst_MUBUF__BUFFER_ATOMIC_INC
2569 ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
2570 : Inst_MUBUF(iFmt, "buffer_atomic_inc")
2571 {
2572 setFlag(AtomicInc);
2573 if (instData.GLC) {
2574 setFlag(AtomicReturn);
2575 } else {
2576 setFlag(AtomicNoReturn);
2577 }
2578 setFlag(MemoryRef);
2579 setFlag(GlobalSegment);
2580 } // Inst_MUBUF__BUFFER_ATOMIC_INC
2581
2582 Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
2583 {
2584 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC
2585
2586 // --- description from .arch file ---
2587 // 32b:
2588 // tmp = MEM[ADDR];
2589 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
2590 // RETURN_DATA = tmp.
2591 void
2592 Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
2593 {
2594 panicUnimplemented();
2595 } // execute
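// buffer_atomic_inc is not a plain add: the counter wraps to 0 once the
// old value reaches DATA, which makes it usable as a ring-buffer index
// modulo DATA+1. Scalar sketch of the description above (illustrative):
//
//   uint32_t atomicInc(uint32_t *mem, uint32_t data)
//   {
//       uint32_t tmp = *mem;                 // pre-op value
//       *mem = (tmp >= data) ? 0 : tmp + 1;  // unsigned compare
//       return tmp;                          // RETURN_DATA
//   }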
2596 // --- Inst_MUBUF__BUFFER_ATOMIC_DEC class methods ---
2597
2598 Inst_MUBUF__BUFFER_ATOMIC_DEC
2599 ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
2600 : Inst_MUBUF(iFmt, "buffer_atomic_dec")
2601 {
2602 setFlag(AtomicDec);
2603 if (instData.GLC) {
2604 setFlag(AtomicReturn);
2605 } else {
2606 setFlag(AtomicNoReturn);
2607 }
2608 setFlag(MemoryRef);
2609 setFlag(GlobalSegment);
2610 } // Inst_MUBUF__BUFFER_ATOMIC_DEC
2611
2612 Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
2613 {
2614 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC
2615
2616 // --- description from .arch file ---
2617 // 32b:
2618 // tmp = MEM[ADDR];
2619 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
2620 // (unsigned compare); RETURN_DATA = tmp.
2621 void
2622 Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
2623 {
2624 panicUnimplemented();
2625 } // execute
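// The matching decrement wraps back to DATA when the counter is already
// 0 or has strayed above DATA. Scalar sketch (illustrative only):
//
//   uint32_t atomicDec(uint32_t *mem, uint32_t data)
//   {
//       uint32_t tmp = *mem;                               // pre-op value
//       *mem = (tmp == 0 || tmp > data) ? data : tmp - 1;  // unsigned compare
//       return tmp;                                        // RETURN_DATA
//   }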
2626 // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 class methods ---
2627
2628 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2629 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
2630 : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
2631 {
2632 setFlag(AtomicExch);
2633 if (instData.GLC) {
2634 setFlag(AtomicReturn);
2635 } else {
2636 setFlag(AtomicNoReturn);
2637 }
2638 setFlag(MemoryRef);
2639 setFlag(GlobalSegment);
2640 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2641
2642 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
2643 {
2644 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2645
2646 // --- description from .arch file ---
2647 // 64b:
2648 // tmp = MEM[ADDR];
2649 // MEM[ADDR] = DATA[0:1];
2650 // RETURN_DATA[0:1] = tmp.
2651 void
2652 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
2653 {
2654 panicUnimplemented();
2655 } // execute
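// The _x2 variants operate on 64-bit values held in consecutive VGPR
// pairs: DATA[0:1] is two dwords forming one 64-bit operand, and
// RETURN_DATA comes back as a pair as well. Sketch of the 64-bit swap
// (illustrative assumption, not upstream code):
//
//   uint64_t swap64(uint64_t *mem, uint64_t data)
//   {
//       uint64_t tmp = *mem;   // pre-op value
//       *mem = data;
//       return tmp;            // RETURN_DATA[0:1]
//   }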
2656 // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 class methods ---
2657
2658 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2659 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
2660 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
2661 {
2662 setFlag(AtomicCAS);
2663 if (instData.GLC) {
2664 setFlag(AtomicReturn);
2665 } else {
2666 setFlag(AtomicNoReturn);
2667 }
2668 setFlag(MemoryRef);
2669 setFlag(GlobalSegment);
2670 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2671
2672 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2673 ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
2674 {
2675 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2676
2677 // --- description from .arch file ---
2678 // 64b:
2679 // tmp = MEM[ADDR];
2680 // src = DATA[0:1];
2681 // cmp = DATA[2:3];
2682 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
2683 // RETURN_DATA[0:1] = tmp.
2684 void
2685 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
2686 {
2687 panicUnimplemented();
2688 } // execute
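// A 64-bit CAS consumes a four-dword VDATA group, mirroring the dword
// layout the 32-bit buffer_atomic_cmpswap implementation above reads
// from VDATA and VDATA+1: DATA[0:1] is the swap source, DATA[2:3] the
// compare value. Scalar sketch (illustrative only):
//
//   uint64_t cmpswap64(uint64_t *mem, uint64_t src, uint64_t cmp)
//   {
//       uint64_t tmp = *mem;              // pre-op value
//       *mem = (tmp == cmp) ? src : tmp;  // swap only on a match
//       return tmp;                       // RETURN_DATA[0:1]
//   }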
2689 // --- Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 class methods ---
2690
2691 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2692 ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
2693 : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
2694 {
2695 setFlag(AtomicAdd);
2696 if (instData.GLC) {
2697 setFlag(AtomicReturn);
2698 } else {
2699 setFlag(AtomicNoReturn);
2700 }
2701 setFlag(MemoryRef);
2702 setFlag(GlobalSegment);
2703 } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2704
2705 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
2706 {
2707 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2708
2709 // --- description from .arch file ---
2710 // 64b:
2711 // tmp = MEM[ADDR];
2712 // MEM[ADDR] += DATA[0:1];
2713 // RETURN_DATA[0:1] = tmp.
2714 void
2715 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
2716 {
2717 panicUnimplemented();
2718 } // execute
2719 // --- Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 class methods ---
2720
2721 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2722 ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
2723 : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
2724 {
2725 setFlag(AtomicSub);
2726 if (instData.GLC) {
2727 setFlag(AtomicReturn);
2728 } else {
2729 setFlag(AtomicNoReturn);
2730 }
2731 setFlag(MemoryRef);
2732 setFlag(GlobalSegment);
2733 } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2734
2735 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
2736 {
2737 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2738
2739 // --- description from .arch file ---
2740 // 64b:
2741 // tmp = MEM[ADDR];
2742 // MEM[ADDR] -= DATA[0:1];
2743 // RETURN_DATA[0:1] = tmp.
2744 void
2745 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
2746 {
2747 panicUnimplemented();
2748 } // execute
2749 // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 class methods ---
2750
2751 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2752 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
2753 : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
2754 {
2755 setFlag(AtomicMin);
2756 if (instData.GLC) {
2757 setFlag(AtomicReturn);
2758 } else {
2759 setFlag(AtomicNoReturn);
2760 }
2761 setFlag(MemoryRef);
2762 setFlag(GlobalSegment);
2763 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2764
2765 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
2766 {
2767 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2768
2769 // --- description from .arch file ---
2770 // 64b:
2771 // tmp = MEM[ADDR];
2772 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
2773 // RETURN_DATA[0:1] = tmp.
2774 void
2775 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
2776 {
2777 panicUnimplemented();
2778 } // execute
2779 // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 class methods ---
2780
2781 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2782 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
2783 : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
2784 {
2785 setFlag(AtomicMin);
2786 if (instData.GLC) {
2787 setFlag(AtomicReturn);
2788 } else {
2789 setFlag(AtomicNoReturn);
2790 }
2791 setFlag(MemoryRef);
2792 setFlag(GlobalSegment);
2793 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2794
2795 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
2796 {
2797 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2798
2799 // --- description from .arch file ---
2800 // 64b:
2801 // tmp = MEM[ADDR];
2802 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
2803 // RETURN_DATA[0:1] = tmp.
2804 void
2805 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
2806 {
2807 panicUnimplemented();
2808 } // execute
2809 // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 class methods ---
2810
2811 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2812 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
2813 : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
2814 {
2815 setFlag(AtomicMax);
2816 if (instData.GLC) {
2817 setFlag(AtomicReturn);
2818 } else {
2819 setFlag(AtomicNoReturn);
2820 }
2821 setFlag(MemoryRef);
2822 setFlag(GlobalSegment);
2823 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2824
2825 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
2826 {
2827 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2828
2829 // --- description from .arch file ---
2830 // 64b:
2831 // tmp = MEM[ADDR];
2832 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
2833 // RETURN_DATA[0:1] = tmp.
2834 void
2835 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
2836 {
2837 panicUnimplemented();
2838 } // execute
2839 // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 class methods ---
2840
2841 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2842 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
2843 : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
2844 {
2845 setFlag(AtomicMax);
2846 if (instData.GLC) {
2847 setFlag(AtomicReturn);
2848 } else {
2849 setFlag(AtomicNoReturn);
2850 }
2851 setFlag(MemoryRef);
2852 setFlag(GlobalSegment);
2853 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2854
2855 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
2856 {
2857 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2858
2859 // --- description from .arch file ---
2860 // 64b:
2861 // tmp = MEM[ADDR];
2862 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
2863 // RETURN_DATA[0:1] = tmp.
2864 void
2865 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
2866 {
2867 panicUnimplemented();
2868 } // execute
2869 // --- Inst_MUBUF__BUFFER_ATOMIC_AND_X2 class methods ---
2870
2871 Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2872 ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
2873 : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
2874 {
2875 setFlag(AtomicAnd);
2876 if (instData.GLC) {
2877 setFlag(AtomicReturn);
2878 } else {
2879 setFlag(AtomicNoReturn);
2880 }
2881 setFlag(MemoryRef);
2882 setFlag(GlobalSegment);
2883 } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2884
2885 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
2886 {
2887 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2888
2889 // --- description from .arch file ---
2890 // 64b:
2891 // tmp = MEM[ADDR];
2892 // MEM[ADDR] &= DATA[0:1];
2893 // RETURN_DATA[0:1] = tmp.
2894 void
2895 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
2896 {
2897 panicUnimplemented();
2898 } // execute
2899 // --- Inst_MUBUF__BUFFER_ATOMIC_OR_X2 class methods ---
2900
2901 Inst_MUBUF__BUFFER_ATOMIC_OR_X2
2902 ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
2903 : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
2904 {
2905 setFlag(AtomicOr);
2906 if (instData.GLC) {
2907 setFlag(AtomicReturn);
2908 } else {
2909 setFlag(AtomicNoReturn);
2910 }
setFlag(MemoryRef);
setFlag(GlobalSegment);
2911 } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2
2912
2913 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
2914 {
2915 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2
2916
2917 // --- description from .arch file ---
2918 // 64b:
2919 // tmp = MEM[ADDR];
2920 // MEM[ADDR] |= DATA[0:1];
2921 // RETURN_DATA[0:1] = tmp.
2922 void
2923 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
2924 {
2925 panicUnimplemented();
2926 } // execute
2927 // --- Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 class methods ---
2928
2929 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
2930 ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
2931 : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
2932 {
2933 setFlag(AtomicXor);
2934 if (instData.GLC) {
2935 setFlag(AtomicReturn);
2936 } else {
2937 setFlag(AtomicNoReturn);
2938 }
2939 setFlag(MemoryRef);
2940 setFlag(GlobalSegment);
2941 } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
2942
2943 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
2944 {
2945 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
2946
2947 // --- description from .arch file ---
2948 // 64b:
2949 // tmp = MEM[ADDR];
2950 // MEM[ADDR] ^= DATA[0:1];
2951 // RETURN_DATA[0:1] = tmp.
2952 void
2953 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
2954 {
2955 panicUnimplemented();
2956 } // execute
2957 // --- Inst_MUBUF__BUFFER_ATOMIC_INC_X2 class methods ---
2958
2959 Inst_MUBUF__BUFFER_ATOMIC_INC_X2
2960 ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
2961 : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
2962 {
2963 setFlag(AtomicInc);
2964 if (instData.GLC) {
2965 setFlag(AtomicReturn);
2966 } else {
2967 setFlag(AtomicNoReturn);
2968 }
2969 setFlag(MemoryRef);
2970 setFlag(GlobalSegment);
2971 } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2
2972
2973 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
2974 {
2975 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2
2976
2977 // --- description from .arch file ---
2978 // 64b:
2979 // tmp = MEM[ADDR];
2980 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
2981 // RETURN_DATA[0:1] = tmp.
2982 void
2983 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
2984 {
2985 panicUnimplemented();
2986 } // execute
2987 // --- Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 class methods ---
2988
2989 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
2990 ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
2991 : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
2992 {
2993 setFlag(AtomicDec);
2994 if (instData.GLC) {
2995 setFlag(AtomicReturn);
2996 } else {
2997 setFlag(AtomicNoReturn);
2998 }
2999 setFlag(MemoryRef);
3000 setFlag(GlobalSegment);
3001 } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
3002
3003 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
3004 {
3005 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
3006
3007 // --- description from .arch file ---
3008 // 64b:
3009 // tmp = MEM[ADDR];
3010 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
3011 // (unsigned compare);
3012 // RETURN_DATA[0:1] = tmp.
3013 void
3014 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
3015 {
3016 panicUnimplemented();
3017 } // execute
3018 } // namespace VegaISA
3019 } // namespace gem5