gem5 v24.0.0.0
Loading...
Searching...
No Matches
flat.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34namespace gem5
35{
36
37namespace VegaISA
38{
39 // --- Inst_FLAT__FLAT_LOAD_UBYTE class methods ---
40
42 : Inst_FLAT(iFmt, "flat_load_ubyte")
43 {
44 setFlag(MemoryRef);
46 } // Inst_FLAT__FLAT_LOAD_UBYTE
47
49 {
50 } // ~Inst_FLAT__FLAT_LOAD_UBYTE
51
52 // --- description from .arch file ---
53 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
54 void
56 {
57 Wavefront *wf = gpuDynInst->wavefront();
58
59 if (gpuDynInst->exec_mask.none()) {
61 if (isFlat()) {
63 }
64 return;
65 }
66
67 gpuDynInst->execUnitId = wf->execUnitId;
68 gpuDynInst->latency.init(gpuDynInst->computeUnit());
69 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
70
72
73 issueRequestHelper(gpuDynInst);
74 } // execute
75
76 void
78 {
79 initMemRead<VecElemU8>(gpuDynInst);
80 } // initiateAcc
81
82 void
84 {
85 VecOperandU32 vdst(gpuDynInst, extData.VDST);
86
87 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
88 if (gpuDynInst->exec_mask[lane]) {
89 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
90 gpuDynInst->d_data))[lane]);
91 }
92 }
93 vdst.write();
94 } // execute
95 // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---
96
98 : Inst_FLAT(iFmt, "flat_load_sbyte")
99 {
100 setFlag(MemoryRef);
101 setFlag(Load);
102 } // Inst_FLAT__FLAT_LOAD_SBYTE
103
105 {
106 } // ~Inst_FLAT__FLAT_LOAD_SBYTE
107
108 // --- description from .arch file ---
109 // Untyped buffer load signed byte (sign extend to VGPR destination).
110 void
112 {
113 Wavefront *wf = gpuDynInst->wavefront();
114
115 if (gpuDynInst->exec_mask.none()) {
116 wf->decVMemInstsIssued();
117 if (isFlat()) {
118 wf->decLGKMInstsIssued();
119 }
120 return;
121 }
122
123 gpuDynInst->execUnitId = wf->execUnitId;
124 gpuDynInst->latency.init(gpuDynInst->computeUnit());
125 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
126
128
129 issueRequestHelper(gpuDynInst);
130 } // execute
131
132 void
134 {
135 initMemRead<VecElemI8>(gpuDynInst);
136 } // initiateAcc
137
138 void
140 {
141 VecOperandU32 vdst(gpuDynInst, extData.VDST);
142
143 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
144 if (gpuDynInst->exec_mask[lane]) {
145 vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>(
146 gpuDynInst->d_data))[lane]);
147 }
148 }
149 vdst.write();
150 } // execute
151 // --- Inst_FLAT__FLAT_LOAD_USHORT class methods ---
152
154 : Inst_FLAT(iFmt, "flat_load_ushort")
155 {
156 setFlag(MemoryRef);
157 setFlag(Load);
158 } // Inst_FLAT__FLAT_LOAD_USHORT
159
161 {
162 } // ~Inst_FLAT__FLAT_LOAD_USHORT
163
164 // --- description from .arch file ---
165 // Untyped buffer load unsigned short (zero extend to VGPR destination).
166 void
168 {
169 Wavefront *wf = gpuDynInst->wavefront();
170
171 if (gpuDynInst->exec_mask.none()) {
172 wf->decVMemInstsIssued();
173 if (isFlat()) {
174 wf->decLGKMInstsIssued();
175 }
176 return;
177 }
178
179 gpuDynInst->execUnitId = wf->execUnitId;
180 gpuDynInst->latency.init(gpuDynInst->computeUnit());
181 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
182
184
185 issueRequestHelper(gpuDynInst);
186 } // execute
187
188 void
190 {
191 initMemRead<VecElemU16>(gpuDynInst);
192 } // initiateAcc
193
194 void
196 {
197 VecOperandU32 vdst(gpuDynInst, extData.VDST);
198
199 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
200 if (gpuDynInst->exec_mask[lane]) {
201 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
202 gpuDynInst->d_data))[lane]);
203 }
204 }
205 vdst.write();
206 } // execute
207
208 // --- Inst_FLAT__FLAT_LOAD_SSHORT class methods ---
209
// NOTE(review): Doxygen-scrape artifact -- the hyperlinked signature lines
// (ctor, dtor, execute/initiateAcc/completeAcc) were dropped during
// extraction, and the entire execute() body (original lines 224-227) is
// missing.  Too little survives to reconstruct it safely here; recover this
// section from upstream gem5 src/arch/amdgpu/vega/insts/flat.cc.
211 : Inst_FLAT(iFmt, "flat_load_sshort")
212 {
213 setFlag(MemoryRef);
214 setFlag(Load);
215 } // Inst_FLAT__FLAT_LOAD_SSHORT
216
218 {
219 } // ~Inst_FLAT__FLAT_LOAD_SSHORT
220
221 // --- description from .arch file ---
222 // Untyped buffer load signed short (sign extend to VGPR destination).
223 void
// NOTE(review): execute() body lost in extraction (orig. lines 224-227).
228
229 void
// Empty initiateAcc/completeAcc bodies below -- consistent with an
// unimplemented instruction in this file; confirm against upstream.
231 {
232 } // initiateAcc
233
234 void
236 {
237 } // execute
238 // --- Inst_FLAT__FLAT_LOAD_DWORD class methods ---
239
241 : Inst_FLAT(iFmt, "flat_load_dword")
242 {
243 setFlag(MemoryRef);
244 setFlag(Load);
245 } // Inst_FLAT__FLAT_LOAD_DWORD
246
248 {
249 } // ~Inst_FLAT__FLAT_LOAD_DWORD
250
251 // --- description from .arch file ---
252 // Untyped buffer load dword.
253 void
255 {
256 Wavefront *wf = gpuDynInst->wavefront();
257
258 if (gpuDynInst->exec_mask.none()) {
259 wf->decVMemInstsIssued();
260 if (isFlat()) {
261 wf->decLGKMInstsIssued();
262 }
263 return;
264 }
265
266 gpuDynInst->execUnitId = wf->execUnitId;
267 gpuDynInst->latency.init(gpuDynInst->computeUnit());
268 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
269
271
272 issueRequestHelper(gpuDynInst);
273 } // execute
274
275 void
277 {
278 initMemRead<VecElemU32>(gpuDynInst);
279 } // initiateAcc
280
281 void
283 {
284 VecOperandU32 vdst(gpuDynInst, extData.VDST);
285
286 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
287 if (gpuDynInst->exec_mask[lane]) {
288 vdst[lane] = (reinterpret_cast<VecElemU32*>(
289 gpuDynInst->d_data))[lane];
290 }
291 }
292 vdst.write();
293 } // completeAcc
294 // --- Inst_FLAT__FLAT_LOAD_DWORDX2 class methods ---
295
297 InFmt_FLAT *iFmt)
298 : Inst_FLAT(iFmt, "flat_load_dwordx2")
299 {
300 setFlag(MemoryRef);
301 setFlag(Load);
302 } // Inst_FLAT__FLAT_LOAD_DWORDX2
303
305 {
306 } // ~Inst_FLAT__FLAT_LOAD_DWORDX2
307
308 // --- description from .arch file ---
309 // Untyped buffer load 2 dwords.
310 void
312 {
313 Wavefront *wf = gpuDynInst->wavefront();
314
315 if (gpuDynInst->exec_mask.none()) {
316 wf->decVMemInstsIssued();
317 if (isFlat()) {
318 wf->decLGKMInstsIssued();
319 }
320 return;
321 }
322
323 gpuDynInst->execUnitId = wf->execUnitId;
324 gpuDynInst->latency.init(gpuDynInst->computeUnit());
325 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
326
328
329 issueRequestHelper(gpuDynInst);
330 } // execute
331
332 void
334 {
335 initMemRead<VecElemU64>(gpuDynInst);
336 } // initiateAcc
337
338 void
340 {
341 VecOperandU64 vdst(gpuDynInst, extData.VDST);
342
343 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
344 if (gpuDynInst->exec_mask[lane]) {
345 vdst[lane] = (reinterpret_cast<VecElemU64*>(
346 gpuDynInst->d_data))[lane];
347 }
348 }
349 vdst.write();
350 } // completeAcc
351 // --- Inst_FLAT__FLAT_LOAD_DWORDX3 class methods ---
352
354 InFmt_FLAT *iFmt)
355 : Inst_FLAT(iFmt, "flat_load_dwordx3")
356 {
357 setFlag(MemoryRef);
358 setFlag(Load);
359 } // Inst_FLAT__FLAT_LOAD_DWORDX3
360
362 {
363 } // ~Inst_FLAT__FLAT_LOAD_DWORDX3
364
365 // --- description from .arch file ---
366 // Untyped buffer load 3 dwords.
367 void
369 {
370 Wavefront *wf = gpuDynInst->wavefront();
371
372 if (gpuDynInst->exec_mask.none()) {
373 wf->decVMemInstsIssued();
374 if (isFlat()) {
375 wf->decLGKMInstsIssued();
376 }
377 return;
378 }
379
380 gpuDynInst->execUnitId = wf->execUnitId;
381 gpuDynInst->latency.init(gpuDynInst->computeUnit());
382 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
383
385
386 issueRequestHelper(gpuDynInst);
387 } // execute
388
389 void
391 {
392 initMemRead<3>(gpuDynInst);
393 } // initiateAcc
394
395 void
397 {
398 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
399 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
400 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
401
402 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
403 if (gpuDynInst->exec_mask[lane]) {
404 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
405 gpuDynInst->d_data))[lane * 3];
406 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
407 gpuDynInst->d_data))[lane * 3 + 1];
408 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
409 gpuDynInst->d_data))[lane * 3 + 2];
410 }
411 }
412
413 vdst0.write();
414 vdst1.write();
415 vdst2.write();
416 } // completeAcc
417 // --- Inst_FLAT__FLAT_LOAD_DWORDX4 class methods ---
418
420 InFmt_FLAT *iFmt)
421 : Inst_FLAT(iFmt, "flat_load_dwordx4")
422 {
423 setFlag(MemoryRef);
424 setFlag(Load);
425 } // Inst_FLAT__FLAT_LOAD_DWORDX4
426
428 {
429 } // ~Inst_FLAT__FLAT_LOAD_DWORDX4
430
431 // --- description from .arch file ---
432 // Untyped buffer load 4 dwords.
433 void
435 {
436 Wavefront *wf = gpuDynInst->wavefront();
437
438 if (gpuDynInst->exec_mask.none()) {
439 wf->decVMemInstsIssued();
440 if (isFlat()) {
441 wf->decLGKMInstsIssued();
442 }
443 return;
444 }
445
446 gpuDynInst->execUnitId = wf->execUnitId;
447 gpuDynInst->latency.init(gpuDynInst->computeUnit());
448 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
449
451
452 issueRequestHelper(gpuDynInst);
453 } // execute
454
455 void
457 {
458 initMemRead<4>(gpuDynInst);
459 } // initiateAcc
460
461 void
463 {
464 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
465 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
466 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
467 VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);
468
469 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
470 if (gpuDynInst->exec_mask[lane]) {
471 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
472 gpuDynInst->d_data))[lane * 4];
473 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
474 gpuDynInst->d_data))[lane * 4 + 1];
475 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
476 gpuDynInst->d_data))[lane * 4 + 2];
477 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
478 gpuDynInst->d_data))[lane * 4 + 3];
479 }
480 }
481
482 vdst0.write();
483 vdst1.write();
484 vdst2.write();
485 vdst3.write();
486 } // completeAcc
487 // --- Inst_FLAT__FLAT_STORE_BYTE class methods ---
488
490 : Inst_FLAT(iFmt, "flat_store_byte")
491 {
492 setFlag(MemoryRef);
493 setFlag(Store);
494 } // Inst_FLAT__FLAT_STORE_BYTE
495
497 {
498 } // ~Inst_FLAT__FLAT_STORE_BYTE
499
500 // --- description from .arch file ---
501 // Untyped buffer store byte.
502 void
504 {
505 Wavefront *wf = gpuDynInst->wavefront();
506
507 if (gpuDynInst->exec_mask.none()) {
508 wf->decVMemInstsIssued();
509 if (isFlat()) {
510 wf->decLGKMInstsIssued();
511 }
512 wf->decExpInstsIssued();
513 return;
514 }
515
516 gpuDynInst->execUnitId = wf->execUnitId;
517 gpuDynInst->latency.init(gpuDynInst->computeUnit());
518 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
519
520 ConstVecOperandU8 data(gpuDynInst, extData.DATA);
521
522 data.read();
523
525
526 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
527 if (gpuDynInst->exec_mask[lane]) {
528 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
529 = data[lane];
530 }
531 }
532
533 issueRequestHelper(gpuDynInst);
534 } // execute
535
536 void
538 {
539 initMemWrite<VecElemU8>(gpuDynInst);
540 } // initiateAcc
541
542 void
544 {
545 } // execute
546 // --- Inst_FLAT__FLAT_STORE_SHORT class methods ---
547
549 : Inst_FLAT(iFmt, "flat_store_short")
550 {
551 setFlag(MemoryRef);
552 setFlag(Store);
553 } // Inst_FLAT__FLAT_STORE_SHORT
554
556 {
557 } // ~Inst_FLAT__FLAT_STORE_SHORT
558
559 // --- description from .arch file ---
560 // Untyped buffer store short.
561 void
563 {
564 Wavefront *wf = gpuDynInst->wavefront();
565
566 if (gpuDynInst->exec_mask.none()) {
567 wf->decVMemInstsIssued();
568 if (isFlat()) {
569 wf->decLGKMInstsIssued();
570 }
571 wf->decExpInstsIssued();
572 return;
573 }
574
575 gpuDynInst->execUnitId = wf->execUnitId;
576 gpuDynInst->latency.init(gpuDynInst->computeUnit());
577 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
578
579 ConstVecOperandU16 data(gpuDynInst, extData.DATA);
580
581 data.read();
582
584
585 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
586 if (gpuDynInst->exec_mask[lane]) {
587 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
588 = data[lane];
589 }
590 }
591
592 issueRequestHelper(gpuDynInst);
593 } // execute
594
595 void
597 {
598 initMemWrite<VecElemU16>(gpuDynInst);
599 } // initiateAcc
600
601 void
603 {
604 } // completeAcc
605 // --- Inst_FLAT__FLAT_STORE_SHORT_D16_HI class methods ---
606
609 : Inst_FLAT(iFmt, "flat_store_short_d16_hi")
610 {
611 setFlag(MemoryRef);
612 setFlag(Store);
613 } // Inst_FLAT__FLAT_STORE_SHORT_D16_HI
614
616 {
617 } // ~Inst_FLAT__FLAT_STORE_SHORT_D16_HI
618
619 // --- description from .arch file ---
620 // Untyped buffer store short.
621 void
623 {
624 Wavefront *wf = gpuDynInst->wavefront();
625
626 if (gpuDynInst->exec_mask.none()) {
627 wf->decVMemInstsIssued();
628 if (isFlat()) {
629 wf->decLGKMInstsIssued();
630 }
631 wf->decExpInstsIssued();
632 return;
633 }
634
635 gpuDynInst->execUnitId = wf->execUnitId;
636 gpuDynInst->latency.init(gpuDynInst->computeUnit());
637 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
638
639 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
640
641 data.read();
642
644
645 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
646 if (gpuDynInst->exec_mask[lane]) {
647 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
648 = (data[lane] >> 16);
649 }
650 }
651
652 issueRequestHelper(gpuDynInst);
653 } // execute
654
655 void
657 {
658 initMemWrite<VecElemU16>(gpuDynInst);
659 } // initiateAcc
660
661 void
665 // --- Inst_FLAT__FLAT_STORE_DWORD class methods ---
666
668 : Inst_FLAT(iFmt, "flat_store_dword")
669 {
670 setFlag(MemoryRef);
671 setFlag(Store);
672 } // Inst_FLAT__FLAT_STORE_DWORD
673
675 {
676 } // ~Inst_FLAT__FLAT_STORE_DWORD
677
678 // --- description from .arch file ---
679 // Untyped buffer store dword.
680 void
682 {
683 Wavefront *wf = gpuDynInst->wavefront();
684
685 if (gpuDynInst->exec_mask.none()) {
686 wf->decVMemInstsIssued();
687 if (isFlat()) {
688 wf->decLGKMInstsIssued();
689 }
690 wf->decExpInstsIssued();
691 return;
692 }
693
694 gpuDynInst->execUnitId = wf->execUnitId;
695 gpuDynInst->latency.init(gpuDynInst->computeUnit());
696 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
697
698 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
699
700 data.read();
701
703
704 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
705 if (gpuDynInst->exec_mask[lane]) {
706 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
707 = data[lane];
708 }
709 }
710
711 issueRequestHelper(gpuDynInst);
712 } // execute
713
714 void
716 {
717 initMemWrite<VecElemU32>(gpuDynInst);
718 } // initiateAcc
719
720 void
722 {
723 } // completeAcc
724 // --- Inst_FLAT__FLAT_STORE_DWORDX2 class methods ---
725
727 InFmt_FLAT *iFmt)
728 : Inst_FLAT(iFmt, "flat_store_dwordx2")
729 {
730 setFlag(MemoryRef);
731 setFlag(Store);
732 } // Inst_FLAT__FLAT_STORE_DWORDX2
733
735 {
736 } // ~Inst_FLAT__FLAT_STORE_DWORDX2
737
738 // --- description from .arch file ---
739 // Untyped buffer store 2 dwords.
740 void
742 {
743 Wavefront *wf = gpuDynInst->wavefront();
744
745 if (gpuDynInst->exec_mask.none()) {
746 wf->decVMemInstsIssued();
747 if (isFlat()) {
748 wf->decLGKMInstsIssued();
749 }
750 wf->decExpInstsIssued();
751 return;
752 }
753
754 gpuDynInst->execUnitId = wf->execUnitId;
755 gpuDynInst->latency.init(gpuDynInst->computeUnit());
756 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
757
758 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
759
760 data.read();
761
763
764 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
765 if (gpuDynInst->exec_mask[lane]) {
766 (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
767 = data[lane];
768 }
769 }
770
771 issueRequestHelper(gpuDynInst);
772 } // execute
773
774 void
776 {
777 initMemWrite<VecElemU64>(gpuDynInst);
778 } // initiateAcc
779
780 void
782 {
783 } // completeAcc
784 // --- Inst_FLAT__FLAT_STORE_DWORDX3 class methods ---
785
787 InFmt_FLAT *iFmt)
788 : Inst_FLAT(iFmt, "flat_store_dwordx3")
789 {
790 setFlag(MemoryRef);
791 setFlag(Store);
792 } // Inst_FLAT__FLAT_STORE_DWORDX3
793
795 {
796 } // ~Inst_FLAT__FLAT_STORE_DWORDX3
797
798 // --- description from .arch file ---
799 // Untyped buffer store 3 dwords.
800 void
802 {
803 Wavefront *wf = gpuDynInst->wavefront();
804
805 if (gpuDynInst->exec_mask.none()) {
806 wf->decVMemInstsIssued();
807 if (isFlat()) {
808 wf->decLGKMInstsIssued();
809 }
810 wf->decExpInstsIssued();
811 return;
812 }
813
814 gpuDynInst->execUnitId = wf->execUnitId;
815 gpuDynInst->latency.init(gpuDynInst->computeUnit());
816 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
817
818 ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
819 ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
820 ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
821
822 data0.read();
823 data1.read();
824 data2.read();
825
827
828 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
829 if (gpuDynInst->exec_mask[lane]) {
830 (reinterpret_cast<VecElemU32*>(
831 gpuDynInst->d_data))[lane * 3] = data0[lane];
832 (reinterpret_cast<VecElemU32*>(
833 gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
834 (reinterpret_cast<VecElemU32*>(
835 gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
836 }
837 }
838
839 issueRequestHelper(gpuDynInst);
840 } // execute
841
842 void
844 {
845 initMemWrite<3>(gpuDynInst);
846 } // initiateAcc
847
848 void
850 {
851 } // completeAcc
852 // --- Inst_FLAT__FLAT_STORE_DWORDX4 class methods ---
853
855 InFmt_FLAT *iFmt)
856 : Inst_FLAT(iFmt, "flat_store_dwordx4")
857 {
858 setFlag(MemoryRef);
859 setFlag(Store);
860 } // Inst_FLAT__FLAT_STORE_DWORDX4
861
863 {
864 } // ~Inst_FLAT__FLAT_STORE_DWORDX4
865
866 // --- description from .arch file ---
867 // Untyped buffer store 4 dwords.
868 void
870 {
871 Wavefront *wf = gpuDynInst->wavefront();
872
873 if (gpuDynInst->exec_mask.none()) {
874 wf->decVMemInstsIssued();
875 if (isFlat()) {
876 wf->decLGKMInstsIssued();
877 }
878 wf->decExpInstsIssued();
879 return;
880 }
881
882 gpuDynInst->execUnitId = wf->execUnitId;
883 gpuDynInst->latency.init(gpuDynInst->computeUnit());
884 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
885
886 ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
887 ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
888 ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
889 ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);
890
891 data0.read();
892 data1.read();
893 data2.read();
894 data3.read();
895
897
898 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
899 if (gpuDynInst->exec_mask[lane]) {
900 (reinterpret_cast<VecElemU32*>(
901 gpuDynInst->d_data))[lane * 4] = data0[lane];
902 (reinterpret_cast<VecElemU32*>(
903 gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
904 (reinterpret_cast<VecElemU32*>(
905 gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
906 (reinterpret_cast<VecElemU32*>(
907 gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
908 }
909 }
910
911 issueRequestHelper(gpuDynInst);
912 } // execute
913
914 void
916 {
917 initMemWrite<4>(gpuDynInst);
918 } // initiateAcc
919
920 void
922 {
923 } // completeAcc
924 // --- Inst_FLAT__FLAT_ATOMIC_SWAP class methods ---
925
927 : Inst_FLAT(iFmt, "flat_atomic_swap")
928 {
929 setFlag(AtomicExch);
930 if (instData.GLC) {
931 setFlag(AtomicReturn);
932 } else {
933 setFlag(AtomicNoReturn);
934 }
935 setFlag(MemoryRef);
936 } // Inst_FLAT__FLAT_ATOMIC_SWAP
937
939 {
940 } // ~Inst_FLAT__FLAT_ATOMIC_SWAP
941
942 // --- description from .arch file ---
943 // 32b:
944 // tmp = MEM[ADDR];
945 // MEM[ADDR] = DATA;
946 // RETURN_DATA = tmp.
947 void
952
953 void
955 {
957 } // initiateAcc
958
959 void
964
965 // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---
966
967 Inst_FLAT__FLAT_ATOMIC_CMPSWAP
968 ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt)
969 : Inst_FLAT(iFmt, "flat_atomic_cmpswap")
970 {
971 setFlag(AtomicCAS);
972 if (instData.GLC) {
973 setFlag(AtomicReturn);
974 } else {
975 setFlag(AtomicNoReturn);
976 }
977 setFlag(MemoryRef);
978 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP
979
981 {
982 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP
983
984 // --- description from .arch file ---
985 // 32b:
986 // tmp = MEM[ADDR];
987 // src = DATA[0];
988 // cmp = DATA[1];
989 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
990 // RETURN_DATA[0] = tmp.
991 void
996
997 void
999 {
1000 initAtomicAccess<VecElemU32>(gpuDynInst);
1001 } // initiateAcc
1002
1003 void
1008 // --- Inst_FLAT__FLAT_ATOMIC_ADD class methods ---
1009
1011 : Inst_FLAT(iFmt, "flat_atomic_add")
1012 {
1013 setFlag(AtomicAdd);
1014 if (instData.GLC) {
1015 setFlag(AtomicReturn);
1016 } else {
1017 setFlag(AtomicNoReturn);
1018 }
1019 setFlag(MemoryRef);
1020 } // Inst_FLAT__FLAT_ATOMIC_ADD
1021
1023 {
1024 } // ~Inst_FLAT__FLAT_ATOMIC_ADD
1025
1026 // --- description from .arch file ---
1027 // 32b:
1028 // tmp = MEM[ADDR];
1029 // MEM[ADDR] += DATA;
1030 // RETURN_DATA = tmp.
1031 void
1036
1037 void
1039 {
1040 initAtomicAccess<VecElemU32>(gpuDynInst);
1041 } // initiateAcc
1042
1043 void
1048 // --- Inst_FLAT__FLAT_ATOMIC_SUB class methods ---
1049
1051 : Inst_FLAT(iFmt, "flat_atomic_sub")
1052 {
1053 setFlag(AtomicSub);
1054 if (instData.GLC) {
1055 setFlag(AtomicReturn);
1056 } else {
1057 setFlag(AtomicNoReturn);
1058 }
1059 setFlag(MemoryRef);
1060 } // Inst_FLAT__FLAT_ATOMIC_SUB
1061
1063 {
1064 } // ~Inst_FLAT__FLAT_ATOMIC_SUB
1065
1066 // --- description from .arch file ---
1067 // 32b:
1068 // tmp = MEM[ADDR];
1069 // MEM[ADDR] -= DATA;
1070 // RETURN_DATA = tmp.
1071 void
1076
1077 void
1079 {
1080 initAtomicAccess<VecElemU32>(gpuDynInst);
1081 } // initiateAcc
1082
1083 void
1088 // --- Inst_FLAT__FLAT_ATOMIC_SMIN class methods ---
1089
1091 : Inst_FLAT(iFmt, "flat_atomic_smin")
1092 {
1093 setFlag(AtomicMin);
1094 if (instData.GLC) {
1095 setFlag(AtomicReturn);
1096 } else {
1097 setFlag(AtomicNoReturn);
1098 }
1099 setFlag(MemoryRef);
1100 } // Inst_FLAT__FLAT_ATOMIC_SMIN
1101
1103 {
1104 } // ~Inst_FLAT__FLAT_ATOMIC_SMIN
1105
1106 // --- description from .arch file ---
1107 // 32b:
1108 // tmp = MEM[ADDR];
1109 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
1110 // RETURN_DATA = tmp.
1111 void
1116
1117 void
1119 {
1120 initAtomicAccess<VecElemI32>(gpuDynInst);
1121 } // initiateAcc
1122
1123 void
1128 // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods ---
1129
1131 : Inst_FLAT(iFmt, "flat_atomic_umin")
1132 {
1133 setFlag(AtomicMin);
1134 if (instData.GLC) {
1135 setFlag(AtomicReturn);
1136 } else {
1137 setFlag(AtomicNoReturn);
1138 }
1139 setFlag(MemoryRef);
1140 } // Inst_FLAT__FLAT_ATOMIC_UMIN
1141
1143 {
1144 } // ~Inst_FLAT__FLAT_ATOMIC_UMIN
1145
1146 // --- description from .arch file ---
1147 // 32b:
1148 // tmp = MEM[ADDR];
1149 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
1150 // RETURN_DATA = tmp.
1151 void
1156
1157 void
1159 {
1160 initAtomicAccess<VecElemU32>(gpuDynInst);
1161 } // initiateAcc
1162
1163 void
1168 // --- Inst_FLAT__FLAT_ATOMIC_SMAX class methods ---
1169
1171 : Inst_FLAT(iFmt, "flat_atomic_smax")
1172 {
1173 setFlag(AtomicMax);
1174 if (instData.GLC) {
1175 setFlag(AtomicReturn);
1176 } else {
1177 setFlag(AtomicNoReturn);
1178 }
1179 setFlag(MemoryRef);
1180 } // Inst_FLAT__FLAT_ATOMIC_SMAX
1181
1183 {
1184 } // ~Inst_FLAT__FLAT_ATOMIC_SMAX
1185
1186 // --- description from .arch file ---
1187 // 32b:
1188 // tmp = MEM[ADDR];
1189 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
1190 // RETURN_DATA = tmp.
1191 void
1196
1197 void
1199 {
1200 initAtomicAccess<VecElemI32>(gpuDynInst);
1201 } // initiateAcc
1202
1203 void
1208 // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods ---
1209
1211 : Inst_FLAT(iFmt, "flat_atomic_umax")
1212 {
1213 setFlag(AtomicMax);
1214 if (instData.GLC) {
1215 setFlag(AtomicReturn);
1216 } else {
1217 setFlag(AtomicNoReturn);
1218 }
1219 setFlag(MemoryRef);
1220 } // Inst_FLAT__FLAT_ATOMIC_UMAX
1221
1223 {
1224 } // ~Inst_FLAT__FLAT_ATOMIC_UMAX
1225
1226 // --- description from .arch file ---
1227 // 32b:
1228 // tmp = MEM[ADDR];
1229 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
1230 // RETURN_DATA = tmp.
1231 void
1236
1237 void
1239 {
1240 initAtomicAccess<VecElemU32>(gpuDynInst);
1241 } // initiateAcc
1242
1243 void
1248 // --- Inst_FLAT__FLAT_ATOMIC_AND class methods ---
1249
1251 : Inst_FLAT(iFmt, "flat_atomic_and")
1252 {
1253 setFlag(AtomicAnd);
1254 if (instData.GLC) {
1255 setFlag(AtomicReturn);
1256 } else {
1257 setFlag(AtomicNoReturn);
1258 }
1259 setFlag(MemoryRef);
1260 } // Inst_FLAT__FLAT_ATOMIC_AND
1261
1263 {
1264 } // ~Inst_FLAT__FLAT_ATOMIC_AND
1265
1266 // --- description from .arch file ---
1267 // 32b:
1268 // tmp = MEM[ADDR];
1269 // MEM[ADDR] &= DATA;
1270 // RETURN_DATA = tmp.
1271 void
1276
1277 void
1279 {
1280 initAtomicAccess<VecElemU32>(gpuDynInst);
1281 } // initiateAcc
1282
1283 void
1288 // --- Inst_FLAT__FLAT_ATOMIC_OR class methods ---
1289
1291 : Inst_FLAT(iFmt, "flat_atomic_or")
1292 {
1293 setFlag(AtomicOr);
1294 if (instData.GLC) {
1295 setFlag(AtomicReturn);
1296 } else {
1297 setFlag(AtomicNoReturn);
1298 }
1299 setFlag(MemoryRef);
1300 } // Inst_FLAT__FLAT_ATOMIC_OR
1301
1303 {
1304 } // ~Inst_FLAT__FLAT_ATOMIC_OR
1305
1306 // --- description from .arch file ---
1307 // 32b:
1308 // tmp = MEM[ADDR];
1309 // MEM[ADDR] |= DATA;
1310 // RETURN_DATA = tmp.
1311 void
1316
1317 void
1319 {
1320 initAtomicAccess<VecElemU32>(gpuDynInst);
1321 } // initiateAcc
1322
1323 void
1328
1329 // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods ---
1330
1332 : Inst_FLAT(iFmt, "flat_atomic_xor")
1333 {
1334 setFlag(AtomicXor);
1335 if (instData.GLC) {
1336 setFlag(AtomicReturn);
1337 } else {
1338 setFlag(AtomicNoReturn);
1339 }
1340 setFlag(MemoryRef);
1341 } // Inst_FLAT__FLAT_ATOMIC_XOR
1342
1344 {
1345 } // ~Inst_FLAT__FLAT_ATOMIC_XOR
1346
1347 // --- description from .arch file ---
1348 // 32b:
1349 // tmp = MEM[ADDR];
1350 // MEM[ADDR] ^= DATA;
1351 // RETURN_DATA = tmp.
1352 void
1357
1358 void
1360 {
1361 initAtomicAccess<VecElemU32>(gpuDynInst);
1362 } // initiateAcc
1363
1364 void
1369 // --- Inst_FLAT__FLAT_ATOMIC_INC class methods ---
1370
1372 : Inst_FLAT(iFmt, "flat_atomic_inc")
1373 {
1374 setFlag(AtomicInc);
1375 if (instData.GLC) {
1376 setFlag(AtomicReturn);
1377 } else {
1378 setFlag(AtomicNoReturn);
1379 }
1380 setFlag(MemoryRef);
1381 } // Inst_FLAT__FLAT_ATOMIC_INC
1382
1384 {
1385 } // ~Inst_FLAT__FLAT_ATOMIC_INC
1386
1387 // --- description from .arch file ---
1388 // 32b:
1389 // tmp = MEM[ADDR];
1390 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
1391 // RETURN_DATA = tmp.
1392 void
1397
1398 void
1400 {
1401 initAtomicAccess<VecElemU32>(gpuDynInst);
1402 } // initiateAcc
1403
1404 void
1409 // --- Inst_FLAT__FLAT_ATOMIC_DEC class methods ---
1410
1412 : Inst_FLAT(iFmt, "flat_atomic_dec")
1413 {
1414 setFlag(AtomicDec);
1415 if (instData.GLC) {
1416 setFlag(AtomicReturn);
1417 } else {
1418 setFlag(AtomicNoReturn);
1419 }
1420 setFlag(MemoryRef);
1421 } // Inst_FLAT__FLAT_ATOMIC_DEC
1422
1424 {
1425 } // ~Inst_FLAT__FLAT_ATOMIC_DEC
1426
1427 // --- description from .arch file ---
1428 // 32b:
1429 // tmp = MEM[ADDR];
1430 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
1431 // (unsigned compare); RETURN_DATA = tmp.
1432 void
1437
1438 void
1440 {
1441 initAtomicAccess<VecElemU32>(gpuDynInst);
1442 } // initiateAcc
1443
1444 void
1449 // --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class methods ---
1450
1452 InFmt_FLAT *iFmt)
1453 : Inst_FLAT(iFmt, "flat_atomic_swap_x2")
1454 {
1455 setFlag(AtomicExch);
1456 if (instData.GLC) {
1457 setFlag(AtomicReturn);
1458 } else {
1459 setFlag(AtomicNoReturn);
1460 }
1461 setFlag(MemoryRef);
1462 } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2
1463
1465 {
1466 } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2
1467
1468 // --- description from .arch file ---
1469 // 64b:
1470 // tmp = MEM[ADDR];
1471 // MEM[ADDR] = DATA[0:1];
1472 // RETURN_DATA[0:1] = tmp.
1473 void
1478
1479 void
1481 {
1482 initAtomicAccess<VecElemU64>(gpuDynInst);
1483 } // initiateAcc
1484
1485 void
1490 // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 class methods ---
1491
1493 InFmt_FLAT *iFmt)
1494 : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
1495 {
1496 setFlag(AtomicCAS);
1497 if (instData.GLC) {
1498 setFlag(AtomicReturn);
1499 } else {
1500 setFlag(AtomicNoReturn);
1501 }
1502 setFlag(MemoryRef);
1503 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
1504
1506 {
1507 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
1508
1509 // --- description from .arch file ---
1510 // 64b:
1511 // tmp = MEM[ADDR];
1512 // src = DATA[0:1];
1513 // cmp = DATA[2:3];
1514 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
1515 // RETURN_DATA[0:1] = tmp.
1516 void
1521
1522 void
1524 {
1525 initAtomicAccess<VecElemU64>(gpuDynInst);
1526 } // initiateAcc
1527
1528 void
1533 // --- Inst_FLAT__FLAT_ATOMIC_ADD_X2 class methods ---
1534
1536 InFmt_FLAT *iFmt)
1537 : Inst_FLAT(iFmt, "flat_atomic_add_x2")
1538 {
1539 setFlag(AtomicAdd);
1540 if (instData.GLC) {
1541 setFlag(AtomicReturn);
1542 } else {
1543 setFlag(AtomicNoReturn);
1544 }
1545 setFlag(MemoryRef);
1546 } // Inst_FLAT__FLAT_ATOMIC_ADD_X2
1547
1549 {
1550 } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2
1551
1552 // --- description from .arch file ---
1553 // 64b:
1554 // tmp = MEM[ADDR];
1555 // MEM[ADDR] += DATA[0:1];
1556 // RETURN_DATA[0:1] = tmp.
1557 void
1562
1563 void
1565 {
1566 initAtomicAccess<VecElemU64>(gpuDynInst);
1567 } // initiateAcc
1568
1569 void
1574 // --- Inst_FLAT__FLAT_ATOMIC_SUB_X2 class methods ---
1575
1577 InFmt_FLAT *iFmt)
1578 : Inst_FLAT(iFmt, "flat_atomic_sub_x2")
1579 {
1580 setFlag(AtomicSub);
1581 if (instData.GLC) {
1582 setFlag(AtomicReturn);
1583 } else {
1584 setFlag(AtomicNoReturn);
1585 }
1586 setFlag(MemoryRef);
1587 } // Inst_FLAT__FLAT_ATOMIC_SUB_X2
1588
1590 {
1591 } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2
1592
1593 // --- description from .arch file ---
1594 // 64b:
1595 // tmp = MEM[ADDR];
1596 // MEM[ADDR] -= DATA[0:1];
1597 // RETURN_DATA[0:1] = tmp.
1598 void
1603
1604 void
1606 {
1607 initAtomicAccess<VecElemU64>(gpuDynInst);
1608 } // initiateAcc
1609
1610 void
1615 // --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class methods ---
1616
1618 InFmt_FLAT *iFmt)
1619 : Inst_FLAT(iFmt, "flat_atomic_smin_x2")
1620 {
1621 setFlag(AtomicMin);
1622 if (instData.GLC) {
1623 setFlag(AtomicReturn);
1624 } else {
1625 setFlag(AtomicNoReturn);
1626 }
1627 setFlag(MemoryRef);
1628 } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2
1629
1631 {
1632 } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2
1633
1634 // --- description from .arch file ---
1635 // 64b:
1636 // tmp = MEM[ADDR];
1637 // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
1638 // RETURN_DATA[0:1] = tmp.
1639 void
1644
1645 void
1647 {
1648 initAtomicAccess<VecElemI64>(gpuDynInst);
1649 } // initiateAcc
1650
1651 void
1656 // --- Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class methods ---
1657
1659 InFmt_FLAT *iFmt)
1660 : Inst_FLAT(iFmt, "flat_atomic_umin_x2")
1661 {
1662 setFlag(AtomicMin);
1663 if (instData.GLC) {
1664 setFlag(AtomicReturn);
1665 } else {
1666 setFlag(AtomicNoReturn);
1667 }
1668 setFlag(MemoryRef);
1669 } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2
1670
1672 {
1673 } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2
1674
1675 // --- description from .arch file ---
1676 // 64b:
1677 // tmp = MEM[ADDR];
1678 // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
1679 // RETURN_DATA[0:1] = tmp.
1680 void
1685
1686 void
1688 {
1689 initAtomicAccess<VecElemU64>(gpuDynInst);
1690 } // initiateAcc
1691
1692 void
1697 // --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class methods ---
1698
1700 InFmt_FLAT *iFmt)
1701 : Inst_FLAT(iFmt, "flat_atomic_smax_x2")
1702 {
1703 setFlag(AtomicMax);
1704 if (instData.GLC) {
1705 setFlag(AtomicReturn);
1706 } else {
1707 setFlag(AtomicNoReturn);
1708 }
1709 setFlag(MemoryRef);
1710 } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2
1711
1713 {
1714 } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2
1715
1716 // --- description from .arch file ---
1717 // 64b:
1718 // tmp = MEM[ADDR];
1719 // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
1720 // RETURN_DATA[0:1] = tmp.
1721 void
1726
1727 void
1729 {
1730 initAtomicAccess<VecElemI64>(gpuDynInst);
1731 } // initiateAcc
1732
1733 void
1738 // --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class methods ---
1739
1741 InFmt_FLAT *iFmt)
1742 : Inst_FLAT(iFmt, "flat_atomic_umax_x2")
1743 {
1744 setFlag(AtomicMax);
1745 if (instData.GLC) {
1746 setFlag(AtomicReturn);
1747 } else {
1748 setFlag(AtomicNoReturn);
1749 }
1750 setFlag(MemoryRef);
1751 } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2
1752
1754 {
1755 } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2
1756
1757 // --- description from .arch file ---
1758 // 64b:
1759 // tmp = MEM[ADDR];
1760 // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
1761 // RETURN_DATA[0:1] = tmp.
1762 void
1767
1768 void
1770 {
1771 initAtomicAccess<VecElemU64>(gpuDynInst);
1772 } // initiateAcc
1773
1774 void
1779 // --- Inst_FLAT__FLAT_ATOMIC_AND_X2 class methods ---
1780
1782 InFmt_FLAT *iFmt)
1783 : Inst_FLAT(iFmt, "flat_atomic_and_x2")
1784 {
1785 setFlag(AtomicAnd);
1786 if (instData.GLC) {
1787 setFlag(AtomicReturn);
1788 } else {
1789 setFlag(AtomicNoReturn);
1790 }
1791 setFlag(MemoryRef);
1792 } // Inst_FLAT__FLAT_ATOMIC_AND_X2
1793
1795 {
1796 } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2
1797
1798 // --- description from .arch file ---
1799 // 64b:
1800 // tmp = MEM[ADDR];
1801 // MEM[ADDR] &= DATA[0:1];
1802 // RETURN_DATA[0:1] = tmp.
1803 void
1808
1809 void
1811 {
1812 initAtomicAccess<VecElemU64>(gpuDynInst);
1813 } // initiateAcc
1814
1815 void
1820 // --- Inst_FLAT__FLAT_ATOMIC_OR_X2 class methods ---
1821
1823 InFmt_FLAT *iFmt)
1824 : Inst_FLAT(iFmt, "flat_atomic_or_x2")
1825 {
1826 setFlag(AtomicOr);
1827 if (instData.GLC) {
1828 setFlag(AtomicReturn);
1829 } else {
1830 setFlag(AtomicNoReturn);
1831 }
1832 setFlag(MemoryRef);
1833 } // Inst_FLAT__FLAT_ATOMIC_OR_X2
1834
1836 {
1837 } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2
1838
1839 // --- description from .arch file ---
1840 // 64b:
1841 // tmp = MEM[ADDR];
1842 // MEM[ADDR] |= DATA[0:1];
1843 // RETURN_DATA[0:1] = tmp.
1844 void
1849
1850 void
1852 {
1853 initAtomicAccess<VecElemU64>(gpuDynInst);
1854 } // initiateAcc
1855
1856 void
1861 // --- Inst_FLAT__FLAT_ATOMIC_XOR_X2 class methods ---
1862
1864 InFmt_FLAT *iFmt)
1865 : Inst_FLAT(iFmt, "flat_atomic_xor_x2")
1866 {
1867 setFlag(AtomicXor);
1868 if (instData.GLC) {
1869 setFlag(AtomicReturn);
1870 } else {
1871 setFlag(AtomicNoReturn);
1872 }
1873 setFlag(MemoryRef);
1874 } // Inst_FLAT__FLAT_ATOMIC_XOR_X2
1875
1877 {
1878 } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2
1879
1880 // --- description from .arch file ---
1881 // 64b:
1882 // tmp = MEM[ADDR];
1883 // MEM[ADDR] ^= DATA[0:1];
1884 // RETURN_DATA[0:1] = tmp.
1885 void
1890
1891 void
1893 {
1894 initAtomicAccess<VecElemU64>(gpuDynInst);
1895 } // initiateAcc
1896
1897 void
1902 // --- Inst_FLAT__FLAT_ATOMIC_INC_X2 class methods ---
1903
1905 InFmt_FLAT *iFmt)
1906 : Inst_FLAT(iFmt, "flat_atomic_inc_x2")
1907 {
1908 setFlag(AtomicInc);
1909 if (instData.GLC) {
1910 setFlag(AtomicReturn);
1911 } else {
1912 setFlag(AtomicNoReturn);
1913 }
1914 setFlag(MemoryRef);
1915 } // Inst_FLAT__FLAT_ATOMIC_INC_X2
1916
1918 {
1919 } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2
1920
1921 // --- description from .arch file ---
1922 // 64b:
1923 // tmp = MEM[ADDR];
1924 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
1925 // RETURN_DATA[0:1] = tmp.
1926 void
1931
1932 void
1934 {
1935 initAtomicAccess<VecElemU64>(gpuDynInst);
1936 } // initiateAcc
1937
1938 void
1943 // --- Inst_FLAT__FLAT_ATOMIC_DEC_X2 class methods ---
1944
1946 InFmt_FLAT *iFmt)
1947 : Inst_FLAT(iFmt, "flat_atomic_dec_x2")
1948 {
1949 setFlag(AtomicDec);
1950 if (instData.GLC) {
1951 setFlag(AtomicReturn);
1952 } else {
1953 setFlag(AtomicNoReturn);
1954 }
1955 setFlag(MemoryRef);
1956 } // Inst_FLAT__FLAT_ATOMIC_DEC_X2
1957
1959 {
1960 } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2
1961
1962 // --- description from .arch file ---
1963 // 64b:
1964 // tmp = MEM[ADDR];
1965 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
1966 // (unsigned compare);
1967 // RETURN_DATA[0:1] = tmp.
1968 void
1973
1974 void
1976 {
1977 initAtomicAccess<VecElemU64>(gpuDynInst);
1978 } // initiateAcc
1979
1980 void
1985 // --- Inst_FLAT__FLAT_ATOMIC_ADD_F32 class methods ---
1986
1988 InFmt_FLAT *iFmt)
1989 : Inst_FLAT(iFmt, "flat_atomic_add_f32")
1990 {
1991 setFlag(AtomicAdd);
1992 if (instData.GLC) {
1993 setFlag(AtomicReturn);
1994 } else {
1995 setFlag(AtomicNoReturn);
1996 }
1997 setFlag(MemoryRef);
1998 } // Inst_FLAT__FLAT_ATOMIC_ADD_F32
1999
2001 {
2002 } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F32
2003
2004 void
2009
2010 void
2012 {
2013 initAtomicAccess<VecElemF32>(gpuDynInst);
2014 } // initiateAcc
2015
2016 void
2021 // --- Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 class methods ---
2022
2024 InFmt_FLAT *iFmt)
2025 : Inst_FLAT(iFmt, "flat_atomic_pk_add_f16")
2026 {
2027 setFlag(AtomicAdd);
2028 if (instData.GLC) {
2029 setFlag(AtomicReturn);
2030 } else {
2031 setFlag(AtomicNoReturn);
2032 }
2033 setFlag(MemoryRef);
2034 } // Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16
2035
2037 {
2038 } // ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16
2039
2040 void
2045
2046 void
2048 {
2049 } // initiateAcc
2050
2051 void
2053 {
2054 } // completeAcc
2055 // --- Inst_FLAT__FLAT_ATOMIC_ADD_F64 class methods ---
2056
2058 InFmt_FLAT *iFmt)
2059 : Inst_FLAT(iFmt, "flat_atomic_add_f64")
2060 {
2061 setFlag(AtomicAdd);
2062 if (instData.GLC) {
2063 setFlag(AtomicReturn);
2064 } else {
2065 setFlag(AtomicNoReturn);
2066 }
2067 setFlag(MemoryRef);
2068 } // Inst_FLAT__FLAT_ATOMIC_ADD_F64
2069
2071 {
2072 } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F64
2073
2074 void
2079
2080 void
2082 {
2083 initAtomicAccess<VecElemF64>(gpuDynInst);
2084 } // initiateAcc
2085
2086 void
2091 // --- Inst_FLAT__FLAT_ATOMIC_MIN_F64 class methods ---
2092
2094 InFmt_FLAT *iFmt)
2095 : Inst_FLAT(iFmt, "flat_atomic_min_f64")
2096 {
2097 setFlag(AtomicMin);
2098 if (instData.GLC) {
2099 setFlag(AtomicReturn);
2100 } else {
2101 setFlag(AtomicNoReturn);
2102 }
2103 setFlag(MemoryRef);
2104 } // Inst_FLAT__FLAT_ATOMIC_MIN_F64
2105
2107 {
2108 } // ~Inst_FLAT__FLAT_ATOMIC_MIN_F64
2109
2110 void
2115
2116 void
2118 {
2119 initAtomicAccess<VecElemF64>(gpuDynInst);
2120 } // initiateAcc
2121
2122 void
2127 // --- Inst_FLAT__FLAT_ATOMIC_MAX_F64 class methods ---
2128
2130 InFmt_FLAT *iFmt)
2131 : Inst_FLAT(iFmt, "flat_atomic_max_f64")
2132 {
2133 setFlag(AtomicMax);
2134 if (instData.GLC) {
2135 setFlag(AtomicReturn);
2136 } else {
2137 setFlag(AtomicNoReturn);
2138 }
2139 setFlag(MemoryRef);
2140 } // Inst_FLAT__FLAT_ATOMIC_MAX_F64
2141
2143 {
2144 } // ~Inst_FLAT__FLAT_ATOMIC_MAX_F64
2145
2146 void
2151
2152 void
2154 {
2155 initAtomicAccess<VecElemF64>(gpuDynInst);
2156 } // initiateAcc
2157
2158 void
2163} // namespace VegaISA
2164} // namespace gem5
const char data[]
void setFlag(Flags flag)
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2017
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2011
void execute(GPUDynInstPtr) override
Definition flat.cc:2005
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2087
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2081
void execute(GPUDynInstPtr) override
Definition flat.cc:2075
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1570
void execute(GPUDynInstPtr) override
Definition flat.cc:1558
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1564
void execute(GPUDynInstPtr) override
Definition flat.cc:1032
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1044
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1038
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1810
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1816
void execute(GPUDynInstPtr) override
Definition flat.cc:1804
void execute(GPUDynInstPtr) override
Definition flat.cc:1272
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1278
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1284
void execute(GPUDynInstPtr) override
Definition flat.cc:1517
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1523
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1529
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1004
void execute(GPUDynInstPtr) override
Definition flat.cc:992
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:998
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1981
void execute(GPUDynInstPtr) override
Definition flat.cc:1969
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1975
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1439
void execute(GPUDynInstPtr) override
Definition flat.cc:1433
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1445
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1939
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1933
void execute(GPUDynInstPtr) override
Definition flat.cc:1927
void execute(GPUDynInstPtr) override
Definition flat.cc:1393
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1399
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1405
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2153
void execute(GPUDynInstPtr) override
Definition flat.cc:2147
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2159
void execute(GPUDynInstPtr) override
Definition flat.cc:2111
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2117
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2123
void execute(GPUDynInstPtr) override
Definition flat.cc:1845
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1857
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1851
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1324
void execute(GPUDynInstPtr) override
Definition flat.cc:1312
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1318
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:2047
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:2052
void execute(GPUDynInstPtr) override
Definition flat.cc:2041
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1728
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1734
void execute(GPUDynInstPtr) override
Definition flat.cc:1722
void execute(GPUDynInstPtr) override
Definition flat.cc:1192
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1204
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1198
void execute(GPUDynInstPtr) override
Definition flat.cc:1640
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1652
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1646
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1124
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1118
void execute(GPUDynInstPtr) override
Definition flat.cc:1112
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1605
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1611
void execute(GPUDynInstPtr) override
Definition flat.cc:1599
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1084
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1078
void execute(GPUDynInstPtr) override
Definition flat.cc:1072
void execute(GPUDynInstPtr) override
Definition flat.cc:1474
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1486
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1480
void execute(GPUDynInstPtr) override
Definition flat.cc:948
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:954
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:960
void execute(GPUDynInstPtr) override
Definition flat.cc:1763
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1769
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1775
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1238
void execute(GPUDynInstPtr) override
Definition flat.cc:1232
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1244
void execute(GPUDynInstPtr) override
Definition flat.cc:1681
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1687
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1693
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1158
void execute(GPUDynInstPtr) override
Definition flat.cc:1152
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1164
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1898
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1892
void execute(GPUDynInstPtr) override
Definition flat.cc:1886
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:1365
void execute(GPUDynInstPtr) override
Definition flat.cc:1353
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:1359
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:339
void execute(GPUDynInstPtr) override
Definition flat.cc:311
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:333
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:390
void execute(GPUDynInstPtr) override
Definition flat.cc:368
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:396
void execute(GPUDynInstPtr) override
Definition flat.cc:434
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:456
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:462
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:276
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:282
void execute(GPUDynInstPtr) override
Definition flat.cc:254
void execute(GPUDynInstPtr) override
Definition flat.cc:111
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:133
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:139
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:235
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:230
void execute(GPUDynInstPtr) override
Definition flat.cc:224
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:77
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:83
void execute(GPUDynInstPtr) override
Definition flat.cc:55
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:189
void execute(GPUDynInstPtr) override
Definition flat.cc:167
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:195
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:543
void execute(GPUDynInstPtr) override
Definition flat.cc:503
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:537
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:781
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:775
void execute(GPUDynInstPtr) override
Definition flat.cc:741
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:849
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:843
void execute(GPUDynInstPtr) override
Definition flat.cc:801
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:921
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:915
void execute(GPUDynInstPtr) override
Definition flat.cc:869
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:715
void execute(GPUDynInstPtr) override
Definition flat.cc:681
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:721
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:662
void execute(GPUDynInstPtr) override
Definition flat.cc:622
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:656
void execute(GPUDynInstPtr) override
Definition flat.cc:562
void completeAcc(GPUDynInstPtr) override
Definition flat.cc:602
void initiateAcc(GPUDynInstPtr) override
Definition flat.cc:596
void atomicComplete(GPUDynInstPtr gpuDynInst)
void calcAddr(GPUDynInstPtr gpuDynInst, ScalarRegU32 vaddr, ScalarRegU32 saddr, ScalarRegI32 offset)
void initMemRead(GPUDynInstPtr gpuDynInst)
void atomicExecute(GPUDynInstPtr gpuDynInst)
void issueRequestHelper(GPUDynInstPtr gpuDynInst)
void initAtomicAccess(GPUDynInstPtr gpuDynInst)
void initMemWrite(GPUDynInstPtr gpuDynInst)
void read() override
read from the vrf.
Definition operand.hh:147
void write() override
write to the vrf.
Definition operand.hh:199
void decExpInstsIssued()
void decVMemInstsIssued()
void decLGKMInstsIssued()
constexpr unsigned NumVecElemPerVecReg
Definition vec.hh:61
uint16_t VecElemU16
uint32_t VecElemU32
uint64_t VecElemU64
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49

Generated on Tue Jun 18 2024 16:23:41 for gem5 by doxygen 1.11.0