gem5 [DEVELOP-FOR-25.0]
ds.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "arch/amdgpu/vega/insts/instructions.hh"
33
34namespace gem5
35{
36
37namespace VegaISA
38{
39 // --- Inst_DS__DS_ADD_U32 class methods ---
40
41 Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
42 : Inst_DS(iFmt, "ds_add_u32")
43 {
44 setFlag(MemoryRef);
45 setFlag(GroupSegment);
46 setFlag(AtomicAdd);
47 setFlag(AtomicNoReturn);
48 } // Inst_DS__DS_ADD_U32
49
50 Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
51 {
52 } // ~Inst_DS__DS_ADD_U32
53
54 // --- description from .arch file ---
55 // 32b:
56 // MEM[ADDR] += DATA;
57 void
58 Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
59 {
60 Wavefront *wf = gpuDynInst->wavefront();
61
62 if (gpuDynInst->exec_mask.none()) {
63 wf->decLGKMInstsIssued();
64 wf->untrackLGKMInst(gpuDynInst);
65 return;
66 }
67
68 gpuDynInst->execUnitId = wf->execUnitId;
69 gpuDynInst->latency.init(gpuDynInst->computeUnit());
70 gpuDynInst->latency.set(
71 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
72 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
73 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
74
75 addr.read();
76 data.read();
77
78 calcAddr(gpuDynInst, addr);
79
80 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
81 if (gpuDynInst->exec_mask[lane]) {
82 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
83 = data[lane];
84 }
85 }
86
87 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
88 } // execute
89
90 void
91 Inst_DS__DS_ADD_U32::initiateAcc(GPUDynInstPtr gpuDynInst)
92 {
93 Addr offset0 = instData.OFFSET0;
94 Addr offset1 = instData.OFFSET1;
95 Addr offset = (offset1 << 8) | offset0;
96
98 } // initiateAcc
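// Illustrative example (editorial, not part of the original ds.cc): the DS
// encoding splits a 16-bit byte offset across two 8-bit fields, so with
// OFFSET1 = 0x01 and OFFSET0 = 0x20 the code above computes
// offset = (0x01 << 8) | 0x20 = 0x120, i.e. a 288-byte displacement added
// to each lane's LDS address.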
99
100 void
101 Inst_DS__DS_ADD_U32::completeAcc(GPUDynInstPtr gpuDynInst)
102 {
103 } // completeAcc
104 // --- Inst_DS__DS_SUB_U32 class methods ---
105
107 : Inst_DS(iFmt, "ds_sub_u32")
108 {
109 } // Inst_DS__DS_SUB_U32
110
112 {
113 } // ~Inst_DS__DS_SUB_U32
114
115 // --- description from .arch file ---
116 // 32b:
117 // tmp = MEM[ADDR];
118 // MEM[ADDR] -= DATA;
119 // RETURN_DATA = tmp.
120 void
122 {
124 } // execute
125 // --- Inst_DS__DS_RSUB_U32 class methods ---
126
128 : Inst_DS(iFmt, "ds_rsub_u32")
129 {
130 } // Inst_DS__DS_RSUB_U32
131
133 {
134 } // ~Inst_DS__DS_RSUB_U32
135
136 // --- description from .arch file ---
137 // 32b:
138 // tmp = MEM[ADDR];
139 // MEM[ADDR] = DATA - MEM[ADDR];
140 // RETURN_DATA = tmp.
141 // Subtraction with reversed operands.
142 void
144 {
146 } // execute
147 // --- Inst_DS__DS_INC_U32 class methods ---
148
150 : Inst_DS(iFmt, "ds_inc_u32")
151 {
152 } // Inst_DS__DS_INC_U32
153
155 {
156 } // ~Inst_DS__DS_INC_U32
157
158 // --- description from .arch file ---
159 // 32b:
160 // tmp = MEM[ADDR];
161 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
162 // RETURN_DATA = tmp.
163 void
165 {
167 } // execute
168 // --- Inst_DS__DS_DEC_U32 class methods ---
169
171 : Inst_DS(iFmt, "ds_dec_u32")
172 {
173 } // Inst_DS__DS_DEC_U32
174
176 {
177 } // ~Inst_DS__DS_DEC_U32
178
179 // --- description from .arch file ---
180 // 32b:
181 // tmp = MEM[ADDR];
182 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
183 // (unsigned compare); RETURN_DATA = tmp.
184 void
186 {
188 } // execute
189 // --- Inst_DS__DS_MIN_I32 class methods ---
190
192 : Inst_DS(iFmt, "ds_min_i32")
193 {
194 } // Inst_DS__DS_MIN_I32
195
197 {
198 } // ~Inst_DS__DS_MIN_I32
199
200 // --- description from .arch file ---
201 // 32b:
202 // tmp = MEM[ADDR];
203 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
204 // RETURN_DATA = tmp.
205 void
207 {
209 } // execute
210 // --- Inst_DS__DS_MAX_I32 class methods ---
211
213 : Inst_DS(iFmt, "ds_max_i32")
214 {
215 } // Inst_DS__DS_MAX_I32
216
218 {
219 } // ~Inst_DS__DS_MAX_I32
220
221 // --- description from .arch file ---
222 // 32b:
223 // tmp = MEM[ADDR];
224 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
225 // RETURN_DATA = tmp.
226 void
228 {
230 } // execute
231 // --- Inst_DS__DS_MIN_U32 class methods ---
232
234 : Inst_DS(iFmt, "ds_min_u32")
235 {
236 } // Inst_DS__DS_MIN_U32
237
239 {
240 } // ~Inst_DS__DS_MIN_U32
241
242 // --- description from .arch file ---
243 // 32b:
244 // tmp = MEM[ADDR];
245 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
246 // RETURN_DATA = tmp.
247 void
249 {
251 } // execute
252 // --- Inst_DS__DS_MAX_U32 class methods ---
253
255 : Inst_DS(iFmt, "ds_max_u32")
256 {
257 } // Inst_DS__DS_MAX_U32
258
260 {
261 } // ~Inst_DS__DS_MAX_U32
262
263 // --- description from .arch file ---
264 // 32b:
265 // tmp = MEM[ADDR];
266 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
267 // RETURN_DATA = tmp.
268 void
270 {
272 } // execute
273 // --- Inst_DS__DS_AND_B32 class methods ---
274
276 : Inst_DS(iFmt, "ds_and_b32")
277 {
278 } // Inst_DS__DS_AND_B32
279
281 {
282 } // ~Inst_DS__DS_AND_B32
283
284 // --- description from .arch file ---
285 // 32b:
286 // tmp = MEM[ADDR];
287 // MEM[ADDR] &= DATA;
288 // RETURN_DATA = tmp.
289 void
291 {
293 } // execute
294 // --- Inst_DS__DS_OR_B32 class methods ---
295
297 : Inst_DS(iFmt, "ds_or_b32")
298 {
299 setFlag(MemoryRef);
300 setFlag(GroupSegment);
301 setFlag(AtomicOr);
302 setFlag(AtomicNoReturn);
303 } // Inst_DS__DS_OR_B32
304
306 {
307 } // ~Inst_DS__DS_OR_B32
308
309 // --- description from .arch file ---
310 // 32b:
311 // MEM[ADDR] |= DATA;
312 void
314 {
315 Wavefront *wf = gpuDynInst->wavefront();
316
317 if (gpuDynInst->exec_mask.none()) {
318 wf->decLGKMInstsIssued();
319 wf->untrackLGKMInst(gpuDynInst);
320 return;
321 }
322
323 gpuDynInst->execUnitId = wf->execUnitId;
324 gpuDynInst->latency.init(gpuDynInst->computeUnit());
325 gpuDynInst->latency.set(
326 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
327 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
328 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
329
330 addr.read();
331 data.read();
332
333 calcAddr(gpuDynInst, addr);
334
335 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
336 if (gpuDynInst->exec_mask[lane]) {
337 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
338 = data[lane];
339 }
340 }
341
342 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
343 } // execute
344
345 void
347 {
348 Addr offset0 = instData.OFFSET0;
349 Addr offset1 = instData.OFFSET1;
350 Addr offset = (offset1 << 8) | offset0;
351
353 } // initiateAcc
354
355 void
357 {
358 } // completeAcc
359
360 // --- Inst_DS__DS_XOR_B32 class methods ---
361
363 : Inst_DS(iFmt, "ds_xor_b32")
364 {
365 } // Inst_DS__DS_XOR_B32
366
368 {
369 } // ~Inst_DS__DS_XOR_B32
370
371 // --- description from .arch file ---
372 // 32b:
373 // tmp = MEM[ADDR];
374 // MEM[ADDR] ^= DATA;
375 // RETURN_DATA = tmp.
376 void
378 {
380 } // execute
381 // --- Inst_DS__DS_MSKOR_B32 class methods ---
382
384 : Inst_DS(iFmt, "ds_mskor_b32")
385 {
386 } // Inst_DS__DS_MSKOR_B32
387
389 {
390 } // ~Inst_DS__DS_MSKOR_B32
391
392 // --- description from .arch file ---
393 // 32b:
394 // tmp = MEM[ADDR];
395 // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
396 // RETURN_DATA = tmp.
397 // Masked dword OR, D0 contains the mask and D1 contains the new value.
398 void
400 {
402 } // execute
403 // --- Inst_DS__DS_WRITE_B32 class methods ---
404
405 Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
406 : Inst_DS(iFmt, "ds_write_b32")
407 {
408 setFlag(MemoryRef);
409 setFlag(Store);
410 } // Inst_DS__DS_WRITE_B32
411
412 Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
413 {
414 } // ~Inst_DS__DS_WRITE_B32
415
416 // --- description from .arch file ---
417 // 32b:
418 // MEM[ADDR] = DATA.
419 // Write dword.
420 void
421 Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
422 {
423 Wavefront *wf = gpuDynInst->wavefront();
424
425 if (gpuDynInst->exec_mask.none()) {
426 wf->decLGKMInstsIssued();
427 wf->untrackLGKMInst(gpuDynInst);
428 return;
429 }
430
431 gpuDynInst->execUnitId = wf->execUnitId;
432 gpuDynInst->latency.init(gpuDynInst->computeUnit());
433 gpuDynInst->latency.set(
434 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
435 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
436 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
437
438 addr.read();
439 data.read();
440
441 calcAddr(gpuDynInst, addr);
442
443 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
444 if (gpuDynInst->exec_mask[lane]) {
445 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
446 = data[lane];
447 }
448 }
449
450 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
451 } // execute
452
453 void
454 Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
455 {
456 Addr offset0 = instData.OFFSET0;
457 Addr offset1 = instData.OFFSET1;
458 Addr offset = (offset1 << 8) | offset0;
459
460 initMemWrite<VecElemU32>(gpuDynInst, offset);
461 } // initiateAcc
462
463 void
464 Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
465 {
466 } // completeAcc
467 // --- Inst_DS__DS_WRITE2_B32 class methods ---
468
469 Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
470 : Inst_DS(iFmt, "ds_write2_b32")
471 {
472 setFlag(MemoryRef);
473 setFlag(Store);
474 } // Inst_DS__DS_WRITE2_B32
475
476 Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
477 {
478 } // ~Inst_DS__DS_WRITE2_B32
479
480 // --- description from .arch file ---
481 // 32b:
482 // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
483 // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
484 // Write 2 dwords.
485 void
486 Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
487 {
488 Wavefront *wf = gpuDynInst->wavefront();
489
490 if (gpuDynInst->exec_mask.none()) {
491 wf->decLGKMInstsIssued();
492 wf->untrackLGKMInst(gpuDynInst);
493 return;
494 }
495
496 gpuDynInst->execUnitId = wf->execUnitId;
497 gpuDynInst->latency.init(gpuDynInst->computeUnit());
498 gpuDynInst->latency.set(
499 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
500 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
501 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
502 ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
503
504 addr.read();
505 data0.read();
506 data1.read();
507
508 calcAddr(gpuDynInst, addr);
509
510 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
511 if (gpuDynInst->exec_mask[lane]) {
512 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
513 = data0[lane];
514 (reinterpret_cast<VecElemU32*>(
515 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
516 }
517 }
518
519 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
520 } // execute
521
522 void
523 Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
524 {
525 Addr offset0 = instData.OFFSET0 * 4;
526 Addr offset1 = instData.OFFSET1 * 4;
527
528 initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
529 }
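// Illustrative example (editorial, not part of the original ds.cc): for
// DS_WRITE2_B32 the two offsets are in dword units, so OFFSET0 = 1 and
// OFFSET1 = 2 give byte offsets 4 and 8. The execute loop above packs
// DATA0 and DATA1 for each lane into adjacent d_data slots (lane * 2 and
// lane * 2 + 1) so that initDualMemWrite can issue both stores.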
530
531 void
532 Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
533 {
534 } // completeAcc
535 // --- Inst_DS__DS_WRITE2ST64_B32 class methods ---
536
538 : Inst_DS(iFmt, "ds_write2st64_b32")
539 {
540 setFlag(MemoryRef);
541 setFlag(Store);
542 } // Inst_DS__DS_WRITE2ST64_B32
543
545 {
546 } // ~Inst_DS__DS_WRITE2ST64_B32
547
548 // --- description from .arch file ---
549 // 32b:
550 // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
551 // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
552 // Write 2 dwords.
553 void
555 {
556 Wavefront *wf = gpuDynInst->wavefront();
557
558 if (gpuDynInst->exec_mask.none()) {
559 wf->decLGKMInstsIssued();
560 wf->untrackLGKMInst(gpuDynInst);
561 return;
562 }
563
564 gpuDynInst->execUnitId = wf->execUnitId;
565 gpuDynInst->latency.init(gpuDynInst->computeUnit());
566 gpuDynInst->latency.set(
567 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
568 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
569 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
570 ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
571
572 addr.read();
573 data0.read();
574 data1.read();
575
576 calcAddr(gpuDynInst, addr);
577
578 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
579 if (gpuDynInst->exec_mask[lane]) {
580 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
581 = data0[lane];
582 (reinterpret_cast<VecElemU32*>(
583 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
584 }
585 }
586
587 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
588 } // execute
589
590 void
592 {
593 Addr offset0 = instData.OFFSET0 * 4 * 64;
594 Addr offset1 = instData.OFFSET1 * 4 * 64;
595
596 initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
597 }
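// Illustrative example (editorial, not part of the original ds.cc): the
// ST64 variant scales each offset by 4 * 64 = 256 bytes, so OFFSET0 = 1
// and OFFSET1 = 3 address LDS bytes base + 256 and base + 768.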
598
599 void
600 Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
601 {
602 } // completeAcc
603 // --- Inst_DS__DS_CMPST_B32 class methods ---
604
606 : Inst_DS(iFmt, "ds_cmpst_b32")
607 {
608 } // Inst_DS__DS_CMPST_B32
609
611 {
612 } // ~Inst_DS__DS_CMPST_B32
613
614 // --- description from .arch file ---
615 // 32b:
616 // tmp = MEM[ADDR];
617 // src = DATA2;
618 // cmp = DATA;
619 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
620 // RETURN_DATA[0] = tmp.
621 // Compare and store.
622 // Caution, the order of src and cmp are the *opposite* of the
623 // --- BUFFER_ATOMIC_CMPSWAP opcode.
624 void
626 {
628 } // execute
629 // --- Inst_DS__DS_CMPST_F32 class methods ---
630
632 : Inst_DS(iFmt, "ds_cmpst_f32")
633 {
634 setFlag(F32);
635 } // Inst_DS__DS_CMPST_F32
636
638 {
639 } // ~Inst_DS__DS_CMPST_F32
640
641 // --- description from .arch file ---
642 // 32b:
643 // tmp = MEM[ADDR];
644 // src = DATA2;
645 // cmp = DATA;
646 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
647 // RETURN_DATA[0] = tmp.
648 // Floating point compare and store that handles NaN/INF/denormal values.
649 // Caution, the order of src and cmp are the *opposite* of the
650 // --- BUFFER_ATOMIC_FCMPSWAP opcode.
651 void
653 {
655 } // execute
656 // --- Inst_DS__DS_MIN_F32 class methods ---
657
659 : Inst_DS(iFmt, "ds_min_f32")
660 {
661 setFlag(F32);
662 } // Inst_DS__DS_MIN_F32
663
665 {
666 } // ~Inst_DS__DS_MIN_F32
667
668 // --- description from .arch file ---
669 // 32b.
670 // tmp = MEM[ADDR];
671 // src = DATA;
672 // cmp = DATA2;
673 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
674 // Floating point minimum that handles NaN/INF/denormal values.
675 // Note that this opcode is slightly more general-purpose than
676 // --- BUFFER_ATOMIC_FMIN.
677 void
679 {
681 } // execute
682 // --- Inst_DS__DS_MAX_F32 class methods ---
683
685 : Inst_DS(iFmt, "ds_max_f32")
686 {
687 setFlag(F32);
688 } // Inst_DS__DS_MAX_F32
689
691 {
692 } // ~Inst_DS__DS_MAX_F32
693
694 // --- description from .arch file ---
695 // 32b.
696 // tmp = MEM[ADDR];
697 // src = DATA;
698 // cmp = DATA2;
699 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
700 // Floating point maximum that handles NaN/INF/denormal values.
701 // Note that this opcode is slightly more general-purpose than
702 // --- BUFFER_ATOMIC_FMAX.
703 void
705 {
707 } // execute
708 // --- Inst_DS__DS_NOP class methods ---
709
710 Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt)
711 : Inst_DS(iFmt, "ds_nop")
712 {
713 setFlag(Nop);
714 } // Inst_DS__DS_NOP
715
716 Inst_DS__DS_NOP::~Inst_DS__DS_NOP()
717 {
718 } // ~Inst_DS__DS_NOP
719
720 // --- description from .arch file ---
721 // Do nothing.
722 void
723 Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst)
724 {
725 gpuDynInst->wavefront()->decLGKMInstsIssued();
726 gpuDynInst->wavefront()->untrackLGKMInst(gpuDynInst);
727 } // execute
728 // --- Inst_DS__DS_ADD_F32 class methods ---
729
731 : Inst_DS(iFmt, "ds_add_f32")
732 {
733 setFlag(F32);
734 setFlag(MemoryRef);
735 setFlag(GroupSegment);
736 setFlag(AtomicAdd);
737 setFlag(AtomicNoReturn);
738 } // Inst_DS__DS_ADD_F32
739
741 {
742 } // ~Inst_DS__DS_ADD_F32
743
744 // --- description from .arch file ---
745 // 32b:
746 // MEM[ADDR] += DATA;
747 // Floating point add that handles NaN/INF/denormal values.
748 void
750 {
751 Wavefront *wf = gpuDynInst->wavefront();
752
753 if (gpuDynInst->exec_mask.none()) {
754 wf->decLGKMInstsIssued();
755 wf->untrackLGKMInst(gpuDynInst);
756 return;
757 }
758
759 gpuDynInst->execUnitId = wf->execUnitId;
760 gpuDynInst->latency.init(gpuDynInst->computeUnit());
761 gpuDynInst->latency.set(
762 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
763 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
764 ConstVecOperandF32 data(gpuDynInst, extData.DATA0);
765
766 addr.read();
767 data.read();
768
769 calcAddr(gpuDynInst, addr);
770
771 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
772 if (gpuDynInst->exec_mask[lane]) {
773 (reinterpret_cast<VecElemF32*>(gpuDynInst->a_data))[lane]
774 = data[lane];
775 }
776 }
777
778 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
779 } // execute
780
781 void
783 {
784 Addr offset0 = instData.OFFSET0;
785 Addr offset1 = instData.OFFSET1;
786 Addr offset = (offset1 << 8) | offset0;
787
789 } // initiateAcc
790
791 void
793 {
794 } // completeAcc
795 // --- Inst_DS__DS_WRITE_B8 class methods ---
796
798 : Inst_DS(iFmt, "ds_write_b8")
799 {
800 setFlag(MemoryRef);
801 setFlag(Store);
802 } // Inst_DS__DS_WRITE_B8
803
805 {
806 } // ~Inst_DS__DS_WRITE_B8
807
808 // --- description from .arch file ---
809 // MEM[ADDR] = DATA[7:0].
810 // Byte write.
811 void
813 {
814 Wavefront *wf = gpuDynInst->wavefront();
815
816 if (gpuDynInst->exec_mask.none()) {
817 wf->decLGKMInstsIssued();
818 wf->untrackLGKMInst(gpuDynInst);
819 return;
820 }
821
822 gpuDynInst->execUnitId = wf->execUnitId;
823 gpuDynInst->latency.init(gpuDynInst->computeUnit());
824 gpuDynInst->latency.set(
825 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
826 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
827 ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
828
829 addr.read();
830 data.read();
831
832 calcAddr(gpuDynInst, addr);
833
834 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
835 if (gpuDynInst->exec_mask[lane]) {
836 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
837 = data[lane];
838 }
839 }
840
841 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
842 } // execute
843
844 void
846 {
847 Addr offset0 = instData.OFFSET0;
848 Addr offset1 = instData.OFFSET1;
849 Addr offset = (offset1 << 8) | offset0;
850
851 initMemWrite<VecElemU8>(gpuDynInst, offset);
852 } // initiateAcc
853
854 void
856 {
857 } // completeAcc
858 // --- Inst_DS__DS_WRITE_B8_D16_HI class methods ---
859
861 : Inst_DS(iFmt, "ds_write_b8_d16_hi")
862 {
863 setFlag(MemoryRef);
864 setFlag(Store);
865 } // Inst_DS__DS_WRITE_B8_D16_HI
866
868 {
869 } // ~Inst_DS__DS_WRITE_B8_D16_HI
870
871 // --- description from .arch file ---
872 // MEM[ADDR] = DATA[23:16].
873 // Byte write in to high word.
874 void
876 {
877 Wavefront *wf = gpuDynInst->wavefront();
878
879 if (gpuDynInst->exec_mask.none()) {
880 wf->decLGKMInstsIssued();
881 wf->untrackLGKMInst(gpuDynInst);
882 return;
883 }
884
885 gpuDynInst->execUnitId = wf->execUnitId;
886 gpuDynInst->latency.init(gpuDynInst->computeUnit());
887 gpuDynInst->latency.set(
888 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
889 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
890 ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
891
892 addr.read();
893 data.read();
894
895 calcAddr(gpuDynInst, addr);
896
897 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
898 if (gpuDynInst->exec_mask[lane]) {
899 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
900 = bits(data[lane], 23, 16);
901 }
902 }
903
904 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
905 } // execute
906
907 void
909 {
910 Addr offset0 = instData.OFFSET0;
911 Addr offset1 = instData.OFFSET1;
912 Addr offset = (offset1 << 8) | offset0;
913
914 initMemWrite<VecElemU8>(gpuDynInst, offset);
915 } // initiateAcc
916
917 void
919 {
920 } // completeAcc
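// Illustrative example (editorial, not part of the original ds.cc):
// bits(data, 23, 16) selects the low byte of the register's high half, so
// a lane value of 0xAABBCCDD stores the single byte 0xBB to LDS, matching
// the "MEM[ADDR] = DATA[23:16]" description above.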
921 // --- Inst_DS__DS_WRITE_B16 class methods ---
922
924 : Inst_DS(iFmt, "ds_write_b16")
925 {
926 setFlag(MemoryRef);
927 setFlag(Store);
928 } // Inst_DS__DS_WRITE_B16
929
931 {
932 } // ~Inst_DS__DS_WRITE_B16
933
934 // --- description from .arch file ---
935 // MEM[ADDR] = DATA[15:0]
936 // Short write.
937 void
939 {
940 Wavefront *wf = gpuDynInst->wavefront();
941
942 if (gpuDynInst->exec_mask.none()) {
943 wf->decLGKMInstsIssued();
944 wf->untrackLGKMInst(gpuDynInst);
945 return;
946 }
947
948 gpuDynInst->execUnitId = wf->execUnitId;
949 gpuDynInst->latency.init(gpuDynInst->computeUnit());
950 gpuDynInst->latency.set(
951 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
952 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
953 ConstVecOperandU16 data(gpuDynInst, extData.DATA0);
954
955 addr.read();
956 data.read();
957
958 calcAddr(gpuDynInst, addr);
959
960 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
961 if (gpuDynInst->exec_mask[lane]) {
962 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
963 = data[lane];
964 }
965 }
966
967 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
968 } // execute
969
970 void
972 {
973 Addr offset0 = instData.OFFSET0;
974 Addr offset1 = instData.OFFSET1;
975 Addr offset = (offset1 << 8) | offset0;
976
977 initMemWrite<VecElemU16>(gpuDynInst, offset);
978 } // initiateAcc
979
980 void
982 {
983 } // completeAcc
984 // --- Inst_DS__DS_ADD_RTN_U32 class methods ---
985
986 Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt)
987 : Inst_DS(iFmt, "ds_add_rtn_u32")
988 {
989 setFlag(MemoryRef);
990 setFlag(AtomicAdd);
991 setFlag(AtomicReturn);
992 } // Inst_DS__DS_ADD_RTN_U32
993
994 Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32()
995 {
996 } // ~Inst_DS__DS_ADD_RTN_U32
997
998 // --- description from .arch file ---
999 // 32b:
1000 // tmp = MEM[ADDR];
1001 // MEM[ADDR] += DATA;
1002 // RETURN_DATA = tmp.
1003 void
1004 Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
1005 {
1006 Wavefront *wf = gpuDynInst->wavefront();
1007
1008 if (gpuDynInst->exec_mask.none()) {
1009 wf->decLGKMInstsIssued();
1010 return;
1011 }
1012
1013 gpuDynInst->execUnitId = wf->execUnitId;
1014 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1015 gpuDynInst->latency.set(
1016 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1017 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1018 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
1019
1020 addr.read();
1021 data.read();
1022
1023 calcAddr(gpuDynInst, addr);
1024
1025 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1026 if (gpuDynInst->exec_mask[lane]) {
1027 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
1028 = data[lane];
1029 }
1030 }
1031
1032 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1033 } // execute
1034
1035 void
1036 Inst_DS__DS_ADD_RTN_U32::initiateAcc(GPUDynInstPtr gpuDynInst)
1037 {
1038 Addr offset0 = instData.OFFSET0;
1039 Addr offset1 = instData.OFFSET1;
1040 Addr offset = (offset1 << 8) | offset0;
1041
1043 } // initiateAcc
1044
1045 void
1046 Inst_DS__DS_ADD_RTN_U32::completeAcc(GPUDynInstPtr gpuDynInst)
1047 {
1048 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1049
1050 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1051 if (gpuDynInst->exec_mask[lane]) {
1052 vdst[lane] = (reinterpret_cast<VecElemU32*>(
1053 gpuDynInst->d_data))[lane];
1054 }
1055 }
1056
1057 vdst.write();
1058 } // completeAcc
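// Descriptive note (editorial): for the _RTN atomics, execute() stages the
// per-lane operand in a_data before handing the request to the LDS
// pipeline, and completeAcc() above copies the returned pre-operation
// value out of d_data into the VDST vector register, matching the
// "RETURN_DATA = tmp" semantics in the .arch description.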
1059 // --- Inst_DS__DS_SUB_RTN_U32 class methods ---
1060
1062 : Inst_DS(iFmt, "ds_sub_rtn_u32")
1063 {
1064 } // Inst_DS__DS_SUB_RTN_U32
1065
1067 {
1068 } // ~Inst_DS__DS_SUB_RTN_U32
1069
1070 // --- description from .arch file ---
1071 // 32b:
1072 // tmp = MEM[ADDR];
1073 // MEM[ADDR] -= DATA;
1074 // RETURN_DATA = tmp.
1075 void
1077 {
1079 } // execute
1080 // --- Inst_DS__DS_RSUB_RTN_U32 class methods ---
1081
1083 : Inst_DS(iFmt, "ds_rsub_rtn_u32")
1084 {
1085 } // Inst_DS__DS_RSUB_RTN_U32
1086
1088 {
1089 } // ~Inst_DS__DS_RSUB_RTN_U32
1090
1091 // --- description from .arch file ---
1092 // 32b:
1093 // tmp = MEM[ADDR];
1094 // MEM[ADDR] = DATA - MEM[ADDR];
1095 // RETURN_DATA = tmp.
1096 // Subtraction with reversed operands.
1097 void
1099 {
1101 } // execute
1102 // --- Inst_DS__DS_INC_RTN_U32 class methods ---
1103
1105 : Inst_DS(iFmt, "ds_inc_rtn_u32")
1106 {
1107 } // Inst_DS__DS_INC_RTN_U32
1108
1110 {
1111 } // ~Inst_DS__DS_INC_RTN_U32
1112
1113 // --- description from .arch file ---
1114 // 32b:
1115 // tmp = MEM[ADDR];
1116 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
1117 // RETURN_DATA = tmp.
1118 void
1120 {
1122 } // execute
1123 // --- Inst_DS__DS_DEC_RTN_U32 class methods ---
1124
1126 : Inst_DS(iFmt, "ds_dec_rtn_u32")
1127 {
1128 } // Inst_DS__DS_DEC_RTN_U32
1129
1131 {
1132 } // ~Inst_DS__DS_DEC_RTN_U32
1133
1134 // --- description from .arch file ---
1135 // 32b:
1136 // tmp = MEM[ADDR];
1137 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
1138 // (unsigned compare); RETURN_DATA = tmp.
1139 void
1141 {
1143 } // execute
1144 // --- Inst_DS__DS_MIN_RTN_I32 class methods ---
1145
1147 : Inst_DS(iFmt, "ds_min_rtn_i32")
1148 {
1149 } // Inst_DS__DS_MIN_RTN_I32
1150
1152 {
1153 } // ~Inst_DS__DS_MIN_RTN_I32
1154
1155 // --- description from .arch file ---
1156 // 32b:
1157 // tmp = MEM[ADDR];
1158 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
1159 // RETURN_DATA = tmp.
1160 void
1162 {
1164 } // execute
1165 // --- Inst_DS__DS_MAX_RTN_I32 class methods ---
1166
1168 : Inst_DS(iFmt, "ds_max_rtn_i32")
1169 {
1170 } // Inst_DS__DS_MAX_RTN_I32
1171
1173 {
1174 } // ~Inst_DS__DS_MAX_RTN_I32
1175
1176 // --- description from .arch file ---
1177 // 32b:
1178 // tmp = MEM[ADDR];
1179 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
1180 // RETURN_DATA = tmp.
1181 void
1183 {
1185 } // execute
1186 // --- Inst_DS__DS_MIN_RTN_U32 class methods ---
1187
1189 : Inst_DS(iFmt, "ds_min_rtn_u32")
1190 {
1191 } // Inst_DS__DS_MIN_RTN_U32
1192
1194 {
1195 } // ~Inst_DS__DS_MIN_RTN_U32
1196
1197 // --- description from .arch file ---
1198 // 32b:
1199 // tmp = MEM[ADDR];
1200 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
1201 // RETURN_DATA = tmp.
1202 void
1204 {
1206 } // execute
1207 // --- Inst_DS__DS_MAX_RTN_U32 class methods ---
1208
1210 : Inst_DS(iFmt, "ds_max_rtn_u32")
1211 {
1212 } // Inst_DS__DS_MAX_RTN_U32
1213
1215 {
1216 } // ~Inst_DS__DS_MAX_RTN_U32
1217
1218 // --- description from .arch file ---
1219 // 32b:
1220 // tmp = MEM[ADDR];
1221 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
1222 // RETURN_DATA = tmp.
1223 void
1225 {
1227 } // execute
1228 // --- Inst_DS__DS_AND_RTN_B32 class methods ---
1229
1231 : Inst_DS(iFmt, "ds_and_rtn_b32")
1232 {
1233 } // Inst_DS__DS_AND_RTN_B32
1234
1236 {
1237 } // ~Inst_DS__DS_AND_RTN_B32
1238
1239 // --- description from .arch file ---
1240 // 32b:
1241 // tmp = MEM[ADDR];
1242 // MEM[ADDR] &= DATA;
1243 // RETURN_DATA = tmp.
1244 void
1246 {
1248 } // execute
1249 // --- Inst_DS__DS_OR_RTN_B32 class methods ---
1250
1252 : Inst_DS(iFmt, "ds_or_rtn_b32")
1253 {
1254 } // Inst_DS__DS_OR_RTN_B32
1255
1257 {
1258 } // ~Inst_DS__DS_OR_RTN_B32
1259
1260 // --- description from .arch file ---
1261 // 32b:
1262 // tmp = MEM[ADDR];
1263 // MEM[ADDR] |= DATA;
1264 // RETURN_DATA = tmp.
1265 void
1267 {
1269 } // execute
1270 // --- Inst_DS__DS_XOR_RTN_B32 class methods ---
1271
1273 : Inst_DS(iFmt, "ds_xor_rtn_b32")
1274 {
1275 } // Inst_DS__DS_XOR_RTN_B32
1276
1278 {
1279 } // ~Inst_DS__DS_XOR_RTN_B32
1280
1281 // --- description from .arch file ---
1282 // 32b:
1283 // tmp = MEM[ADDR];
1284 // MEM[ADDR] ^= DATA;
1285 // RETURN_DATA = tmp.
1286 void
1288 {
1290 } // execute
1291 // --- Inst_DS__DS_MSKOR_RTN_B32 class methods ---
1292
1294 : Inst_DS(iFmt, "ds_mskor_rtn_b32")
1295 {
1296 } // Inst_DS__DS_MSKOR_RTN_B32
1297
1299 {
1300 } // ~Inst_DS__DS_MSKOR_RTN_B32
1301
1302 // --- description from .arch file ---
1303 // 32b:
1304 // tmp = MEM[ADDR];
1305 // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
1306 // RETURN_DATA = tmp.
1307 // Masked dword OR, D0 contains the mask and D1 contains the new value.
1308 void
1310 {
1312 } // execute
1313 // --- Inst_DS__DS_WRXCHG_RTN_B32 class methods ---
1314
1316 : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
1317 {
1318 } // Inst_DS__DS_WRXCHG_RTN_B32
1319
1321 {
1322 } // ~Inst_DS__DS_WRXCHG_RTN_B32
1323
1324 // --- description from .arch file ---
1325 // tmp = MEM[ADDR];
1326 // MEM[ADDR] = DATA;
1327 // RETURN_DATA = tmp.
1328 // Write-exchange operation.
1329 void
1331 {
1333 } // execute
1334 // --- Inst_DS__DS_WRXCHG2_RTN_B32 class methods ---
1335
1337 : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
1338 {
1339 } // Inst_DS__DS_WRXCHG2_RTN_B32
1340
1342 {
1343 } // ~Inst_DS__DS_WRXCHG2_RTN_B32
1344
1345 // --- description from .arch file ---
1346 // Write-exchange 2 separate dwords.
1347 void
1349 {
1351 } // execute
1352 // --- Inst_DS__DS_WRXCHG2ST64_RTN_B32 class methods ---
1353
1354 Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
1355 InFmt_DS *iFmt)
1356 : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
1357 {
1358 } // Inst_DS__DS_WRXCHG2ST64_RTN_B32
1359
1361 {
1362 } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32
1363
1364 // --- description from .arch file ---
1365 // Write-exchange 2 separate dwords with a stride of 64 dwords.
1366 void
1367 Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
1368 {
1369 panicUnimplemented();
1370 } // execute
1371 // --- Inst_DS__DS_CMPST_RTN_B32 class methods ---
1372
1373 Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
1374 : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
1375 {
1376 setFlag(MemoryRef);
1377 setFlag(AtomicCAS);
1378 setFlag(AtomicReturn);
1379 } // Inst_DS__DS_CMPST_RTN_B32
1380
1381 Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
1382 {
1383 } // ~Inst_DS__DS_CMPST_RTN_B32
1384
1385 // --- description from .arch file ---
1386 // 32b:
1387 // tmp = MEM[ADDR];
1388 // src = DATA2;
1389 // cmp = DATA;
1390 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
1391 // RETURN_DATA[0] = tmp.
1392 // Compare and store.
1393 // Caution, the order of src and cmp are the *opposite* of the
1394 // --- BUFFER_ATOMIC_CMPSWAP opcode.
1395 void
1396 Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
1397 {
1398 //panicUnimplemented();
1399 Wavefront *wf = gpuDynInst->wavefront();
1400
1401 if (gpuDynInst->exec_mask.none()) {
1402 wf->decLGKMInstsIssued();
1403 return;
1404 }
1405
1406 gpuDynInst->execUnitId = wf->execUnitId;
1407 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1408 gpuDynInst->latency.set(
1409 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1410
1411 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1412 ConstVecOperandU32 src(gpuDynInst, extData.DATA1);
1413 ConstVecOperandU32 cmp(gpuDynInst, extData.DATA0);
1414
1415 addr.read();
1416 src.read();
1417 cmp.read();
1418 calcAddr(gpuDynInst, addr);
1419
1420 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1421 if (gpuDynInst->exec_mask[lane]) {
1422 (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
1423 = src[lane];
1424 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
1425 = cmp[lane];
1426 }
1427 }
1428
1429 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1430
1431 } // execute
1432
1433
1434 void
1435 Inst_DS__DS_CMPST_RTN_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
1436 {
1437 Addr offset0 = instData.OFFSET0;
1438 Addr offset1 = instData.OFFSET1;
1439 Addr offset = (offset1 << 8) | offset0;
1440
1442 } // initiateAcc
1443
1444 void
1445 Inst_DS__DS_CMPST_RTN_B32::completeAcc(GPUDynInstPtr gpuDynInst)
1446 {
1447 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1448
1449 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1450 if (gpuDynInst->exec_mask[lane]) {
1451 vdst[lane] = (reinterpret_cast<VecElemU32*>(
1452 gpuDynInst->d_data))[lane];
1453 }
1454 }
1455
1456 vdst.write();
1457 } // completeAcc
1458 // --- Inst_DS__DS_CMPST_RTN_F32 class methods ---
1459
1461 : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
1462 {
1463 setFlag(F32);
1464 } // Inst_DS__DS_CMPST_RTN_F32
1465
1467 {
1468 } // ~Inst_DS__DS_CMPST_RTN_F32
1469
1470 // --- description from .arch file ---
1471 // 32b:
1472 // tmp = MEM[ADDR];
1473 // src = DATA2;
1474 // cmp = DATA;
1475 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
1476 // RETURN_DATA[0] = tmp.
1477 // Floating point compare and store that handles NaN/INF/denormal values.
1478 // Caution, the order of src and cmp are the *opposite* of the
1479 // --- BUFFER_ATOMIC_FCMPSWAP opcode.
1480 void
1482 {
1484 } // execute
1485 // --- Inst_DS__DS_MIN_RTN_F32 class methods ---
1486
1488 : Inst_DS(iFmt, "ds_min_rtn_f32")
1489 {
1490 setFlag(F32);
1491 } // Inst_DS__DS_MIN_RTN_F32
1492
1494 {
1495 } // ~Inst_DS__DS_MIN_RTN_F32
1496
1497 // --- description from .arch file ---
1498 // 32b.
1499 // tmp = MEM[ADDR];
1500 // src = DATA;
1501 // cmp = DATA2;
1502 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
1503 // Floating point minimum that handles NaN/INF/denormal values.
1504 // Note that this opcode is slightly more general-purpose than
1505 // --- BUFFER_ATOMIC_FMIN.
1506 void
1508 {
1510 } // execute
1511 // --- Inst_DS__DS_MAX_RTN_F32 class methods ---
1512
1514 : Inst_DS(iFmt, "ds_max_rtn_f32")
1515 {
1516 setFlag(F32);
1517 } // Inst_DS__DS_MAX_RTN_F32
1518
1520 {
1521 } // ~Inst_DS__DS_MAX_RTN_F32
1522
1523 // --- description from .arch file ---
1524 // 32b.
1525 // tmp = MEM[ADDR];
1526 // src = DATA;
1527 // cmp = DATA2;
1528 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
1529 // Floating point maximum that handles NaN/INF/denormal values.
1530 // Note that this opcode is slightly more general-purpose than
1531 // --- BUFFER_ATOMIC_FMAX.
1532 void
1534 {
1536 } // execute
1537 // --- Inst_DS__DS_WRAP_RTN_B32 class methods ---
1538
1540 : Inst_DS(iFmt, "ds_wrap_rtn_b32")
1541 {
1542 } // Inst_DS__DS_WRAP_RTN_B32
1543
1545 {
1546 } // ~Inst_DS__DS_WRAP_RTN_B32
1547
1548 // --- description from .arch file ---
1549 // tmp = MEM[ADDR];
1550 // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
1551 // RETURN_DATA = tmp.
1552 void
1554 {
1556 } // execute
1557 // --- Inst_DS__DS_ADD_RTN_F32 class methods ---
1558
1560 : Inst_DS(iFmt, "ds_add_rtn_f32")
1561 {
1562 setFlag(F32);
1563 } // Inst_DS__DS_ADD_RTN_F32
1564
1566 {
1567 } // ~Inst_DS__DS_ADD_RTN_F32
1568
1569 // --- description from .arch file ---
1570 // 32b:
1571 // tmp = MEM[ADDR];
1572 // MEM[ADDR] += DATA;
1573 // RETURN_DATA = tmp.
1574 // Floating point add that handles NaN/INF/denormal values.
1575 void
1577 {
1579 } // execute
1580 // --- Inst_DS__DS_READ_B32 class methods ---
1581
1582 Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt)
1583 : Inst_DS(iFmt, "ds_read_b32")
1584 {
1585 setFlag(MemoryRef);
1586 setFlag(Load);
1587 } // Inst_DS__DS_READ_B32
1588
1589 Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32()
1590 {
1591 } // ~Inst_DS__DS_READ_B32
1592
1593 // --- description from .arch file ---
1594 // RETURN_DATA = MEM[ADDR].
1595 // Dword read.
1596 void
1597 Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst)
1598 {
1599 Wavefront *wf = gpuDynInst->wavefront();
1600
1601 if (gpuDynInst->exec_mask.none()) {
1602 wf->decLGKMInstsIssued();
1603 wf->untrackLGKMInst(gpuDynInst);
1604 return;
1605 }
1606
1607 gpuDynInst->execUnitId = wf->execUnitId;
1608 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1609 gpuDynInst->latency.set(
1610 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1611 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1612
1613 addr.read();
1614
1615 calcAddr(gpuDynInst, addr);
1616
1617 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1618 } // execute
1619
1620 void
1621 Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
1622 {
1623 Addr offset0 = instData.OFFSET0;
1624 Addr offset1 = instData.OFFSET1;
1625 Addr offset = (offset1 << 8) | offset0;
1626
1627 initMemRead<VecElemU32>(gpuDynInst, offset);
1628 } // initiateAcc
1629
1630 void
1631 Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst)
1632 {
1633 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1634
1635 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1636 if (gpuDynInst->exec_mask[lane]) {
1637 vdst[lane] = (reinterpret_cast<VecElemU32*>(
1638 gpuDynInst->d_data))[lane];
1639 }
1640 }
1641
1642 vdst.write();
1643 } // completeAcc
1644 // --- Inst_DS__DS_READ2_B32 class methods ---
1645
1647 : Inst_DS(iFmt, "ds_read2_b32")
1648 {
1649 setFlag(MemoryRef);
1650 setFlag(Load);
1651 } // Inst_DS__DS_READ2_B32
1652
1654 {
1655 } // ~Inst_DS__DS_READ2_B32
1656
1657 // --- description from .arch file ---
1658 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
1659 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
1660 // Read 2 dwords.
1661 void
1663 {
1664 Wavefront *wf = gpuDynInst->wavefront();
1665
1666 if (gpuDynInst->exec_mask.none()) {
1667 wf->decLGKMInstsIssued();
1668 wf->untrackLGKMInst(gpuDynInst);
1669 return;
1670 }
1671
1672 gpuDynInst->execUnitId = wf->execUnitId;
1673 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1674 gpuDynInst->latency.set(
1675 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1676 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1677
1678 addr.read();
1679
1680 calcAddr(gpuDynInst, addr);
1681
1682 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1683 } // execute
1684
1685 void
1687 {
1688 Addr offset0 = instData.OFFSET0 * 4;
1689 Addr offset1 = instData.OFFSET1 * 4;
1690
1691 initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
1692 } // initiateAcc
1693
1694 void
1696 {
1697 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
1698 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
1699
1700 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1701 if (gpuDynInst->exec_mask[lane]) {
1702 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1703 gpuDynInst->d_data))[lane * 2];
1704 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1705 gpuDynInst->d_data))[lane * 2 + 1];
1706 }
1707 }
1708
1709 vdst0.write();
1710 vdst1.write();
1711 } // completeAcc
1712 // --- Inst_DS__DS_READ2ST64_B32 class methods ---
1713
1715 : Inst_DS(iFmt, "ds_read2st64_b32")
1716 {
1717 setFlag(MemoryRef);
1718 setFlag(Load);
1719 } // Inst_DS__DS_READ2ST64_B32
1720
1722 {
1723 } // ~Inst_DS__DS_READ2ST64_B32
1724
1725 // --- description from .arch file ---
1726 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
1727 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
1728 // Read 2 dwords.
1729 void
1731 {
1732 Wavefront *wf = gpuDynInst->wavefront();
1733
1734 if (gpuDynInst->exec_mask.none()) {
1735 wf->decLGKMInstsIssued();
1736 wf->untrackLGKMInst(gpuDynInst);
1737 return;
1738 }
1739
1740 gpuDynInst->execUnitId = wf->execUnitId;
1741 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1742 gpuDynInst->latency.set(
1743 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1744 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1745
1746 addr.read();
1747
1748 calcAddr(gpuDynInst, addr);
1749
1750 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1751 } // execute
1752
1753 void
1755 {
1756 Addr offset0 = (instData.OFFSET0 * 4 * 64);
1757 Addr offset1 = (instData.OFFSET1 * 4 * 64);
1758
1759 initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
1760 }
1761
1762 void
1764 {
1765 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
1766 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
1767
1768 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1769 if (gpuDynInst->exec_mask[lane]) {
1770 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1771 gpuDynInst->d_data))[lane * 2];
1772 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1773 gpuDynInst->d_data))[lane * 2 + 1];
1774 }
1775 }
1776
1777 vdst0.write();
1778 vdst1.write();
1779 }
1780 // --- Inst_DS__DS_READ_I8 class methods ---
1781
1783 : Inst_DS(iFmt, "ds_read_i8")
1784 {
1785 setFlag(MemoryRef);
1786 setFlag(Load);
1787 } // Inst_DS__DS_READ_I8
1788
1790 {
1791 } // ~Inst_DS__DS_READ_I8
1792
1793 // --- description from .arch file ---
1794 // RETURN_DATA = signext(MEM[ADDR][7:0]).
1795 // Signed byte read.
1796 void
1798 {
1799 Wavefront *wf = gpuDynInst->wavefront();
1800
1801 if (gpuDynInst->exec_mask.none()) {
1802 wf->decLGKMInstsIssued();
1803 wf->untrackLGKMInst(gpuDynInst);
1804 return;
1805 }
1806
1807 gpuDynInst->execUnitId = wf->execUnitId;
1808 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1809 gpuDynInst->latency.set(
1810 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1811 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1812
1813 addr.read();
1814
1815 calcAddr(gpuDynInst, addr);
1816
1817 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1818 } // execute
1819
1820 void
1822 {
1823 Addr offset0 = instData.OFFSET0;
1824 Addr offset1 = instData.OFFSET1;
1825 Addr offset = (offset1 << 8) | offset0;
1826
1827 initMemRead<VecElemI8>(gpuDynInst, offset);
1828 } // initiateAcc
1829
1830 void
1832 {
1833 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1834
1835 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1836 if (gpuDynInst->exec_mask[lane]) {
1837 vdst[lane] = (VecElemU32)sext<8>((reinterpret_cast<VecElemI8*>(
1838 gpuDynInst->d_data))[lane]);
1839 }
1840 }
1841
1842 vdst.write();
1843 } // completeAcc
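// Illustrative example (editorial, not part of the original ds.cc):
// sext<8> widens the signed byte, so a loaded value of 0x80 (-128) becomes
// 0xFFFFFF80 in VDST while 0x7F stays 0x0000007F.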
1844 // --- Inst_DS__DS_READ_U8 class methods ---
1845
1847 : Inst_DS(iFmt, "ds_read_u8")
1848 {
1849 setFlag(MemoryRef);
1850 setFlag(Load);
1851 } // Inst_DS__DS_READ_U8
1852
1854 {
1855 } // ~Inst_DS__DS_READ_U8
1856
1857 // --- description from .arch file ---
1858 // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}.
1859 // Unsigned byte read.
1860 void
1862 {
1863 Wavefront *wf = gpuDynInst->wavefront();
1864
1865 if (gpuDynInst->exec_mask.none()) {
1866 wf->decLGKMInstsIssued();
1867 wf->untrackLGKMInst(gpuDynInst);
1868 return;
1869 }
1870
1871 gpuDynInst->execUnitId = wf->execUnitId;
1872 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1873 gpuDynInst->latency.set(
1874 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1875 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1876
1877 addr.read();
1878
1879 calcAddr(gpuDynInst, addr);
1880
1881 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1882 } // execute
1883
1884 void
1886 {
1887 Addr offset0 = instData.OFFSET0;
1888 Addr offset1 = instData.OFFSET1;
1889 Addr offset = (offset1 << 8) | offset0;
1890
1891 initMemRead<VecElemU8>(gpuDynInst, offset);
1892 } // initiateAcc
1893
1894 void
1896 {
1897 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1898
1899 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1900 if (gpuDynInst->exec_mask[lane]) {
1901 vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>(
1902 gpuDynInst->d_data))[lane];
1903 }
1904 }
1905
1906 vdst.write();
1907 } // completeAcc
1908 // --- Inst_DS__DS_READ_I16 class methods ---
1909
1911 : Inst_DS(iFmt, "ds_read_i16")
1912 {
1913 setFlag(MemoryRef);
1914 setFlag(Load);
1915 } // Inst_DS__DS_READ_I16
1916
1918 {
1919 } // ~Inst_DS__DS_READ_I16
1920
1921 // --- description from .arch file ---
1922 // RETURN_DATA = signext(MEM[ADDR][15:0]).
1923 // Signed short read.
1924 void
1926 {
1928 } // execute
1929 // --- Inst_DS__DS_READ_U16 class methods ---
1930
1932 : Inst_DS(iFmt, "ds_read_u16")
1933 {
1934 setFlag(MemoryRef);
1935 setFlag(Load);
1936 } // Inst_DS__DS_READ_U16
1937
1939 {
1940 } // ~Inst_DS__DS_READ_U16
1941
1942 // --- description from .arch file ---
1943 // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}.
1944 // Unsigned short read.
1945 void
1947 {
1948 Wavefront *wf = gpuDynInst->wavefront();
1949
1950 if (gpuDynInst->exec_mask.none()) {
1951 wf->decLGKMInstsIssued();
1952 wf->untrackLGKMInst(gpuDynInst);
1953 return;
1954 }
1955
1956 gpuDynInst->execUnitId = wf->execUnitId;
1957 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1958 gpuDynInst->latency.set(
1959 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1960 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1961
1962 addr.read();
1963
1964 calcAddr(gpuDynInst, addr);
1965
1966 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1967 } // execute
1968 void
1970 {
1971 Addr offset0 = instData.OFFSET0;
1972 Addr offset1 = instData.OFFSET1;
1973 Addr offset = (offset1 << 8) | offset0;
1974
1975 initMemRead<VecElemU16>(gpuDynInst, offset);
1976 } // initiateAcc
1977
1978 void
1980 {
1981 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1982
1983 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1984 if (gpuDynInst->exec_mask[lane]) {
1985 vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>(
1986 gpuDynInst->d_data))[lane];
1987 }
1988 }
1989
1990 vdst.write();
1991 } // completeAcc
1992 // --- Inst_DS__DS_READ_U16_D16 class methods ---
1993
1996 : Inst_DS(iFmt, "ds_read_u16_d16_hi")
1997 {
1998 setFlag(MemoryRef);
1999 setFlag(Load);
2000 } // Inst_DS__DS_READ_U16_D16
2001
2003 {
2004 } // ~Inst_DS__DS_READ_U16_D16
2005
2006 // --- description from .arch file ---
2007 // RETURN_DATA[15 : 0].u16 = MEM[ADDR].u16;
2008 // // RETURN_DATA[31:16] is preserved.
2009 void
2011 {
2012 Wavefront *wf = gpuDynInst->wavefront();
2013
2014 if (gpuDynInst->exec_mask.none()) {
2015 wf->decLGKMInstsIssued();
2016 wf->untrackLGKMInst(gpuDynInst);
2017 return;
2018 }
2019
2020 gpuDynInst->execUnitId = wf->execUnitId;
2021 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2022 gpuDynInst->latency.set(
2023 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2024 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2025
2026 addr.read();
2027
2028 calcAddr(gpuDynInst, addr);
2029
2030 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2031 } // execute
2032 void
2034 {
2035 Addr offset0 = instData.OFFSET0;
2036 Addr offset1 = instData.OFFSET1;
2037 Addr offset = (offset1 << 8) | offset0;
2038
2039 initMemRead<VecElemU16>(gpuDynInst, offset);
2040 } // initiateAcc
2041
2042 void
2044 {
2045 VecOperandU32 vdst(gpuDynInst, extData.VDST);
2046
2047 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2048 if (gpuDynInst->exec_mask[lane]) {
2049 VecElemU16 ds_val = reinterpret_cast<VecElemU16*>(
2050 gpuDynInst->d_data)[lane];
2051 replaceBits(vdst[lane], 15, 0, ds_val);
2052 }
2053 }
2054
2055 vdst.write();
2056 } // completeAcc
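// Illustrative example (editorial, not part of the original ds.cc):
// replaceBits(vdst, 15, 0, ds_val) only overwrites the low half of the
// destination, so with vdst = 0xAAAA5555 and a loaded ds_val of 0x1234 the
// lane ends up holding 0xAAAA1234, preserving bits 31:16 as the D16
// variant requires.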
2057 // --- Inst_DS__DS_READ_U16_D16_HI class methods ---
2058
2061 : Inst_DS(iFmt, "ds_read_u16_d16_hi")
2062 {
2063 setFlag(MemoryRef);
2064 setFlag(Load);
2065 } // Inst_DS__DS_READ_U16_D16_HI
2066
2068 {
2069 } // ~Inst_DS__DS_READ_U16_D16_HI
2070
2071 // --- description from .arch file ---
2072 // RETURN_DATA[31 : 16].u16 = MEM[ADDR].u16;
2073 // // RETURN_DATA[15:0] is preserved.
2074 void
2076 {
2077 Wavefront *wf = gpuDynInst->wavefront();
2078
2079 if (gpuDynInst->exec_mask.none()) {
2080 wf->decLGKMInstsIssued();
2081 wf->untrackLGKMInst(gpuDynInst);
2082 return;
2083 }
2084
2085 gpuDynInst->execUnitId = wf->execUnitId;
2086 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2087 gpuDynInst->latency.set(
2088 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2089 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2090
2091 addr.read();
2092
2093 calcAddr(gpuDynInst, addr);
2094
2095 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2096 } // execute
2097 void
2099 {
2100 Addr offset0 = instData.OFFSET0;
2101 Addr offset1 = instData.OFFSET1;
2102 Addr offset = (offset1 << 8) | offset0;
2103
2104 initMemRead<VecElemU16>(gpuDynInst, offset);
2105 } // initiateAcc
2106
2107 void
2109 {
2110 VecOperandU32 vdst(gpuDynInst, extData.VDST);
2111
2112 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2113 if (gpuDynInst->exec_mask[lane]) {
2114 VecElemU16 ds_val = reinterpret_cast<VecElemU16*>(
2115 gpuDynInst->d_data)[lane];
2116 replaceBits(vdst[lane], 31, 16, ds_val);
2117 }
2118 }
2119
2120 vdst.write();
2121 } // completeAcc
2122 // --- Inst_DS__DS_SWIZZLE_B32 class methods ---
2123
2124 Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt)
2125 : Inst_DS(iFmt, "ds_swizzle_b32")
2126 {
2132 setFlag(Load);
2133 setFlag(ALU);
2134 } // Inst_DS__DS_SWIZZLE_B32
2135
2136 Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32()
2137 {
2138 } // ~Inst_DS__DS_SWIZZLE_B32
2139
2140 // --- description from .arch file ---
2141 // RETURN_DATA = swizzle(vgpr_data, offset1:offset0).
2142 // Dword swizzle, no data is written to LDS memory; See ds_opcodes.docx for
2143 // --- details.
2144 void
2145 Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
2146 {
2147 Wavefront *wf = gpuDynInst->wavefront();
2148 wf->decLGKMInstsIssued();
2149 wf->untrackLGKMInst(gpuDynInst);
2150
2151 if (gpuDynInst->exec_mask.none()) {
2152 return;
2153 }
2154
2155 gpuDynInst->execUnitId = wf->execUnitId;
2156 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2157 gpuDynInst->latency.set(gpuDynInst->computeUnit()
2158 ->cyclesToTicks(Cycles(24)));
2159
2160 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
2161 VecOperandU32 vdst(gpuDynInst, extData.VDST);
2179 VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);
2180
2181 data.read();
2182
2183 if (bits(ds_pattern, 15)) {
2184 // QDMode
2185 for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
2191 if (gpuDynInst->exec_mask[lane]) {
2192 int index0 = lane + bits(ds_pattern, 1, 0);
2193 panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
2194 "is out of bounds.\n", gpuDynInst->disassemble(),
2195 index0);
2196 vdst[lane]
2197 = gpuDynInst->exec_mask[index0] ? data[index0]: 0;
2198 }
2199 if (gpuDynInst->exec_mask[lane + 1]) {
2200 int index1 = lane + bits(ds_pattern, 3, 2);
2201 panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
2202 "is out of bounds.\n", gpuDynInst->disassemble(),
2203 index1);
2204 vdst[lane + 1]
2205 = gpuDynInst->exec_mask[index1] ? data[index1]: 0;
2206 }
2207 if (gpuDynInst->exec_mask[lane + 2]) {
2208 int index2 = lane + bits(ds_pattern, 5, 4);
2209 panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
2210 "is out of bounds.\n", gpuDynInst->disassemble(),
2211 index2);
2212 vdst[lane + 2]
2213 = gpuDynInst->exec_mask[index2] ? data[index2]: 0;
2214 }
2215 if (gpuDynInst->exec_mask[lane + 3]) {
2216 int index3 = lane + bits(ds_pattern, 7, 6);
2217 panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
2218 "is out of bounds.\n", gpuDynInst->disassemble(),
2219 index3);
2220 vdst[lane + 3]
2221 = gpuDynInst->exec_mask[index3] ? data[index3]: 0;
2222 }
2223 }
2224 } else {
2225 // Bit Mode
2226 int and_mask = bits(ds_pattern, 4, 0);
2227 int or_mask = bits(ds_pattern, 9, 5);
2228 int xor_mask = bits(ds_pattern, 14, 10);
2229 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2230 if (gpuDynInst->exec_mask[lane]) {
2231 int index = (((lane & and_mask) | or_mask) ^ xor_mask);
2232 // Adjust for the next 32 lanes.
2233 if (lane > 31) {
2234 index += 32;
2235 }
2236 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
2237 "out of bounds.\n", gpuDynInst->disassemble(),
2238 index);
2239 vdst[lane]
2240 = gpuDynInst->exec_mask[index] ? data[index] : 0;
2241 }
2242 }
2243 }
2244
2245 vdst.write();
2246
2253 wf->computeUnit->vrf[wf->simdId]->
2254 scheduleWriteOperandsFromLoad(wf, gpuDynInst);
2259 wf->rdLmReqsInPipe--;
2260 } // execute
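// Illustrative sketch (editorial, not part of the original ds.cc): a
// hypothetical stand-alone helper that mirrors the bit-mode lane selection
// above, handy for working out what a given ds_pattern does by hand.
namespace {
[[maybe_unused]] int
swizzleBitModeIndex(int lane, unsigned ds_pattern)
{
    int and_mask = ds_pattern & 0x1f;         // bits(ds_pattern, 4, 0)
    int or_mask = (ds_pattern >> 5) & 0x1f;   // bits(ds_pattern, 9, 5)
    int xor_mask = (ds_pattern >> 10) & 0x1f; // bits(ds_pattern, 14, 10)
    int index = ((lane & and_mask) | or_mask) ^ xor_mask;
    return (lane > 31) ? index + 32 : index;  // upper 32 lanes of the wave
}
} // anonymous namespace
// For example, ds_pattern = 0x001f leaves every lane reading its own
// value, while ds_pattern = 0x041f sets xor_mask = 1 and swaps each
// even/odd lane pair.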
2261 // --- Inst_DS__DS_PERMUTE_B32 class methods ---
2262
2263 Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
2264 : Inst_DS(iFmt, "ds_permute_b32")
2265 {
2266 setFlag(MemoryRef);
2272 setFlag(Load);
2273 } // Inst_DS__DS_PERMUTE_B32
2274
2275 Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
2276 {
2277 } // ~Inst_DS__DS_PERMUTE_B32
2278
2279 // --- description from .arch file ---
2280 // Forward permute.
2281 void
2282 Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
2283 {
2284 Wavefront *wf = gpuDynInst->wavefront();
2285 wf->decLGKMInstsIssued();
2286 wf->untrackLGKMInst(gpuDynInst);
2287
2288 if (gpuDynInst->exec_mask.none()) {
2289 return;
2290 }
2291
2292 gpuDynInst->execUnitId = wf->execUnitId;
2293 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2294 gpuDynInst->latency.set(gpuDynInst->computeUnit()
2295 ->cyclesToTicks(Cycles(24)));
2296 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2297 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
2298 VecOperandU32 vdst(gpuDynInst, extData.VDST);
2299
2300 addr.read();
2301 data.read();
2302
2303 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2304 if (gpuDynInst->exec_mask[lane]) {
2311 assert(!instData.OFFSET1);
2318 int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
2319 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
2320 "of bounds.\n", gpuDynInst->disassemble(), index);
2326 if (wf->execMask(index)) {
2327 vdst[index] = data[lane];
2328 } else {
2329 vdst[index] = 0;
2330 }
2331 }
2332 }
2333
2334 vdst.write();
2335
2342 wf->computeUnit->vrf[wf->simdId]->
2343 scheduleWriteOperandsFromLoad(wf, gpuDynInst);
2348 wf->rdLmReqsInPipe--;
2349 } // execute
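// Illustrative example (editorial, not part of the original ds.cc): the
// source VGPR supplies a byte address, so bits(addr[lane] + OFFSET0, 7, 2)
// is a dword-aligned lane selector; a lane whose address operand is 8
// pushes its DATA0 value to lane 2 of VDST (forward permute writes
// vdst[index] = data[lane]).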
2350 // --- Inst_DS__DS_BPERMUTE_B32 class methods ---
2351
2352 Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
2353 : Inst_DS(iFmt, "ds_bpermute_b32")
2354 {
2355 setFlag(MemoryRef);
2361 setFlag(Load);
2362 } // Inst_DS__DS_BPERMUTE_B32
2363
2364 Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
2365 {
2366 } // ~Inst_DS__DS_BPERMUTE_B32
2367
2368 // --- description from .arch file ---
2369 // Backward permute.
2370 void
2371 Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
2372 {
2373 Wavefront *wf = gpuDynInst->wavefront();
2374 wf->decLGKMInstsIssued();
2375 wf->untrackLGKMInst(gpuDynInst);
2376
2377 if (gpuDynInst->exec_mask.none()) {
2378 return;
2379 }
2380
2381 gpuDynInst->execUnitId = wf->execUnitId;
2382 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2383 gpuDynInst->latency.set(gpuDynInst->computeUnit()
2384 ->cyclesToTicks(Cycles(24)));
2385 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2386 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
2387 VecOperandU32 vdst(gpuDynInst, extData.VDST);
2388
2389 addr.read();
2390 data.read();
2391
2392 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2393 if (gpuDynInst->exec_mask[lane]) {
2400 assert(!instData.OFFSET1);
2407 int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
2408 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
2409 "of bounds.\n", gpuDynInst->disassemble(), index);
2415 if (wf->execMask(index)) {
2416 vdst[lane] = data[index];
2417 } else {
2418 vdst[lane] = 0;
2419 }
2420 }
2421 }
2422
2423 vdst.write();
2424
2431 wf->computeUnit->vrf[wf->simdId]->
2432 scheduleWriteOperandsFromLoad(wf, gpuDynInst);
2437 wf->rdLmReqsInPipe--;
2438 } // execute
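// Illustrative note (editorial): DS_BPERMUTE_B32 uses the same
// dword-address lane selector as DS_PERMUTE_B32 but pulls instead of
// pushes, i.e. vdst[lane] = data[index], with inactive source lanes
// reading as zero.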
2439
2440 // --- Inst_DS__DS_ADD_U64 class methods ---
2441
2443 : Inst_DS(iFmt, "ds_add_u64")
2444 {
2445 setFlag(MemoryRef);
2446 setFlag(GroupSegment);
2447 setFlag(AtomicAdd);
2448 setFlag(AtomicNoReturn);
2449 } // Inst_DS__DS_ADD_U64
2450
2452 {
2453 } // ~Inst_DS__DS_ADD_U64
2454
2455 // --- description from .arch file ---
2456 // 64b:
2457 // MEM[ADDR] += DATA[0:1];
2458 void
2460 {
2461 Wavefront *wf = gpuDynInst->wavefront();
2462
2463 if (gpuDynInst->exec_mask.none()) {
2464 wf->decLGKMInstsIssued();
2465 wf->untrackLGKMInst(gpuDynInst);
2466 return;
2467 }
2468
2469 gpuDynInst->execUnitId = wf->execUnitId;
2470 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2471 gpuDynInst->latency.set(
2472 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2473 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2474 ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
2475
2476 addr.read();
2477 data.read();
2478
2479 calcAddr(gpuDynInst, addr);
2480
2481 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2482 if (gpuDynInst->exec_mask[lane]) {
2483 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
2484 = data[lane];
2485 }
2486 }
2487
2488 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2489 } // execute
2490
2491 void
2493 {
2494 Addr offset0 = instData.OFFSET0;
2495 Addr offset1 = instData.OFFSET1;
2496 Addr offset = (offset1 << 8) | offset0;
2497
2499 } // initiateAcc
2500
2501 void
2503 {
2504 } // completeAcc
2505 // --- Inst_DS__DS_SUB_U64 class methods ---
2506
2508 : Inst_DS(iFmt, "ds_sub_u64")
2509 {
2510 } // Inst_DS__DS_SUB_U64
2511
2513 {
2514 } // ~Inst_DS__DS_SUB_U64
2515
2516 // --- description from .arch file ---
2517 // 64b:
2518 // tmp = MEM[ADDR];
2519 // MEM[ADDR] -= DATA[0:1];
2520 // RETURN_DATA[0:1] = tmp.
2521 void
2523 {
2525 } // execute
2526 // --- Inst_DS__DS_RSUB_U64 class methods ---
2527
2529 : Inst_DS(iFmt, "ds_rsub_u64")
2530 {
2531 } // Inst_DS__DS_RSUB_U64
2532
2534 {
2535 } // ~Inst_DS__DS_RSUB_U64
2536
2537 // --- description from .arch file ---
2538 // 64b:
2539 // tmp = MEM[ADDR];
2540 // MEM[ADDR] = DATA - MEM[ADDR];
2541 // RETURN_DATA = tmp.
2542 // Subtraction with reversed operands.
2543 void
2545 {
2547 } // execute
2548 // --- Inst_DS__DS_INC_U64 class methods ---
2549
2551 : Inst_DS(iFmt, "ds_inc_u64")
2552 {
2553 } // Inst_DS__DS_INC_U64
2554
2556 {
2557 } // ~Inst_DS__DS_INC_U64
2558
2559 // --- description from .arch file ---
2560 // 64b:
2561 // tmp = MEM[ADDR];
2562 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
2563 // RETURN_DATA[0:1] = tmp.
2564 void
2566 {
2568 } // execute
2569 // --- Inst_DS__DS_DEC_U64 class methods ---
2570
2572 : Inst_DS(iFmt, "ds_dec_u64")
2573 {
2574 } // Inst_DS__DS_DEC_U64
2575
2577 {
2578 } // ~Inst_DS__DS_DEC_U64
2579
2580 // --- description from .arch file ---
2581 // 64b:
2582 // tmp = MEM[ADDR];
2583 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
2584 // (unsigned compare);
2585 // RETURN_DATA[0:1] = tmp.
2586 void
2588 {
2590 } // execute
2591 // --- Inst_DS__DS_MIN_I64 class methods ---
2592
2594 : Inst_DS(iFmt, "ds_min_i64")
2595 {
2596 } // Inst_DS__DS_MIN_I64
2597
2599 {
2600 } // ~Inst_DS__DS_MIN_I64
2601
2602 // --- description from .arch file ---
2603 // 64b:
2604 // tmp = MEM[ADDR];
2605 // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
2606 // RETURN_DATA[0:1] = tmp.
2607 void
2609 {
2611 } // execute
2612 // --- Inst_DS__DS_MAX_I64 class methods ---
2613
2615 : Inst_DS(iFmt, "ds_max_i64")
2616 {
2617 } // Inst_DS__DS_MAX_I64
2618
2620 {
2621 } // ~Inst_DS__DS_MAX_I64
2622
2623 // --- description from .arch file ---
2624 // 64b:
2625 // tmp = MEM[ADDR];
2626 // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
2627 // RETURN_DATA[0:1] = tmp.
2628 void
2630 {
2632 } // execute
2633 // --- Inst_DS__DS_MIN_U64 class methods ---
2634
2636 : Inst_DS(iFmt, "ds_min_u64")
2637 {
2638 } // Inst_DS__DS_MIN_U64
2639
2641 {
2642 } // ~Inst_DS__DS_MIN_U64
2643
2644 // --- description from .arch file ---
2645 // 64b:
2646 // tmp = MEM[ADDR];
2647 // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
2648 // RETURN_DATA[0:1] = tmp.
2649 void
2651 {
2653 } // execute
2654 // --- Inst_DS__DS_MAX_U64 class methods ---
2655
2657 : Inst_DS(iFmt, "ds_max_u64")
2658 {
2659 } // Inst_DS__DS_MAX_U64
2660
2662 {
2663 } // ~Inst_DS__DS_MAX_U64
2664
2665 // --- description from .arch file ---
2666 // 64b:
2667 // tmp = MEM[ADDR];
2668 // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
2669 // RETURN_DATA[0:1] = tmp.
2670 void
2672 {
2674 } // execute
2675 // --- Inst_DS__DS_AND_B64 class methods ---
2676
2678 : Inst_DS(iFmt, "ds_and_b64")
2679 {
2680 } // Inst_DS__DS_AND_B64
2681
2683 {
2684 } // ~Inst_DS__DS_AND_B64
2685
2686 // --- description from .arch file ---
2687 // 64b:
2688 // tmp = MEM[ADDR];
2689 // MEM[ADDR] &= DATA[0:1];
2690 // RETURN_DATA[0:1] = tmp.
2691 void
2693 {
2695 } // execute
2696 // --- Inst_DS__DS_OR_B64 class methods ---
2697
2699 : Inst_DS(iFmt, "ds_or_b64")
2700 {
2701 } // Inst_DS__DS_OR_B64
2702
2704 {
2705 } // ~Inst_DS__DS_OR_B64
2706
2707 // --- description from .arch file ---
2708 // 64b:
2709 // tmp = MEM[ADDR];
2710 // MEM[ADDR] |= DATA[0:1];
2711 // RETURN_DATA[0:1] = tmp.
2712 void
2714 {
2716 } // execute
2717 // --- Inst_DS__DS_XOR_B64 class methods ---
2718
2720 : Inst_DS(iFmt, "ds_xor_b64")
2721 {
2722 } // Inst_DS__DS_XOR_B64
2723
2725 {
2726 } // ~Inst_DS__DS_XOR_B64
2727
2728 // --- description from .arch file ---
2729 // 64b:
2730 // tmp = MEM[ADDR];
2731 // MEM[ADDR] ^= DATA[0:1];
2732 // RETURN_DATA[0:1] = tmp.
2733 void
2735 {
2737 } // execute
2738 // --- Inst_DS__DS_MSKOR_B64 class methods ---
2739
2741 : Inst_DS(iFmt, "ds_mskor_b64")
2742 {
2743 } // Inst_DS__DS_MSKOR_B64
2744
2746 {
2747 } // ~Inst_DS__DS_MSKOR_B64
2748
2749 // --- description from .arch file ---
2750 // 64b:
2751 // tmp = MEM[ADDR];
2752 // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
2753 // RETURN_DATA = tmp.
2754 // Masked dword OR, D0 contains the mask and D1 contains the new value.
2755 void
2757 {
2759 } // execute
2760 // --- Inst_DS__DS_WRITE_B64 class methods ---
2761
2762 Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt)
2763 : Inst_DS(iFmt, "ds_write_b64")
2764 {
2765 setFlag(MemoryRef);
2766 setFlag(Store);
2767 } // Inst_DS__DS_WRITE_B64
2768
2769 Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64()
2770 {
2771 } // ~Inst_DS__DS_WRITE_B64
2772
2773 // --- description from .arch file ---
2774 // 64b:
2775 // MEM[ADDR] = DATA.
2776 // Write qword.
2777 void
2778 Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst)
2779 {
2780 Wavefront *wf = gpuDynInst->wavefront();
2781
2782 if (gpuDynInst->exec_mask.none()) {
2783 wf->decLGKMInstsIssued();
2784 wf->untrackLGKMInst(gpuDynInst);
2785 return;
2786 }
2787
2788 gpuDynInst->execUnitId = wf->execUnitId;
2789 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2790 gpuDynInst->latency.set(
2791 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2792 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2793 ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
2794
2795 addr.read();
2796 data.read();
2797
2798 calcAddr(gpuDynInst, addr);
2799
2800 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2801 if (gpuDynInst->exec_mask[lane]) {
2802 (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
2803 = data[lane];
2804 }
2805 }
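            // Each active lane's 64-bit store value is staged in d_data; the
            // actual write happens when the local memory pipeline services
            // the request.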
2806
2807 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2808 } // execute
2809
2810 void
2811 Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
2812 {
2813 Addr offset0 = instData.OFFSET0;
2814 Addr offset1 = instData.OFFSET1;
2815 Addr offset = (offset1 << 8) | offset0;
2816
2817 initMemWrite<VecElemU64>(gpuDynInst, offset);
2818 } // initiateAcc
2819
2820 void
2821 Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst)
2822 {
2823 } // completeAcc
2824 // --- Inst_DS__DS_WRITE2_B64 class methods ---
2825
2826 Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt)
2827 : Inst_DS(iFmt, "ds_write2_b64")
2828 {
2829 setFlag(MemoryRef);
2830 setFlag(Store);
2831 } // Inst_DS__DS_WRITE2_B64
2832
2833 Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64()
2834 {
2835 } // ~Inst_DS__DS_WRITE2_B64
2836
2837 // --- description from .arch file ---
2838 // 64b:
2839 // MEM[ADDR_BASE + OFFSET0 * 8] = DATA;
2840 // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2.
2841 // Write 2 qwords.
2842 void
2843 Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst)
2844 {
2845 Wavefront *wf = gpuDynInst->wavefront();
2846
2847 if (gpuDynInst->exec_mask.none()) {
2848 wf->decLGKMInstsIssued();
2849 wf->untrackLGKMInst(gpuDynInst);
2850 return;
2851 }
2852
2853 gpuDynInst->execUnitId = wf->execUnitId;
2854 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2855 gpuDynInst->latency.set(
2856 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2857 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2858 ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
2859 ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);
2860
2861 addr.read();
2862 data0.read();
2863 data1.read();
2864
2865 calcAddr(gpuDynInst, addr);
2866
2867 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2868 if (gpuDynInst->exec_mask[lane]) {
2869 (reinterpret_cast<VecElemU64*>(
2870 gpuDynInst->d_data))[lane * 2] = data0[lane];
2871 (reinterpret_cast<VecElemU64*>(
2872 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
2873 }
2874 }
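            // The two qwords per lane are interleaved in d_data (even slot =
            // DATA0, odd slot = DATA1) so initDualMemWrite() can issue the
            // two stores separately.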
2875
2876 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2877 } // execute
2878
2879 void
2880 Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
2881 {
2882 Addr offset0 = instData.OFFSET0 * 8;
2883 Addr offset1 = instData.OFFSET1 * 8;
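            // ds_write2_b64 offsets are in qword units, so OFFSET0/OFFSET1
            // are scaled by 8 bytes to form two independent store offsets.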
2884
2885 initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
2886 }
2887
2888 void
2889 Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
2890 {
2891 } // completeAcc
2892 // --- Inst_DS__DS_WRITE2ST64_B64 class methods ---
2893
2894 Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt)
2895 : Inst_DS(iFmt, "ds_write2st64_b64")
2896 {
2897 setFlag(MemoryRef);
2898 setFlag(Store);
2899 } // Inst_DS__DS_WRITE2ST64_B64
2900
2901 Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64()
2902 {
2903 } // ~Inst_DS__DS_WRITE2ST64_B64
2904
2905 // --- description from .arch file ---
2906 // 64b:
2907 // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA;
2908 // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2;
2909 // Write 2 qwords.
2910 void
2911 Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
2912 {
2913 Wavefront *wf = gpuDynInst->wavefront();
2914
2915 if (gpuDynInst->exec_mask.none()) {
2916 wf->decLGKMInstsIssued();
2917 wf->untrackLGKMInst(gpuDynInst);
2918 return;
2919 }
2920
2921 gpuDynInst->execUnitId = wf->execUnitId;
2922 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2923 gpuDynInst->latency.set(
2924 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2925 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2926 ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
2927 ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);
2928
2929 addr.read();
2930 data0.read();
2931 data1.read();
2932
2933 calcAddr(gpuDynInst, addr);
2934
2935 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2936 if (gpuDynInst->exec_mask[lane]) {
2937 (reinterpret_cast<VecElemU64*>(
2938 gpuDynInst->d_data))[lane * 2] = data0[lane];
2939 (reinterpret_cast<VecElemU64*>(
2940 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
2941 }
2942 }
2943
2944 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2945 } // execute
2946
2947 void
2948 Inst_DS__DS_WRITE2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
2949 {
2950 Addr offset0 = instData.OFFSET0 * 8 * 64;
2951 Addr offset1 = instData.OFFSET1 * 8 * 64;
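            // The ST64 variant strides by 64 qwords: each offset field is
            // scaled by 8 * 64 = 512 bytes.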
2952
2953 initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
2954 }
2955
2956 void
2957 Inst_DS__DS_WRITE2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
2958 {
2959 } // completeAcc
2960 // --- Inst_DS__DS_CMPST_B64 class methods ---
2961
2963 : Inst_DS(iFmt, "ds_cmpst_b64")
2964 {
2965 } // Inst_DS__DS_CMPST_B64
2966
2968 {
2969 } // ~Inst_DS__DS_CMPST_B64
2970
2971 // --- description from .arch file ---
2972 // 64b:
2973 // tmp = MEM[ADDR];
2974 // src = DATA2;
2975 // cmp = DATA;
2976 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
2977 // RETURN_DATA[0] = tmp.
2978 // Compare and store.
2979 // Caution, the order of src and cmp are the *opposite* of the
2980 // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode.
2981 void
2983 {
2985 } // execute
2986 // --- Inst_DS__DS_CMPST_F64 class methods ---
2987
2989 : Inst_DS(iFmt, "ds_cmpst_f64")
2990 {
2991 setFlag(F64);
2992 } // Inst_DS__DS_CMPST_F64
2993
2995 {
2996 } // ~Inst_DS__DS_CMPST_F64
2997
2998 // --- description from .arch file ---
2999 // 64b:
3000 // tmp = MEM[ADDR];
3001 // src = DATA2;
3002 // cmp = DATA;
3003 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
3004 // RETURN_DATA[0] = tmp.
3005 // Floating point compare and store that handles NaN/INF/denormal values.
3006 // Caution, the order of src and cmp are the *opposite* of the
3007 // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode.
3008 void
3010 {
3012 } // execute
3013 // --- Inst_DS__DS_MIN_F64 class methods ---
3014
3016 : Inst_DS(iFmt, "ds_min_f64")
3017 {
3018 setFlag(F64);
3019 } // Inst_DS__DS_MIN_F64
3020
3022 {
3023 } // ~Inst_DS__DS_MIN_F64
3024
3025 // --- description from .arch file ---
3026 // 64b.
3027 // tmp = MEM[ADDR];
3028 // src = DATA;
3029 // cmp = DATA2;
3030 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
3031 // Floating point minimum that handles NaN/INF/denormal values.
3032 // Note that this opcode is slightly more general-purpose than
3033 // --- BUFFER_ATOMIC_FMIN_X2.
3034 void
3036 {
3038 } // execute
3039 // --- Inst_DS__DS_MAX_F64 class methods ---
3040
3042 : Inst_DS(iFmt, "ds_max_f64")
3043 {
3044 setFlag(F64);
3045 } // Inst_DS__DS_MAX_F64
3046
3048 {
3049 } // ~Inst_DS__DS_MAX_F64
3050
3051 // --- description from .arch file ---
3052 // 64b.
3053 // tmp = MEM[ADDR];
3054 // src = DATA;
3055 // cmp = DATA2;
3056 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
3057 // Floating point maximum that handles NaN/INF/denormal values.
3058 // Note that this opcode is slightly more general-purpose than
3059 // --- BUFFER_ATOMIC_FMAX_X2.
3060 void
3062 {
3064 } // execute
3065 // --- Inst_DS__DS_ADD_RTN_U64 class methods ---
3066
3068 : Inst_DS(iFmt, "ds_add_rtn_u64")
3069 {
3070 } // Inst_DS__DS_ADD_RTN_U64
3071
3073 {
3074 } // ~Inst_DS__DS_ADD_RTN_U64
3075
3076 // --- description from .arch file ---
3077 // 64b:
3078 // tmp = MEM[ADDR];
3079 // MEM[ADDR] += DATA[0:1];
3080 // RETURN_DATA[0:1] = tmp.
3081 void
3083 {
3085 } // execute
3086 // --- Inst_DS__DS_SUB_RTN_U64 class methods ---
3087
3089 : Inst_DS(iFmt, "ds_sub_rtn_u64")
3090 {
3091 } // Inst_DS__DS_SUB_RTN_U64
3092
3094 {
3095 } // ~Inst_DS__DS_SUB_RTN_U64
3096
3097 // --- description from .arch file ---
3098 // 64b:
3099 // tmp = MEM[ADDR];
3100 // MEM[ADDR] -= DATA[0:1];
3101 // RETURN_DATA[0:1] = tmp.
3102 void
3104 {
3106 } // execute
3107 // --- Inst_DS__DS_RSUB_RTN_U64 class methods ---
3108
3110 : Inst_DS(iFmt, "ds_rsub_rtn_u64")
3111 {
3112 } // Inst_DS__DS_RSUB_RTN_U64
3113
3115 {
3116 } // ~Inst_DS__DS_RSUB_RTN_U64
3117
3118 // --- description from .arch file ---
3119 // 64b:
3120 // tmp = MEM[ADDR];
3121 // MEM[ADDR] = DATA - MEM[ADDR];
3122 // RETURN_DATA = tmp.
3123 // Subtraction with reversed operands.
3124 void
3126 {
3128 } // execute
3129 // --- Inst_DS__DS_INC_RTN_U64 class methods ---
3130
3132 : Inst_DS(iFmt, "ds_inc_rtn_u64")
3133 {
3134 } // Inst_DS__DS_INC_RTN_U64
3135
3137 {
3138 } // ~Inst_DS__DS_INC_RTN_U64
3139
3140 // --- description from .arch file ---
3141 // 64b:
3142 // tmp = MEM[ADDR];
3143 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
3144 // RETURN_DATA[0:1] = tmp.
3145 void
3147 {
3149 } // execute
3150 // --- Inst_DS__DS_DEC_RTN_U64 class methods ---
3151
3153 : Inst_DS(iFmt, "ds_dec_rtn_u64")
3154 {
3155 } // Inst_DS__DS_DEC_RTN_U64
3156
3158 {
3159 } // ~Inst_DS__DS_DEC_RTN_U64
3160
3161 // --- description from .arch file ---
3162 // 64b:
3163 // tmp = MEM[ADDR];
3164 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
3165 // (unsigned compare);
3166 // RETURN_DATA[0:1] = tmp.
3167 void
3169 {
3171 } // execute
3172 // --- Inst_DS__DS_MIN_RTN_I64 class methods ---
3173
3175 : Inst_DS(iFmt, "ds_min_rtn_i64")
3176 {
3177 } // Inst_DS__DS_MIN_RTN_I64
3178
3180 {
3181 } // ~Inst_DS__DS_MIN_RTN_I64
3182
3183 // --- description from .arch file ---
3184 // 64b:
3185 // tmp = MEM[ADDR];
3186 // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
3187 // RETURN_DATA[0:1] = tmp.
3188 void
3190 {
3192 } // execute
3193 // --- Inst_DS__DS_MAX_RTN_I64 class methods ---
3194
3196 : Inst_DS(iFmt, "ds_max_rtn_i64")
3197 {
3198 } // Inst_DS__DS_MAX_RTN_I64
3199
3201 {
3202 } // ~Inst_DS__DS_MAX_RTN_I64
3203
3204 // --- description from .arch file ---
3205 // 64b:
3206 // tmp = MEM[ADDR];
3207 // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
3208 // RETURN_DATA[0:1] = tmp.
3209 void
3211 {
3213 } // execute
3214 // --- Inst_DS__DS_MIN_RTN_U64 class methods ---
3215
3217 : Inst_DS(iFmt, "ds_min_rtn_u64")
3218 {
3219 } // Inst_DS__DS_MIN_RTN_U64
3220
3222 {
3223 } // ~Inst_DS__DS_MIN_RTN_U64
3224
3225 // --- description from .arch file ---
3226 // 64b:
3227 // tmp = MEM[ADDR];
3228 // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
3229 // RETURN_DATA[0:1] = tmp.
3230 void
3232 {
3234 } // execute
3235 // --- Inst_DS__DS_MAX_RTN_U64 class methods ---
3236
3238 : Inst_DS(iFmt, "ds_max_rtn_u64")
3239 {
3240 } // Inst_DS__DS_MAX_RTN_U64
3241
3243 {
3244 } // ~Inst_DS__DS_MAX_RTN_U64
3245
3246 // --- description from .arch file ---
3247 // 64b:
3248 // tmp = MEM[ADDR];
3249 // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
3250 // RETURN_DATA[0:1] = tmp.
3251 void
3253 {
3255 } // execute
3256 // --- Inst_DS__DS_AND_RTN_B64 class methods ---
3257
3259 : Inst_DS(iFmt, "ds_and_rtn_b64")
3260 {
3261 } // Inst_DS__DS_AND_RTN_B64
3262
3264 {
3265 } // ~Inst_DS__DS_AND_RTN_B64
3266
3267 // --- description from .arch file ---
3268 // 64b:
3269 // tmp = MEM[ADDR];
3270 // MEM[ADDR] &= DATA[0:1];
3271 // RETURN_DATA[0:1] = tmp.
3272 void
3274 {
3276 } // execute
3277 // --- Inst_DS__DS_OR_RTN_B64 class methods ---
3278
3280 : Inst_DS(iFmt, "ds_or_rtn_b64")
3281 {
3282 } // Inst_DS__DS_OR_RTN_B64
3283
3285 {
3286 } // ~Inst_DS__DS_OR_RTN_B64
3287
3288 // --- description from .arch file ---
3289 // 64b:
3290 // tmp = MEM[ADDR];
3291 // MEM[ADDR] |= DATA[0:1];
3292 // RETURN_DATA[0:1] = tmp.
3293 void
3295 {
3297 } // execute
3298 // --- Inst_DS__DS_XOR_RTN_B64 class methods ---
3299
3301 : Inst_DS(iFmt, "ds_xor_rtn_b64")
3302 {
3303 } // Inst_DS__DS_XOR_RTN_B64
3304
3306 {
3307 } // ~Inst_DS__DS_XOR_RTN_B64
3308
3309 // --- description from .arch file ---
3310 // 64b:
3311 // tmp = MEM[ADDR];
3312 // MEM[ADDR] ^= DATA[0:1];
3313 // RETURN_DATA[0:1] = tmp.
3314 void
3316 {
3318 } // execute
3319 // --- Inst_DS__DS_MSKOR_RTN_B64 class methods ---
3320
3322 : Inst_DS(iFmt, "ds_mskor_rtn_b64")
3323 {
3324 } // Inst_DS__DS_MSKOR_RTN_B64
3325
3327 {
3328 } // ~Inst_DS__DS_MSKOR_RTN_B64
3329
3330 // --- description from .arch file ---
3331 // 64b:
3332 // tmp = MEM[ADDR];
3333 // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
3334 // RETURN_DATA = tmp.
3335 // Masked dword OR, D0 contains the mask and D1 contains the new value.
3336 void
3338 {
3340 } // execute
3341 // --- Inst_DS__DS_WRXCHG_RTN_B64 class methods ---
3342
3344 : Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
3345 {
3346 } // Inst_DS__DS_WRXCHG_RTN_B64
3347
3349 {
3350 } // ~Inst_DS__DS_WRXCHG_RTN_B64
3351
3352 // --- description from .arch file ---
3353 // tmp = MEM[ADDR];
3354 // MEM[ADDR] = DATA;
3355 // RETURN_DATA = tmp.
3356 // Write-exchange operation.
3357 void
3359 {
3361 } // execute
3362 // --- Inst_DS__DS_WRXCHG2_RTN_B64 class methods ---
3363
3365 : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
3366 {
3367 } // Inst_DS__DS_WRXCHG2_RTN_B64
3368
3370 {
3371 } // ~Inst_DS__DS_WRXCHG2_RTN_B64
3372
3373 // --- description from .arch file ---
3374 // Write-exchange 2 separate qwords.
3375 void
3377 {
3379 } // execute
3380 // --- Inst_DS__DS_WRXCHG2ST64_RTN_B64 class methods ---
3381
3382 Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64(
3383 InFmt_DS *iFmt)
3384 : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
3385 {
3386 } // Inst_DS__DS_WRXCHG2ST64_RTN_B64
3387
3389 {
3390 } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64
3391
3392 // --- description from .arch file ---
3393 // Write-exchange 2 qwords with a stride of 64 qwords.
3394 void
3399 // --- Inst_DS__DS_CMPST_RTN_B64 class methods ---
3400
3402 : Inst_DS(iFmt, "ds_cmpst_rtn_b64")
3403 {
3404 } // Inst_DS__DS_CMPST_RTN_B64
3405
3407 {
3408 } // ~Inst_DS__DS_CMPST_RTN_B64
3409
3410 // --- description from .arch file ---
3411 // 64b:
3412 // tmp = MEM[ADDR];
3413 // src = DATA2;
3414 // cmp = DATA;
3415 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
3416 // RETURN_DATA[0] = tmp.
3417 // Compare and store.
3418 // Caution, the order of src and cmp are the *opposite* of the
3419 // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode.
3420 void
3422 {
3424 } // execute
3425 // --- Inst_DS__DS_CMPST_RTN_F64 class methods ---
3426
3428 : Inst_DS(iFmt, "ds_cmpst_rtn_f64")
3429 {
3430 setFlag(F64);
3431 } // Inst_DS__DS_CMPST_RTN_F64
3432
3434 {
3435 } // ~Inst_DS__DS_CMPST_RTN_F64
3436
3437 // --- description from .arch file ---
3438 // 64b:
3439 // tmp = MEM[ADDR];
3440 // src = DATA2;
3441 // cmp = DATA;
3442 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
3443 // RETURN_DATA[0] = tmp.
3444 // Floating point compare and store that handles NaN/INF/denormal values.
3445 // Caution, the order of src and cmp are the *opposite* of the
3446 // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode.
3447 void
3449 {
3451 } // execute
3452 // --- Inst_DS__DS_MIN_RTN_F64 class methods ---
3453
3455 : Inst_DS(iFmt, "ds_min_rtn_f64")
3456 {
3457 setFlag(F64);
3458 } // Inst_DS__DS_MIN_RTN_F64
3459
3461 {
3462 } // ~Inst_DS__DS_MIN_RTN_F64
3463
3464 // --- description from .arch file ---
3465 // 64b.
3466 // tmp = MEM[ADDR];
3467 // src = DATA;
3468 // cmp = DATA2;
3469 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
3470 // Floating point minimum that handles NaN/INF/denormal values.
3471 // Note that this opcode is slightly more general-purpose than
3472 // --- BUFFER_ATOMIC_FMIN_X2.
3473 void
3475 {
3477 } // execute
3478 // --- Inst_DS__DS_MAX_RTN_F64 class methods ---
3479
3481 : Inst_DS(iFmt, "ds_max_rtn_f64")
3482 {
3483 setFlag(F64);
3484 } // Inst_DS__DS_MAX_RTN_F64
3485
3487 {
3488 } // ~Inst_DS__DS_MAX_RTN_F64
3489
3490 // --- description from .arch file ---
3491 // 64b.
3492 // tmp = MEM[ADDR];
3493 // src = DATA;
3494 // cmp = DATA2;
3495 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
3496 // Floating point maximum that handles NaN/INF/denormal values.
3497 // Note that this opcode is slightly more general-purpose than
3498 // --- BUFFER_ATOMIC_FMAX_X2.
3499 void
3501 {
3503 } // execute
3504 // --- Inst_DS__DS_READ_B64 class methods ---
3505
3506 Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt)
3507 : Inst_DS(iFmt, "ds_read_b64")
3508 {
3509 setFlag(MemoryRef);
3510 setFlag(Load);
3511 } // Inst_DS__DS_READ_B64
3512
3513 Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64()
3514 {
3515 } // ~Inst_DS__DS_READ_B64
3516
3517 // --- description from .arch file ---
3518 // RETURN_DATA = MEM[ADDR].
3519 // Read 1 qword.
3520 void
3521 Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst)
3522 {
3523 Wavefront *wf = gpuDynInst->wavefront();
3524
3525 if (gpuDynInst->exec_mask.none()) {
3526 wf->decLGKMInstsIssued();
3527 wf->untrackLGKMInst(gpuDynInst);
3528 return;
3529 }
3530
3531 gpuDynInst->execUnitId = wf->execUnitId;
3532 gpuDynInst->latency.init(gpuDynInst->computeUnit());
3533 gpuDynInst->latency.set(
3534 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
3535 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
3536
3537 addr.read();
3538
3539 calcAddr(gpuDynInst, addr);
3540
3541 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
3542 } // execute
3543
3544 void
3545 Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
3546 {
3547 Addr offset0 = instData.OFFSET0;
3548 Addr offset1 = instData.OFFSET1;
3549 Addr offset = (offset1 << 8) | offset0;
3550
3551 initMemRead<VecElemU64>(gpuDynInst, offset);
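            // Loads one qword per active lane at the lane's LDS address plus
            // the 16-bit instruction offset; the data lands in d_data and is
            // written back in completeAcc().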
3552 } // initiateAcc
3553
3554 void
3555 Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst)
3556 {
3557 VecOperandU64 vdst(gpuDynInst, extData.VDST);
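            // VDST names a 64-bit destination, i.e. a pair of consecutive
            // 32-bit VGPRs; the loaded qwords in d_data are copied into it
            // and written back below.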
3558
3559 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3560 if (gpuDynInst->exec_mask[lane]) {
3561 vdst[lane] = (reinterpret_cast<VecElemU64*>(
3562 gpuDynInst->d_data))[lane];
3563 }
3564 }
3565
3566 vdst.write();
3567 } // completeAcc
3568 // --- Inst_DS__DS_READ2_B64 class methods ---
3569
3570 Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt)
3571 : Inst_DS(iFmt, "ds_read2_b64")
3572 {
3573 setFlag(MemoryRef);
3574 setFlag(Load);
3575 } // Inst_DS__DS_READ2_B64
3576
3577 Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64()
3578 {
3579 } // ~Inst_DS__DS_READ2_B64
3580
3581 // --- description from .arch file ---
3582 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8];
3583 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8].
3584 // Read 2 qwords.
3585 void
3586 Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst)
3587 {
3588 Wavefront *wf = gpuDynInst->wavefront();
3589
3590 if (gpuDynInst->exec_mask.none()) {
3591 wf->decLGKMInstsIssued();
3592 wf->untrackLGKMInst(gpuDynInst);
3593 return;
3594 }
3595
3596 gpuDynInst->execUnitId = wf->execUnitId;
3597 gpuDynInst->latency.init(gpuDynInst->computeUnit());
3598 gpuDynInst->latency.set(
3599 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
3600 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
3601
3602 addr.read();
3603
3604 calcAddr(gpuDynInst, addr);
3605
3606 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
3607 } // execute
3608
3609 void
3610 Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
3611 {
3612 Addr offset0 = instData.OFFSET0 * 8;
3613 Addr offset1 = instData.OFFSET1 * 8;
3614
3615 initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
3616 } // initiateAcc
3617
3618 void
3619 Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
3620 {
3621 VecOperandU64 vdst0(gpuDynInst, extData.VDST);
3622 VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
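            // Each 64-bit result occupies two consecutive 32-bit VGPRs, so
            // the second destination pair starts at VDST + 2.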
3623
3624 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3625 if (gpuDynInst->exec_mask[lane]) {
3626 vdst0[lane] = (reinterpret_cast<VecElemU64*>(
3627 gpuDynInst->d_data))[lane * 2];
3628 vdst1[lane] = (reinterpret_cast<VecElemU64*>(
3629 gpuDynInst->d_data))[lane * 2 + 1];
3630 }
3631 }
3632
3633 vdst0.write();
3634 vdst1.write();
3635 } // completeAcc
3636 // --- Inst_DS__DS_READ2ST64_B64 class methods ---
3637
3638 Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt)
3639 : Inst_DS(iFmt, "ds_read2st64_b64")
3640 {
3641 setFlag(MemoryRef);
3642 setFlag(Load);
3643 } // Inst_DS__DS_READ2ST64_B64
3644
3645 Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64()
3646 {
3647 } // ~Inst_DS__DS_READ2ST64_B64
3648
3649 // --- description from .arch file ---
3650 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64];
3651 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64].
3652 // Read 2 qwords.
3653 void
3654 Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
3655 {
3656 Wavefront *wf = gpuDynInst->wavefront();
3657
3658 if (gpuDynInst->exec_mask.none()) {
3659 wf->decLGKMInstsIssued();
3660 wf->untrackLGKMInst(gpuDynInst);
3661 return;
3662 }
3663
3664 gpuDynInst->execUnitId = wf->execUnitId;
3665 gpuDynInst->latency.init(gpuDynInst->computeUnit());
3666 gpuDynInst->latency.set(
3667 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
3668 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
3669
3670 addr.read();
3671
3672 calcAddr(gpuDynInst, addr);
3673
3674 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
3675 } // execute
3676
3677 void
3678 Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
3679 {
3680 Addr offset0 = (instData.OFFSET0 * 8 * 64);
3681 Addr offset1 = (instData.OFFSET1 * 8 * 64);
3682
3683 initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
3684 }
3685
3686 void
3687 Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
3688 {
3689 VecOperandU64 vdst0(gpuDynInst, extData.VDST);
3690 VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
3691
3692 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3693 if (gpuDynInst->exec_mask[lane]) {
3694 vdst0[lane] = (reinterpret_cast<VecElemU64*>(
3695 gpuDynInst->d_data))[lane * 2];
3696 vdst1[lane] = (reinterpret_cast<VecElemU64*>(
3697 gpuDynInst->d_data))[lane * 2 + 1];
3698 }
3699 }
3700
3701 vdst0.write();
3702 vdst1.write();
3703 }
3704 // --- Inst_DS__DS_CONDXCHG32_RTN_B64 class methods ---
3705
3706 Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64(
3707 InFmt_DS *iFmt)
3708 : Inst_DS(iFmt, "ds_condxchg32_rtn_b64")
3709 {
3710 } // Inst_DS__DS_CONDXCHG32_RTN_B64
3711
3713 {
3714 } // ~Inst_DS__DS_CONDXCHG32_RTN_B64
3715
3716 // --- description from .arch file ---
3717 // Conditional write exchange.
3718 void
3723 // --- Inst_DS__DS_ADD_SRC2_U32 class methods ---
3724
3726 : Inst_DS(iFmt, "ds_add_src2_u32")
3727 {
3728 } // Inst_DS__DS_ADD_SRC2_U32
3729
3731 {
3732 } // ~Inst_DS__DS_ADD_SRC2_U32
3733
3734 // --- description from .arch file ---
3735 // 32b:
3736 // A = ADDR_BASE;
3737 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3738 // --- {offset1[6],offset1[6:0],offset0});
3739 // MEM[A] = MEM[A] + MEM[B].
3740 void
3742 {
3744 } // execute
3745 // --- Inst_DS__DS_SUB_SRC2_U32 class methods ---
3746
3748 : Inst_DS(iFmt, "ds_sub_src2_u32")
3749 {
3750 } // Inst_DS__DS_SUB_SRC2_U32
3751
3753 {
3754 } // ~Inst_DS__DS_SUB_SRC2_U32
3755
3756 // --- description from .arch file ---
3757 // 32b:
3758 // A = ADDR_BASE;
3759 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3760 // --- {offset1[6],offset1[6:0],offset0});
3761 // MEM[A] = MEM[A] - MEM[B].
3762 void
3764 {
3766 } // execute
3767 // --- Inst_DS__DS_RSUB_SRC2_U32 class methods ---
3768
3770 : Inst_DS(iFmt, "ds_rsub_src2_u32")
3771 {
3772 } // Inst_DS__DS_RSUB_SRC2_U32
3773
3775 {
3776 } // ~Inst_DS__DS_RSUB_SRC2_U32
3777
3778 // --- description from .arch file ---
3779 // 32b:
3780 // A = ADDR_BASE;
3781 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3782 // --- {offset1[6],offset1[6:0],offset0});
3783 // MEM[A] = MEM[B] - MEM[A].
3784 void
3786 {
3788 } // execute
3789 // --- Inst_DS__DS_INC_SRC2_U32 class methods ---
3790
3792 : Inst_DS(iFmt, "ds_inc_src2_u32")
3793 {
3794 } // Inst_DS__DS_INC_SRC2_U32
3795
3797 {
3798 } // ~Inst_DS__DS_INC_SRC2_U32
3799
3800 // --- description from .arch file ---
3801 // 32b:
3802 // A = ADDR_BASE;
3803 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3804 // --- {offset1[6],offset1[6:0],offset0});
3805 // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
3806 void
3808 {
3810 } // execute
3811 // --- Inst_DS__DS_DEC_SRC2_U32 class methods ---
3812
3814 : Inst_DS(iFmt, "ds_dec_src2_u32")
3815 {
3816 } // Inst_DS__DS_DEC_SRC2_U32
3817
3819 {
3820 } // ~Inst_DS__DS_DEC_SRC2_U32
3821
3822 // --- description from .arch file ---
3823 // 32b:
3824 // A = ADDR_BASE;
3825 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3826 // --- {offset1[6],offset1[6:0],offset0});
3827 // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
3828 // Uint decrement.
3829 void
3831 {
3833 } // execute
3834 // --- Inst_DS__DS_MIN_SRC2_I32 class methods ---
3835
3837 : Inst_DS(iFmt, "ds_min_src2_i32")
3838 {
3839 } // Inst_DS__DS_MIN_SRC2_I32
3840
3842 {
3843 } // ~Inst_DS__DS_MIN_SRC2_I32
3844
3845 // --- description from .arch file ---
3846 // 32b:
3847 // A = ADDR_BASE;
3848 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3849 // --- {offset1[6],offset1[6:0],offset0});
3850 // MEM[A] = min(MEM[A], MEM[B]).
3851 void
3853 {
3855 } // execute
3856 // --- Inst_DS__DS_MAX_SRC2_I32 class methods ---
3857
3859 : Inst_DS(iFmt, "ds_max_src2_i32")
3860 {
3861 } // Inst_DS__DS_MAX_SRC2_I32
3862
3864 {
3865 } // ~Inst_DS__DS_MAX_SRC2_I32
3866
3867 // --- description from .arch file ---
3868 // 32b:
3869 // A = ADDR_BASE;
3870 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3871 // --- {offset1[6],offset1[6:0],offset0});
3872 // MEM[A] = max(MEM[A], MEM[B]).
3873 void
3875 {
3877 } // execute
3878 // --- Inst_DS__DS_MIN_SRC2_U32 class methods ---
3879
3881 : Inst_DS(iFmt, "ds_min_src2_u32")
3882 {
3883 } // Inst_DS__DS_MIN_SRC2_U32
3884
3886 {
3887 } // ~Inst_DS__DS_MIN_SRC2_U32
3888
3889 // --- description from .arch file ---
3890 // 32b:
3891 // A = ADDR_BASE;
3892 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3893 // --- {offset1[6],offset1[6:0],offset0});
3894 // MEM[A] = min(MEM[A], MEM[B]).
3895 void
3897 {
3899 } // execute
3900 // --- Inst_DS__DS_MAX_SRC2_U32 class methods ---
3901
3903 : Inst_DS(iFmt, "ds_max_src2_u32")
3904 {
3905 } // Inst_DS__DS_MAX_SRC2_U32
3906
3908 {
3909 } // ~Inst_DS__DS_MAX_SRC2_U32
3910
3911 // --- description from .arch file ---
3912 // 32b:
3913 // A = ADDR_BASE;
3914 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3915 // --- {offset1[6],offset1[6:0],offset0});
3916 // MEM[A] = max(MEM[A], MEM[B]).
3917 void
3919 {
3921 } // execute
3922 // --- Inst_DS__DS_AND_SRC2_B32 class methods ---
3923
3925 : Inst_DS(iFmt, "ds_and_src2_b32")
3926 {
3927 } // Inst_DS__DS_AND_SRC2_B32
3928
3930 {
3931 } // ~Inst_DS__DS_AND_SRC2_B32
3932
3933 // --- description from .arch file ---
3934 // 32b:
3935 // A = ADDR_BASE;
3936 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3937 // --- {offset1[6],offset1[6:0],offset0});
3938 // MEM[A] = MEM[A] & MEM[B].
3939 void
3941 {
3943 } // execute
3944 // --- Inst_DS__DS_OR_SRC2_B32 class methods ---
3945
3947 : Inst_DS(iFmt, "ds_or_src2_b32")
3948 {
3949 } // Inst_DS__DS_OR_SRC2_B32
3950
3952 {
3953 } // ~Inst_DS__DS_OR_SRC2_B32
3954
3955 // --- description from .arch file ---
3956 // 32b:
3957 // A = ADDR_BASE;
3958 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3959 // --- {offset1[6],offset1[6:0],offset0});
3960 // MEM[A] = MEM[A] | MEM[B].
3961 void
3963 {
3965 } // execute
3966 // --- Inst_DS__DS_XOR_SRC2_B32 class methods ---
3967
3969 : Inst_DS(iFmt, "ds_xor_src2_b32")
3970 {
3971 } // Inst_DS__DS_XOR_SRC2_B32
3972
3974 {
3975 } // ~Inst_DS__DS_XOR_SRC2_B32
3976
3977 // --- description from .arch file ---
3978 // 32b:
3979 // A = ADDR_BASE;
3980 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3981 // --- {offset1[6],offset1[6:0],offset0});
3982 // MEM[A] = MEM[A] ^ MEM[B].
3983 void
3985 {
3987 } // execute
3988 // --- Inst_DS__DS_WRITE_SRC2_B32 class methods ---
3989
3991 : Inst_DS(iFmt, "ds_write_src2_b32")
3992 {
3993 setFlag(MemoryRef);
3994 setFlag(Store);
3995 } // Inst_DS__DS_WRITE_SRC2_B32
3996
3998 {
3999 } // ~Inst_DS__DS_WRITE_SRC2_B32
4000
4001 // --- description from .arch file ---
4002 // 32b:
4003 // A = ADDR_BASE;
4004 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4005 // --- {offset1[6],offset1[6:0],offset0});
4006 // MEM[A] = MEM[B].
4007 // Write dword.
4008 void
4010 {
4012 } // execute
4013 // --- Inst_DS__DS_MIN_SRC2_F32 class methods ---
4014
4016 : Inst_DS(iFmt, "ds_min_src2_f32")
4017 {
4018 setFlag(F32);
4019 } // Inst_DS__DS_MIN_SRC2_F32
4020
4022 {
4023 } // ~Inst_DS__DS_MIN_SRC2_F32
4024
4025 // --- description from .arch file ---
4026 // 32b:
4027 // A = ADDR_BASE;
4028 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4029 // --- {offset1[6],offset1[6:0],offset0});
4030 // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
4031 // Float, handles NaN/INF/denorm.
4032 void
4034 {
4036 } // execute
4037 // --- Inst_DS__DS_MAX_SRC2_F32 class methods ---
4038
4040 : Inst_DS(iFmt, "ds_max_src2_f32")
4041 {
4042 setFlag(F32);
4043 } // Inst_DS__DS_MAX_SRC2_F32
4044
4046 {
4047 } // ~Inst_DS__DS_MAX_SRC2_F32
4048
4049 // --- description from .arch file ---
4050 // 32b:
4051 // A = ADDR_BASE;
4052 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4053 // --- {offset1[6],offset1[6:0],offset0});
4054 // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
4055 // Float, handles NaN/INF/denorm.
4056 void
4058 {
4060 } // execute
4061 // --- Inst_DS__DS_ADD_SRC2_F32 class methods ---
4062
4064 : Inst_DS(iFmt, "ds_add_src2_f32")
4065 {
4066 setFlag(F32);
4067 } // Inst_DS__DS_ADD_SRC2_F32
4068
4070 {
4071 } // ~Inst_DS__DS_ADD_SRC2_F32
4072
4073 // --- description from .arch file ---
4074 // 32b:
4075 // A = ADDR_BASE;
4076 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4077 // --- {offset1[6],offset1[6:0],offset0});
4078 // MEM[A] = MEM[B] + MEM[A].
4079 // Float, handles NaN/INF/denorm.
4080 void
4082 {
4084 } // execute
4085 // --- Inst_DS__DS_GWS_SEMA_RELEASE_ALL class methods ---
4086
4087 Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
4088 InFmt_DS *iFmt)
4089 : Inst_DS(iFmt, "ds_gws_sema_release_all")
4090 {
4091 } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL
4092
4094 {
4095 } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL
4096
4097 // --- description from .arch file ---
4098 // GDS Only: The GWS resource (rid) indicated will process this opcode by
4099 // updating the counter and labeling the specified resource as a semaphore.
4100 // //Determine the GWS resource to work on
4101 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
4102 // //Incr the state counter of the resource
4103 // state.counter[rid] = state.wave_in_queue;
4104 // state.type = SEMAPHORE;
4105 // return rd_done; //release calling wave
4106 // This action will release ALL queued waves; it will have no effect if no
4107 // --- waves are present.
4108 void
4113 // --- Inst_DS__DS_GWS_INIT class methods ---
4114
4116 : Inst_DS(iFmt, "ds_gws_init")
4117 {
4118 } // Inst_DS__DS_GWS_INIT
4119
4121 {
4122 } // ~Inst_DS__DS_GWS_INIT
4123
4124 // --- description from .arch file ---
4125 // GDS Only: Initialize a barrier or semaphore resource.
4126 // //Determine the GWS resource to work on
4127 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
4128 // //Get the value to use in init
4129 // index = find_first_valid(vector mask)
4130 // value = DATA[thread: index]
4131 // //Set the state of the resource
4132 // state.counter[rid] = lsb(value); //limit #waves
4133 // state.flag[rid] = 0;
4134 // return rd_done; //release calling wave
4135 void
4137 {
4139 } // execute
4140 // --- Inst_DS__DS_GWS_SEMA_V class methods ---
4141
4143 : Inst_DS(iFmt, "ds_gws_sema_v")
4144 {
4145 } // Inst_DS__DS_GWS_SEMA_V
4146
4148 {
4149 } // ~Inst_DS__DS_GWS_SEMA_V
4150
4151 // --- description from .arch file ---
4152 // GDS Only: The GWS resource indicated will process this opcode by
4153 // updating the counter and labeling the resource as a semaphore.
4154 // //Determine the GWS resource to work on
4155 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
4156 // //Incr the state counter of the resource
4157 // state.counter[rid]++;
4158 // state.type = SEMAPHORE;
4159 // return rd_done; //release calling wave
4160 // This action will release one wave if any are queued in this resource.
4161 void
4163 {
4165 } // execute
4166 // --- Inst_DS__DS_GWS_SEMA_BR class methods ---
4167
4169 : Inst_DS(iFmt, "ds_gws_sema_br")
4170 {
4171 } // Inst_DS__DS_GWS_SEMA_BR
4172
4174 {
4175 } // ~Inst_DS__DS_GWS_SEMA_BR
4176
4177 // --- description from .arch file ---
4178 // GDS Only: The GWS resource indicated will process this opcode by
4179 // updating the counter by the bulk release delivered count and labeling
4180 // the resource as a semaphore.
4181 // //Determine the GWS resource to work on
4182 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
4183 // index = find first valid (vector mask)
4184 // count = DATA[thread: index];
4185 // //Add count to the resource state counter
4186 // state.counter[rid] += count;
4187 // state.type = SEMAPHORE;
4188 // return rd_done; //release calling wave
4189 // This action will release count number of waves, immediately if queued,
4190 // or as they arrive from the noted resource.
4191 void
4193 {
4195 } // execute
4196 // --- Inst_DS__DS_GWS_SEMA_P class methods ---
4197
4199 : Inst_DS(iFmt, "ds_gws_sema_p")
4200 {
4201 } // Inst_DS__DS_GWS_SEMA_P
4202
4204 {
4205 } // ~Inst_DS__DS_GWS_SEMA_P
4206
4207 // --- description from .arch file ---
4208 // GDS Only: The GWS resource indicated will process this opcode by
4209 // queueing it until counter enables a release and then decrementing the
4210 // counter of the resource as a semaphore.
4211 // //Determine the GWS resource to work on
4212 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
4213 // state.type = SEMAPHORE;
4214 // ENQUEUE until(state[rid].counter > 0)
4215 // state[rid].counter--;
4216 // return rd_done
4217 void
4219 {
4221 } // execute
4222 // --- Inst_DS__DS_GWS_BARRIER class methods ---
4223
4225 : Inst_DS(iFmt, "ds_gws_barrier")
4226 {
4227 } // Inst_DS__DS_GWS_BARRIER
4228
4230 {
4231 } // ~Inst_DS__DS_GWS_BARRIER
4232
4233 // --- description from .arch file ---
4234 // GDS Only: The GWS resource indicated will process this opcode by
4235 // queueing it until barrier is satisfied. The number of waves needed is
4236 // passed in as DATA of first valid thread.
4237 // //Determine the GWS resource to work on
4238 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + OFFSET0[5:0];
4239 // index = find first valid (vector mask);
4240 // value = DATA[thread: index];
4241 // // Input Decision Machine
4242 // state.type[rid] = BARRIER;
4243 // if (state[rid].counter <= 0) {
4244 // thread[rid].flag = state[rid].flag;
4245 // ENQUEUE;
4246 // state[rid].flag = !state.flag;
4247 // state[rid].counter = value;
4248 // return rd_done;
4249 // } else {
4250 // state[rid].counter--;
4251 // thread.flag = state[rid].flag;
4252 // ENQUEUE;
4253 // }
4254 // Since the waves deliver the count for the next barrier, this function
4255 // can have a different size barrier for each occurrence.
4256 // // Release Machine
4257 // if (state.type == BARRIER) {
4258 // if (state.flag != thread.flag) {
4259 // return rd_done;
4260 // }
4261 // }
4262 void
4264 {
4266 } // execute
4267 // --- Inst_DS__DS_CONSUME class methods ---
4268
4270 : Inst_DS(iFmt, "ds_consume")
4271 {
4272 } // Inst_DS__DS_CONSUME
4273
4275 {
4276 } // ~Inst_DS__DS_CONSUME
4277
4278 // --- description from .arch file ---
4279 // LDS & GDS. Subtract (count_bits(exec_mask)) from the value stored in DS
4280 // memory at (M0.base + instr_offset). Return the pre-operation value to
4281 // VGPRs.
4282 void
4284 {
4286 } // execute
4287 // --- Inst_DS__DS_APPEND class methods ---
4288
4290 : Inst_DS(iFmt, "ds_append")
4291 {
4292 } // Inst_DS__DS_APPEND
4293
4295 {
4296 } // ~Inst_DS__DS_APPEND
4297
4298 // --- description from .arch file ---
4299 // LDS & GDS. Add (count_bits(exec_mask)) to the value stored in DS memory
4300 // at (M0.base + instr_offset). Return the pre-operation value to VGPRs.
4301 void
4303 {
4305 } // execute
4306 // --- Inst_DS__DS_ORDERED_COUNT class methods ---
4307
4309 : Inst_DS(iFmt, "ds_ordered_count")
4310 {
4311 } // Inst_DS__DS_ORDERED_COUNT
4312
4314 {
4315 } // ~Inst_DS__DS_ORDERED_COUNT
4316
4317 // --- description from .arch file ---
4318 // GDS-only. Add (count_bits(exec_mask)) to one of 4 dedicated
4319 // ordered-count counters (aka 'packers'). Additional bits of instr.offset
4320 // field are overloaded to hold packer-id, 'last'.
4321 void
4323 {
4325 } // execute
4326 // --- Inst_DS__DS_ADD_SRC2_U64 class methods ---
4327
4329 : Inst_DS(iFmt, "ds_add_src2_u64")
4330 {
4331 } // Inst_DS__DS_ADD_SRC2_U64
4332
4334 {
4335 } // ~Inst_DS__DS_ADD_SRC2_U64
4336
4337 // --- description from .arch file ---
4338 // 64b:
4339 // A = ADDR_BASE;
4340 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4341 // --- {offset1[6],offset1[6:0],offset0});
4342 // MEM[A] = MEM[A] + MEM[B].
4343 void
4345 {
4347 } // execute
4348 // --- Inst_DS__DS_SUB_SRC2_U64 class methods ---
4349
4351 : Inst_DS(iFmt, "ds_sub_src2_u64")
4352 {
4353 } // Inst_DS__DS_SUB_SRC2_U64
4354
4356 {
4357 } // ~Inst_DS__DS_SUB_SRC2_U64
4358
4359 // --- description from .arch file ---
4360 // 64b:
4361 // A = ADDR_BASE;
4362 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4363 // --- {offset1[6],offset1[6:0],offset0});
4364 // MEM[A] = MEM[A] - MEM[B].
4365 void
4367 {
4369 } // execute
4370 // --- Inst_DS__DS_RSUB_SRC2_U64 class methods ---
4371
4373 : Inst_DS(iFmt, "ds_rsub_src2_u64")
4374 {
4375 } // Inst_DS__DS_RSUB_SRC2_U64
4376
4378 {
4379 } // ~Inst_DS__DS_RSUB_SRC2_U64
4380
4381 // --- description from .arch file ---
4382 // 64b:
4383 // A = ADDR_BASE;
4384 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4385 // --- {offset1[6],offset1[6:0],offset0});
4386 // MEM[A] = MEM[B] - MEM[A].
4387 void
4389 {
4391 } // execute
4392 // --- Inst_DS__DS_INC_SRC2_U64 class methods ---
4393
4395 : Inst_DS(iFmt, "ds_inc_src2_u64")
4396 {
4397 } // Inst_DS__DS_INC_SRC2_U64
4398
4400 {
4401 } // ~Inst_DS__DS_INC_SRC2_U64
4402
4403 // --- description from .arch file ---
4404 // 64b:
4405 // A = ADDR_BASE;
4406 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4407 // --- {offset1[6],offset1[6:0],offset0});
4408 // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
4409 void
4411 {
4413 } // execute
4414 // --- Inst_DS__DS_DEC_SRC2_U64 class methods ---
4415
4417 : Inst_DS(iFmt, "ds_dec_src2_u64")
4418 {
4419 } // Inst_DS__DS_DEC_SRC2_U64
4420
4422 {
4423 } // ~Inst_DS__DS_DEC_SRC2_U64
4424
4425 // --- description from .arch file ---
4426 // 64b:
4427 // A = ADDR_BASE;
4428 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4429 // --- {offset1[6],offset1[6:0],offset0});
4430 // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
4431 // Uint decrement.
4432 void
4434 {
4436 } // execute
4437 // --- Inst_DS__DS_MIN_SRC2_I64 class methods ---
4438
4440 : Inst_DS(iFmt, "ds_min_src2_i64")
4441 {
4442 } // Inst_DS__DS_MIN_SRC2_I64
4443
4445 {
4446 } // ~Inst_DS__DS_MIN_SRC2_I64
4447
4448 // --- description from .arch file ---
4449 // 64b:
4450 // A = ADDR_BASE;
4451 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4452 // --- {offset1[6],offset1[6:0],offset0});
4453 // MEM[A] = min(MEM[A], MEM[B]).
4454 void
4456 {
4458 } // execute
4459 // --- Inst_DS__DS_MAX_SRC2_I64 class methods ---
4460
4462 : Inst_DS(iFmt, "ds_max_src2_i64")
4463 {
4464 } // Inst_DS__DS_MAX_SRC2_I64
4465
4467 {
4468 } // ~Inst_DS__DS_MAX_SRC2_I64
4469
4470 // --- description from .arch file ---
4471 // 64b:
4472 // A = ADDR_BASE;
4473 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4474 // --- {offset1[6],offset1[6:0],offset0});
4475 // MEM[A] = max(MEM[A], MEM[B]).
4476 void
4478 {
4480 } // execute
4481 // --- Inst_DS__DS_MIN_SRC2_U64 class methods ---
4482
4484 : Inst_DS(iFmt, "ds_min_src2_u64")
4485 {
4486 } // Inst_DS__DS_MIN_SRC2_U64
4487
4489 {
4490 } // ~Inst_DS__DS_MIN_SRC2_U64
4491
4492 // --- description from .arch file ---
4493 // 64b:
4494 // A = ADDR_BASE;
4495 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4496 // --- {offset1[6],offset1[6:0],offset0});
4497 // MEM[A] = min(MEM[A], MEM[B]).
4498 void
4500 {
4502 } // execute
4503 // --- Inst_DS__DS_MAX_SRC2_U64 class methods ---
4504
4506 : Inst_DS(iFmt, "ds_max_src2_u64")
4507 {
4508 } // Inst_DS__DS_MAX_SRC2_U64
4509
4511 {
4512 } // ~Inst_DS__DS_MAX_SRC2_U64
4513
4514 // --- description from .arch file ---
4515 // 64b:
4516 // A = ADDR_BASE;
4517 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4518 // --- {offset1[6],offset1[6:0],offset0});
4519 // MEM[A] = max(MEM[A], MEM[B]).
4520 void
4522 {
4524 } // execute
4525 // --- Inst_DS__DS_AND_SRC2_B64 class methods ---
4526
4528 : Inst_DS(iFmt, "ds_and_src2_b64")
4529 {
4530 } // Inst_DS__DS_AND_SRC2_B64
4531
4533 {
4534 } // ~Inst_DS__DS_AND_SRC2_B64
4535
4536 // --- description from .arch file ---
4537 // 64b:
4538 // A = ADDR_BASE;
4539 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4540 // --- {offset1[6],offset1[6:0],offset0});
4541 // MEM[A] = MEM[A] & MEM[B].
4542 void
4544 {
4546 } // execute
4547 // --- Inst_DS__DS_OR_SRC2_B64 class methods ---
4548
4550 : Inst_DS(iFmt, "ds_or_src2_b64")
4551 {
4552 } // Inst_DS__DS_OR_SRC2_B64
4553
4555 {
4556 } // ~Inst_DS__DS_OR_SRC2_B64
4557
4558 // --- description from .arch file ---
4559 // 64b:
4560 // A = ADDR_BASE;
4561 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4562 // --- {offset1[6],offset1[6:0],offset0});
4563 // MEM[A] = MEM[A] | MEM[B].
4564 void
4566 {
4568 } // execute
4569 // --- Inst_DS__DS_XOR_SRC2_B64 class methods ---
4570
4572 : Inst_DS(iFmt, "ds_xor_src2_b64")
4573 {
4574 } // Inst_DS__DS_XOR_SRC2_B64
4575
4577 {
4578 } // ~Inst_DS__DS_XOR_SRC2_B64
4579
4580 // --- description from .arch file ---
4581 // 64b:
4582 // A = ADDR_BASE;
4583 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4584 // --- {offset1[6],offset1[6:0],offset0});
4585 // MEM[A] = MEM[A] ^ MEM[B].
4586 void
4588 {
4590 } // execute
4591 // --- Inst_DS__DS_WRITE_SRC2_B64 class methods ---
4592
4594 : Inst_DS(iFmt, "ds_write_src2_b64")
4595 {
4596 setFlag(MemoryRef);
4597 setFlag(Store);
4598 } // Inst_DS__DS_WRITE_SRC2_B64
4599
4601 {
4602 } // ~Inst_DS__DS_WRITE_SRC2_B64
4603
4604 // --- description from .arch file ---
4605 // 64b:
4606 // A = ADDR_BASE;
4607 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4608 // --- {offset1[6],offset1[6:0],offset0});
4609 // MEM[A] = MEM[B].
4610 // Write qword.
4611 void
4613 {
4615 } // execute
4616 // --- Inst_DS__DS_MIN_SRC2_F64 class methods ---
4617
4619 : Inst_DS(iFmt, "ds_min_src2_f64")
4620 {
4621 setFlag(F64);
4622 } // Inst_DS__DS_MIN_SRC2_F64
4623
4625 {
4626 } // ~Inst_DS__DS_MIN_SRC2_F64
4627
4628 // --- description from .arch file ---
4629 // 64b:
4630 // A = ADDR_BASE;
4631 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4632 // --- {offset1[6],offset1[6:0],offset0});
4633 // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
4634 // Float, handles NaN/INF/denorm.
4635 void
4637 {
4639 } // execute
4640 // --- Inst_DS__DS_MAX_SRC2_F64 class methods ---
4641
4643 : Inst_DS(iFmt, "ds_max_src2_f64")
4644 {
4645 setFlag(F64);
4646 } // Inst_DS__DS_MAX_SRC2_F64
4647
4649 {
4650 } // ~Inst_DS__DS_MAX_SRC2_F64
4651
4652 // --- description from .arch file ---
4653 // 64b:
4654 // A = ADDR_BASE;
4655 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4656 // --- {offset1[6],offset1[6:0],offset0});
4657 // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
4658 // Float, handles NaN/INF/denorm.
4659 void
4661 {
4663 } // execute
4664 // --- Inst_DS__DS_WRITE_B96 class methods ---
4665
4666 Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt)
4667 : Inst_DS(iFmt, "ds_write_b96")
4668 {
4669 setFlag(MemoryRef);
4670 setFlag(Store);
4671 } // Inst_DS__DS_WRITE_B96
4672
4673 Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96()
4674 {
4675 } // ~Inst_DS__DS_WRITE_B96
4676
4677 // --- description from .arch file ---
4678 // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0].
4679 // Tri-dword write.
4680 void
4681 Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst)
4682 {
4683 Wavefront *wf = gpuDynInst->wavefront();
4684 gpuDynInst->execUnitId = wf->execUnitId;
4685 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4686 gpuDynInst->latency.set(
4687 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
4688 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
4689 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
4690 ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1);
4691 ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2);
4692
4693 addr.read();
4694 data0.read();
4695 data1.read();
4696 data2.read();
4697
4698 calcAddr(gpuDynInst, addr);
4699
4700 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4701 if (gpuDynInst->exec_mask[lane]) {
4702 (reinterpret_cast<VecElemU32*>(
4703 gpuDynInst->d_data))[lane * 3] = data0[lane];
4704 (reinterpret_cast<VecElemU32*>(
4705 gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
4706 (reinterpret_cast<VecElemU32*>(
4707 gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
4708 }
4709 }
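            // The three dwords per lane are packed contiguously (stride 3)
            // in d_data for the 96-bit store issued below.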
4710
4711 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
4712 } // execute
4713
4714 void
4715 Inst_DS__DS_WRITE_B96::initiateAcc(GPUDynInstPtr gpuDynInst)
4716 {
4717 Addr offset0 = instData.OFFSET0;
4718 Addr offset1 = instData.OFFSET1;
4719 Addr offset = (offset1 << 8) | offset0;
4720
4721 initMemWrite<3>(gpuDynInst, offset);
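            // initMemWrite<3> stores three dwords (96 bits) per active lane
            // at the 16-bit instruction offset.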
4722 } // initiateAcc
4723
4724 void
4725 Inst_DS__DS_WRITE_B96::completeAcc(GPUDynInstPtr gpuDynInst)
4726 {
4727 } // completeAcc
4728 // --- Inst_DS__DS_WRITE_B128 class methods ---
4729
4730 Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt)
4731 : Inst_DS(iFmt, "ds_write_b128")
4732 {
4733 setFlag(MemoryRef);
4734 setFlag(Store);
4735 } // Inst_DS__DS_WRITE_B128
4736
4737 Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128()
4738 {
4739 } // ~Inst_DS__DS_WRITE_B128
4740
4741 // --- description from .arch file ---
4742 // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0].
4743 // Qword write.
4744 void
4745 Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst)
4746 {
4747 Wavefront *wf = gpuDynInst->wavefront();
4748 gpuDynInst->execUnitId = wf->execUnitId;
4749 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4750 gpuDynInst->latency.set(
4751 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
4752 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
4753 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
4754 ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1);
4755 ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2);
4756 ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3);
4757
4758 addr.read();
4759 data0.read();
4760 data1.read();
4761 data2.read();
4762 data3.read();
4763
4764 calcAddr(gpuDynInst, addr);
4765
4766 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4767 if (gpuDynInst->exec_mask[lane]) {
4768 (reinterpret_cast<VecElemU32*>(
4769 gpuDynInst->d_data))[lane * 4] = data0[lane];
4770 (reinterpret_cast<VecElemU32*>(
4771 gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
4772 (reinterpret_cast<VecElemU32*>(
4773 gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
4774 (reinterpret_cast<VecElemU32*>(
4775 gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
4776 }
4777 }
4778
4779 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
4780 } // execute
4781
4782 void
4783 Inst_DS__DS_WRITE_B128::initiateAcc(GPUDynInstPtr gpuDynInst)
4784 {
4785 Addr offset0 = instData.OFFSET0;
4786 Addr offset1 = instData.OFFSET1;
4787 Addr offset = (offset1 << 8) | offset0;
4788
4789 initMemWrite<4>(gpuDynInst, offset);
4790 } // initiateAcc
4791
4792 void
4793 Inst_DS__DS_WRITE_B128::completeAcc(GPUDynInstPtr gpuDynInst)
4794 {
4795 } // completeAcc
4796 // --- Inst_DS__DS_READ_B96 class methods ---
4797
4798 Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt)
4799 : Inst_DS(iFmt, "ds_read_b96")
4800 {
4801 setFlag(MemoryRef);
4802 setFlag(Load);
4803 } // Inst_DS__DS_READ_B96
4804
4805 Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96()
4806 {
4807 } // ~Inst_DS__DS_READ_B96
4808
4809 // --- description from .arch file ---
4810 // Tri-dword read.
4811 void
4812 Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst)
4813 {
4814 Wavefront *wf = gpuDynInst->wavefront();
4815 gpuDynInst->execUnitId = wf->execUnitId;
4816 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4817 gpuDynInst->latency.set(
4818 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
4819 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
4820
4821 addr.read();
4822
4823 calcAddr(gpuDynInst, addr);
4824
4825 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
4826 } // execute
4827
4828 void
4829 Inst_DS__DS_READ_B96::initiateAcc(GPUDynInstPtr gpuDynInst)
4830 {
4831 Addr offset0 = instData.OFFSET0;
4832 Addr offset1 = instData.OFFSET1;
4833 Addr offset = (offset1 << 8) | offset0;
4834
4835 initMemRead<3>(gpuDynInst, offset);
4836 }
4837
4838 void
4839 Inst_DS__DS_READ_B96::completeAcc(GPUDynInstPtr gpuDynInst)
4840 {
4841 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
4842 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
4843 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
4844
4845 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4846 if (gpuDynInst->exec_mask[lane]) {
4847 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
4848 gpuDynInst->d_data))[lane * 3];
4849 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
4850 gpuDynInst->d_data))[lane * 3 + 1];
4851 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
4852 gpuDynInst->d_data))[lane * 3 + 2];
4853 }
4854 }
4855
4856 vdst0.write();
4857 vdst1.write();
4858 vdst2.write();
4859 }
4860 // --- Inst_DS__DS_READ_B128 class methods ---
4861
4862 Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt)
4863 : Inst_DS(iFmt, "ds_read_b128")
4864 {
4865 setFlag(MemoryRef);
4866 setFlag(Load);
4867 } // Inst_DS__DS_READ_B128
4868
4869 Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128()
4870 {
4871 } // ~Inst_DS__DS_READ_B128
4872
4873 // --- description from .arch file ---
4874 // Qword read.
4875 void
4876 Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst)
4877 {
4878 Wavefront *wf = gpuDynInst->wavefront();
4879 gpuDynInst->execUnitId = wf->execUnitId;
4880 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4881 gpuDynInst->latency.set(
4882 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
4883 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
4884
4885 addr.read();
4886
4887 calcAddr(gpuDynInst, addr);
4888
4889 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
4890 } // execute
4891
4892 void
4893 Inst_DS__DS_READ_B128::initiateAcc(GPUDynInstPtr gpuDynInst)
4894 {
4895 Addr offset0 = instData.OFFSET0;
4896 Addr offset1 = instData.OFFSET1;
4897 Addr offset = (offset1 << 8) | offset0;
4898
4899 initMemRead<4>(gpuDynInst, offset);
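            // Reads four dwords (128 bits) per active lane; completeAcc()
            // copies them into four consecutive destination VGPRs.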
4900 } // initiateAcc
4901
4902 void
4903 Inst_DS__DS_READ_B128::completeAcc(GPUDynInstPtr gpuDynInst)
4904 {
4905 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
4906 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
4907 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
4908 VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);
4909
4910 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4911 if (gpuDynInst->exec_mask[lane]) {
4912 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
4913 gpuDynInst->d_data))[lane * 4];
4914 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
4915 gpuDynInst->d_data))[lane * 4 + 1];
4916 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
4917 gpuDynInst->d_data))[lane * 4 + 2];
4918 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
4919 gpuDynInst->d_data))[lane * 4 + 3];
4920 }
4921 }
4922
4923 vdst0.write();
4924 vdst1.write();
4925 vdst2.write();
4926 vdst3.write();
4927 } // completeAcc
4928} // namespace VegaISA
4929} // namespace gem5
const char data[]
std::vector< VectorRegisterFile * > vrf
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
void setFlag(Flags flag)
Nop class.
Definition nop.hh:49
void execute(GPUDynInstPtr) override
Definition ds.cc:749
void initiateAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:782
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:792
void execute(GPUDynInstPtr) override
Definition ds.cc:1576
void execute(GPUDynInstPtr) override
Definition ds.cc:1004
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1036
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1046
void execute(GPUDynInstPtr) override
Definition ds.cc:3082
void execute(GPUDynInstPtr) override
Definition ds.cc:4081
void execute(GPUDynInstPtr) override
Definition ds.cc:3741
void execute(GPUDynInstPtr) override
Definition ds.cc:4344
Inst_DS__DS_ADD_U32(InFmt_DS *)
Definition ds.cc:41
void execute(GPUDynInstPtr) override
Definition ds.cc:58
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:101
void initiateAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:91
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:2502
void execute(GPUDynInstPtr) override
Definition ds.cc:2459
void initiateAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:2492
void execute(GPUDynInstPtr) override
Definition ds.cc:290
void execute(GPUDynInstPtr) override
Definition ds.cc:2692
Cross-reference entries for the per-instruction execute(GPUDynInstPtr), initiateAcc(GPUDynInstPtr), and completeAcc(GPUDynInstPtr) overrides defined throughout ds.cc.
Inst_DS__DS_NOP(InFmt_DS *)
Definition ds.cc:710
void initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
void initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
void initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
Inst_DS(InFmt_DS *, const std::string &opcode)
void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
void initAtomicAccess(GPUDynInstPtr gpuDynInst, Addr offset)
void initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
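The Inst_DS helpers above are the building blocks that the individual DS instructions combine in their execute()/initiateAcc()/completeAcc() hooks. As a rough, hedged sketch only (not a method defined in this file), a hypothetical dual-dword LDS load could issue its access as follows; the class name Inst_DS__DS_EXAMPLE_READ2, the VecElemU32 template argument, and the 4-byte offset scaling are assumptions for the example.
// Illustrative sketch, not part of ds.cc. Class name, element-type
// template argument, and offset scaling are assumed for illustration.
void
Inst_DS__DS_EXAMPLE_READ2::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    // For a dual access the two 8-bit offset fields address two
    // independent elements, scaled here by an assumed 4-byte size.
    Addr offset0 = instData.OFFSET0 * 4;
    Addr offset1 = instData.OFFSET1 * 4;

    // Issue two 32-bit LDS reads per active lane at the per-lane
    // addresses previously computed by calcAddr() in execute().
    initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
} // initiateAcc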
void read() override
read from the vrf.
Definition operand.hh:148
void write() override
write to the vrf.
Definition operand.hh:203
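The read()/write() overrides above move vector operands between an instruction and the VRF. A minimal, hedged sketch of the write-back direction for a single-dword load, assuming the common gem5 GPU-ISA convention that returned data is staged in gpuDynInst->d_data and that the destination register index is in extData.VDST (both assumptions here, not facts taken from this file):
// Illustrative sketch, not part of ds.cc. The d_data staging buffer
// and extData.VDST destination index are assumed conventions.
void
Inst_DS__DS_EXAMPLE_READ::completeAcc(GPUDynInstPtr gpuDynInst)
{
    VecOperandU32 vdst(gpuDynInst, extData.VDST);

    // Copy each active lane's returned dword into the destination
    // operand, then commit the operand to the VRF.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vdst[lane] = (reinterpret_cast<VecElemU32*>(
                gpuDynInst->d_data))[lane];
        }
    }

    vdst.write();
} // completeAcc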
const int simdId
Definition wavefront.hh:102
ComputeUnit * computeUnit
Definition wavefront.hh:109
void untrackLGKMInst(GPUDynInstPtr gpu_dyn_inst)
void decLGKMInstsIssued()
VectorMask & execMask()
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr uint64_t sext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
Definition bitfield.hh:129
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition bitfield.hh:216
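bits(), sext<N>() and replaceBits() above are the generic bit-manipulation helpers used when decoding and reassembling instruction fields. A small self-contained illustration, assuming a build against gem5's base/bitfield.hh; the 16-bit field layout below is an arbitrary example, not a VEGA encoding definition.
#include <cassert>
#include <cstdint>

#include "base/bitfield.hh"

int main()
{
    uint32_t insn = 0x0000ab12;

    // Extract bits 15..8 and 7..0 (MSB-first argument order).
    uint64_t hi = gem5::bits(insn, 15, 8);   // 0xab
    uint64_t lo = gem5::bits(insn, 7, 0);    // 0x12

    // Reassemble a 16-bit field and sign-extend it to 64 bits.
    uint64_t field = (hi << 8) | lo;         // 0xab12
    int64_t value = gem5::sext<16>(field);   // 0xffffffffffffab12

    // Overwrite bits 15..8 of the word in place.
    gem5::replaceBits(insn, 15, 8, 0xcd);    // insn == 0x0000cd12

    assert(value == static_cast<int64_t>(0xffffffffffffab12ull));
    assert(insn == 0x0000cd12);
    return 0;
}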
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:246
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 30, 0 > index
Classes that represent vector/scalar operands in VEGA ISA.
Definition faults.cc:39
VecOperand< VecElemF32, true > ConstVecOperandF32
Definition operand.hh:846
VecOperand< VecElemU32, false > VecOperandU32
Definition operand.hh:829
VecOperand< VecElemU8, true, 1 > ConstVecOperandU8
Definition operand.hh:840
VecOperand< VecElemU32, true > ConstVecOperandU32
Definition operand.hh:844
uint16_t VecElemU16
uint32_t VecElemU32
VecOperand< VecElemU16, true, 1 > ConstVecOperandU16
Definition operand.hh:842
const int NumVecElemPerVecReg(64)
uint64_t VecElemU64
VecOperand< VecElemU64, false > VecOperandU64
Definition operand.hh:832
VecOperand< VecElemU64, true > ConstVecOperandU64
Definition operand.hh:847
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147

Generated on Mon May 26 2025 09:18:31 for gem5 by doxygen 1.13.2