gem5 v24.0.0.0
Loading...
Searching...
No Matches
ds.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34namespace gem5
35{
36
37namespace VegaISA
38{
39 // --- Inst_DS__DS_ADD_U32 class methods ---
40
42 : Inst_DS(iFmt, "ds_add_u32")
43 {
44 setFlag(MemoryRef);
45 setFlag(GroupSegment);
46 setFlag(AtomicAdd);
47 setFlag(AtomicNoReturn);
48 } // Inst_DS__DS_ADD_U32
49
51 {
52 } // ~Inst_DS__DS_ADD_U32
53
54 // --- description from .arch file ---
55 // 32b:
56 // MEM[ADDR] += DATA;
57 void
59 {
60 Wavefront *wf = gpuDynInst->wavefront();
61
62 if (gpuDynInst->exec_mask.none()) {
64 return;
65 }
66
67 gpuDynInst->execUnitId = wf->execUnitId;
68 gpuDynInst->latency.init(gpuDynInst->computeUnit());
69 gpuDynInst->latency.set(
70 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
73
74 addr.read();
75 data.read();
76
77 calcAddr(gpuDynInst, addr);
78
79 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
80 if (gpuDynInst->exec_mask[lane]) {
81 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
82 = data[lane];
83 }
84 }
85
86 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
87 } // execute
88
89 void
91 {
92 Addr offset0 = instData.OFFSET0;
93 Addr offset1 = instData.OFFSET1;
94 Addr offset = (offset1 << 8) | offset0;
95
97 } // initiateAcc
98
99 void
101 {
102 } // completeAcc
103 // --- Inst_DS__DS_SUB_U32 class methods ---
104
106 : Inst_DS(iFmt, "ds_sub_u32")
107 {
108 } // Inst_DS__DS_SUB_U32
109
111 {
112 } // ~Inst_DS__DS_SUB_U32
113
114 // --- description from .arch file ---
115 // 32b:
116 // tmp = MEM[ADDR];
117 // MEM[ADDR] -= DATA;
118 // RETURN_DATA = tmp.
119 void
121 {
123 } // execute
124 // --- Inst_DS__DS_RSUB_U32 class methods ---
125
127 : Inst_DS(iFmt, "ds_rsub_u32")
128 {
129 } // Inst_DS__DS_RSUB_U32
130
132 {
133 } // ~Inst_DS__DS_RSUB_U32
134
135 // --- description from .arch file ---
136 // 32b:
137 // tmp = MEM[ADDR];
138 // MEM[ADDR] = DATA - MEM[ADDR];
139 // RETURN_DATA = tmp.
140 // Subtraction with reversed operands.
141 void
143 {
145 } // execute
146 // --- Inst_DS__DS_INC_U32 class methods ---
147
149 : Inst_DS(iFmt, "ds_inc_u32")
150 {
151 } // Inst_DS__DS_INC_U32
152
154 {
155 } // ~Inst_DS__DS_INC_U32
156
157 // --- description from .arch file ---
158 // 32b:
159 // tmp = MEM[ADDR];
160 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
161 // RETURN_DATA = tmp.
162 void
164 {
166 } // execute
167 // --- Inst_DS__DS_DEC_U32 class methods ---
168
170 : Inst_DS(iFmt, "ds_dec_u32")
171 {
172 } // Inst_DS__DS_DEC_U32
173
175 {
176 } // ~Inst_DS__DS_DEC_U32
177
178 // --- description from .arch file ---
179 // 32b:
180 // tmp = MEM[ADDR];
181 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
182 // (unsigned compare); RETURN_DATA = tmp.
183 void
185 {
187 } // execute
188 // --- Inst_DS__DS_MIN_I32 class methods ---
189
191 : Inst_DS(iFmt, "ds_min_i32")
192 {
193 } // Inst_DS__DS_MIN_I32
194
196 {
197 } // ~Inst_DS__DS_MIN_I32
198
199 // --- description from .arch file ---
200 // 32b:
201 // tmp = MEM[ADDR];
202 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
203 // RETURN_DATA = tmp.
204 void
206 {
208 } // execute
209 // --- Inst_DS__DS_MAX_I32 class methods ---
210
212 : Inst_DS(iFmt, "ds_max_i32")
213 {
214 } // Inst_DS__DS_MAX_I32
215
217 {
218 } // ~Inst_DS__DS_MAX_I32
219
220 // --- description from .arch file ---
221 // 32b:
222 // tmp = MEM[ADDR];
223 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
224 // RETURN_DATA = tmp.
225 void
227 {
229 } // execute
230 // --- Inst_DS__DS_MIN_U32 class methods ---
231
233 : Inst_DS(iFmt, "ds_min_u32")
234 {
235 } // Inst_DS__DS_MIN_U32
236
238 {
239 } // ~Inst_DS__DS_MIN_U32
240
241 // --- description from .arch file ---
242 // 32b:
243 // tmp = MEM[ADDR];
244 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
245 // RETURN_DATA = tmp.
246 void
248 {
250 } // execute
251 // --- Inst_DS__DS_MAX_U32 class methods ---
252
254 : Inst_DS(iFmt, "ds_max_u32")
255 {
256 } // Inst_DS__DS_MAX_U32
257
259 {
260 } // ~Inst_DS__DS_MAX_U32
261
262 // --- description from .arch file ---
263 // 32b:
264 // tmp = MEM[ADDR];
265 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
266 // RETURN_DATA = tmp.
267 void
269 {
271 } // execute
272 // --- Inst_DS__DS_AND_B32 class methods ---
273
275 : Inst_DS(iFmt, "ds_and_b32")
276 {
277 } // Inst_DS__DS_AND_B32
278
280 {
281 } // ~Inst_DS__DS_AND_B32
282
283 // --- description from .arch file ---
284 // 32b:
285 // tmp = MEM[ADDR];
286 // MEM[ADDR] &= DATA;
287 // RETURN_DATA = tmp.
288 void
290 {
292 } // execute
293 // --- Inst_DS__DS_OR_B32 class methods ---
294
296 : Inst_DS(iFmt, "ds_or_b32")
297 {
298 setFlag(MemoryRef);
299 setFlag(GroupSegment);
300 setFlag(AtomicOr);
301 setFlag(AtomicNoReturn);
302 } // Inst_DS__DS_OR_B32
303
305 {
306 } // ~Inst_DS__DS_OR_B32
307
308 // --- description from .arch file ---
309 // 32b:
310 // MEM[ADDR] |= DATA;
311 void
313 {
314 Wavefront *wf = gpuDynInst->wavefront();
315
316 if (gpuDynInst->exec_mask.none()) {
317 wf->decLGKMInstsIssued();
318 return;
319 }
320
321 gpuDynInst->execUnitId = wf->execUnitId;
322 gpuDynInst->latency.init(gpuDynInst->computeUnit());
323 gpuDynInst->latency.set(
324 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
325 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
327
328 addr.read();
329 data.read();
330
331 calcAddr(gpuDynInst, addr);
332
333 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
334 if (gpuDynInst->exec_mask[lane]) {
335 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
336 = data[lane];
337 }
338 }
339
340 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
341 } // execute
342
343 void
345 {
346 Addr offset0 = instData.OFFSET0;
347 Addr offset1 = instData.OFFSET1;
348 Addr offset = (offset1 << 8) | offset0;
349
351 } // initiateAcc
352
353 void
355 {
356 } // completeAcc
357
358 // --- Inst_DS__DS_XOR_B32 class methods ---
359
361 : Inst_DS(iFmt, "ds_xor_b32")
362 {
363 } // Inst_DS__DS_XOR_B32
364
366 {
367 } // ~Inst_DS__DS_XOR_B32
368
369 // --- description from .arch file ---
370 // 32b:
371 // tmp = MEM[ADDR];
372 // MEM[ADDR] ^= DATA;
373 // RETURN_DATA = tmp.
374 void
376 {
378 } // execute
379 // --- Inst_DS__DS_MSKOR_B32 class methods ---
380
382 : Inst_DS(iFmt, "ds_mskor_b32")
383 {
384 } // Inst_DS__DS_MSKOR_B32
385
387 {
388 } // ~Inst_DS__DS_MSKOR_B32
389
390 // --- description from .arch file ---
391 // 32b:
392 // tmp = MEM[ADDR];
393 // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
394 // RETURN_DATA = tmp.
395 // Masked dword OR, D0 contains the mask and D1 contains the new value.
396 void
398 {
400 } // execute
401 // --- Inst_DS__DS_WRITE_B32 class methods ---
402
404 : Inst_DS(iFmt, "ds_write_b32")
405 {
406 setFlag(MemoryRef);
407 setFlag(Store);
408 } // Inst_DS__DS_WRITE_B32
409
411 {
412 } // ~Inst_DS__DS_WRITE_B32
413
414 // --- description from .arch file ---
415 // 32b:
416 // MEM[ADDR] = DATA.
417 // Write dword.
418 void
420 {
421 Wavefront *wf = gpuDynInst->wavefront();
422
423 if (gpuDynInst->exec_mask.none()) {
424 wf->decLGKMInstsIssued();
425 return;
426 }
427
428 gpuDynInst->execUnitId = wf->execUnitId;
429 gpuDynInst->latency.init(gpuDynInst->computeUnit());
430 gpuDynInst->latency.set(
431 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
432 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
434
435 addr.read();
436 data.read();
437
438 calcAddr(gpuDynInst, addr);
439
440 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
441 if (gpuDynInst->exec_mask[lane]) {
442 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
443 = data[lane];
444 }
445 }
446
447 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
448 } // execute
449
450 void
452 {
453 Addr offset0 = instData.OFFSET0;
454 Addr offset1 = instData.OFFSET1;
455 Addr offset = (offset1 << 8) | offset0;
456
457 initMemWrite<VecElemU32>(gpuDynInst, offset);
458 } // initiateAcc
459
460 void
462 {
463 } // completeAcc
464 // --- Inst_DS__DS_WRITE2_B32 class methods ---
465
467 : Inst_DS(iFmt, "ds_write2_b32")
468 {
469 setFlag(MemoryRef);
470 setFlag(Store);
471 } // Inst_DS__DS_WRITE2_B32
472
474 {
475 } // ~Inst_DS__DS_WRITE2_B32
476
477 // --- description from .arch file ---
478 // 32b:
479 // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
480 // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
481 // Write 2 dwords.
482 void
484 {
485 Wavefront *wf = gpuDynInst->wavefront();
486
487 if (gpuDynInst->exec_mask.none()) {
488 wf->decLGKMInstsIssued();
489 return;
490 }
491
492 gpuDynInst->execUnitId = wf->execUnitId;
493 gpuDynInst->latency.init(gpuDynInst->computeUnit());
494 gpuDynInst->latency.set(
495 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
496 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
497 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
498 ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
499
500 addr.read();
501 data0.read();
502 data1.read();
503
504 calcAddr(gpuDynInst, addr);
505
506 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
507 if (gpuDynInst->exec_mask[lane]) {
508 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
509 = data0[lane];
510 (reinterpret_cast<VecElemU32*>(
511 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
512 }
513 }
514
515 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
516 } // execute
517
518 void
520 {
521 Addr offset0 = instData.OFFSET0 * 4;
522 Addr offset1 = instData.OFFSET1 * 4;
523
524 initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
525 }
526
527 void
531 // --- Inst_DS__DS_WRITE2ST64_B32 class methods ---
532
534 : Inst_DS(iFmt, "ds_write2st64_b32")
535 {
536 setFlag(MemoryRef);
537 setFlag(Store);
538 } // Inst_DS__DS_WRITE2ST64_B32
539
541 {
542 } // ~Inst_DS__DS_WRITE2ST64_B32
543
544 // --- description from .arch file ---
545 // 32b:
546 // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
547 // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
548 // Write 2 dwords.
549 void
551 {
552 Wavefront *wf = gpuDynInst->wavefront();
553
554 if (gpuDynInst->exec_mask.none()) {
555 wf->decLGKMInstsIssued();
556 return;
557 }
558
559 gpuDynInst->execUnitId = wf->execUnitId;
560 gpuDynInst->latency.init(gpuDynInst->computeUnit());
561 gpuDynInst->latency.set(
562 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
563 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
564 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
565 ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
566
567 addr.read();
568 data0.read();
569 data1.read();
570
571 calcAddr(gpuDynInst, addr);
572
573 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
574 if (gpuDynInst->exec_mask[lane]) {
575 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
576 = data0[lane];
577 (reinterpret_cast<VecElemU32*>(
578 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
579 }
580 }
581
582 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
583 } // execute
584
585 void
587 {
588 Addr offset0 = instData.OFFSET0 * 4 * 64;
589 Addr offset1 = instData.OFFSET1 * 4 * 64;
590
591 initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
592 }
593
594 void
598 // --- Inst_DS__DS_CMPST_B32 class methods ---
599
601 : Inst_DS(iFmt, "ds_cmpst_b32")
602 {
603 } // Inst_DS__DS_CMPST_B32
604
606 {
607 } // ~Inst_DS__DS_CMPST_B32
608
609 // --- description from .arch file ---
610 // 32b:
611 // tmp = MEM[ADDR];
612 // src = DATA2;
613 // cmp = DATA;
614 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
615 // RETURN_DATA[0] = tmp.
616 // Compare and store.
617 // Caution, the order of src and cmp is the *opposite* of that of the
618 // BUFFER_ATOMIC_CMPSWAP opcode.
619 void
621 {
623 } // execute
624 // --- Inst_DS__DS_CMPST_F32 class methods ---
625
627 : Inst_DS(iFmt, "ds_cmpst_f32")
628 {
629 setFlag(F32);
630 } // Inst_DS__DS_CMPST_F32
631
633 {
634 } // ~Inst_DS__DS_CMPST_F32
635
636 // --- description from .arch file ---
637 // 32b:
638 // tmp = MEM[ADDR];
639 // src = DATA2;
640 // cmp = DATA;
641 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
642 // RETURN_DATA[0] = tmp.
643 // Floating point compare and store that handles NaN/INF/denormal values.
644 // Caution, the order of src and cmp is the *opposite* of that of the
645 // BUFFER_ATOMIC_FCMPSWAP opcode.
646 void
648 {
650 } // execute
651 // --- Inst_DS__DS_MIN_F32 class methods ---
652
654 : Inst_DS(iFmt, "ds_min_f32")
655 {
656 setFlag(F32);
657 } // Inst_DS__DS_MIN_F32
658
660 {
661 } // ~Inst_DS__DS_MIN_F32
662
663 // --- description from .arch file ---
664 // 32b.
665 // tmp = MEM[ADDR];
666 // src = DATA;
667 // cmp = DATA2;
668 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
669 // Floating point minimum that handles NaN/INF/denormal values.
670 // Note that this opcode is slightly more general-purpose than
671 // BUFFER_ATOMIC_FMIN.
672 void
674 {
676 } // execute
677 // --- Inst_DS__DS_MAX_F32 class methods ---
678
680 : Inst_DS(iFmt, "ds_max_f32")
681 {
682 setFlag(F32);
683 } // Inst_DS__DS_MAX_F32
684
686 {
687 } // ~Inst_DS__DS_MAX_F32
688
689 // --- description from .arch file ---
690 // 32b.
691 // tmp = MEM[ADDR];
692 // src = DATA;
693 // cmp = DATA2;
694 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
695 // Floating point maximum that handles NaN/INF/denormal values.
696 // Note that this opcode is slightly more general-purpose than
697 // BUFFER_ATOMIC_FMAX.
698 void
700 {
702 } // execute
703 // --- Inst_DS__DS_NOP class methods ---
704
706 : Inst_DS(iFmt, "ds_nop")
707 {
708 setFlag(Nop);
709 } // Inst_DS__DS_NOP
710
712 {
713 } // ~Inst_DS__DS_NOP
714
715 // --- description from .arch file ---
716 // Do nothing.
717 void
719 {
720 gpuDynInst->wavefront()->decLGKMInstsIssued();
721 } // execute
722 // --- Inst_DS__DS_ADD_F32 class methods ---
723
725 : Inst_DS(iFmt, "ds_add_f32")
726 {
727 setFlag(F32);
728 setFlag(MemoryRef);
729 setFlag(GroupSegment);
730 setFlag(AtomicAdd);
731 setFlag(AtomicNoReturn);
732 } // Inst_DS__DS_ADD_F32
733
735 {
736 } // ~Inst_DS__DS_ADD_F32
737
738 // --- description from .arch file ---
739 // 32b:
740 // MEM[ADDR] += DATA;
741 // Floating point add that handles NaN/INF/denormal values.
742 void
744 {
745 Wavefront *wf = gpuDynInst->wavefront();
746
747 if (gpuDynInst->exec_mask.none()) {
748 wf->decLGKMInstsIssued();
749 return;
750 }
751
752 gpuDynInst->execUnitId = wf->execUnitId;
753 gpuDynInst->latency.init(gpuDynInst->computeUnit());
754 gpuDynInst->latency.set(
755 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
756 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
758
759 addr.read();
760 data.read();
761
762 calcAddr(gpuDynInst, addr);
763
764 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
765 if (gpuDynInst->exec_mask[lane]) {
766 (reinterpret_cast<VecElemF32*>(gpuDynInst->a_data))[lane]
767 = data[lane];
768 }
769 }
770
771 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
772 } // execute
773
774 void
776 {
777 Addr offset0 = instData.OFFSET0;
778 Addr offset1 = instData.OFFSET1;
779 Addr offset = (offset1 << 8) | offset0;
780
782 } // initiateAcc
783
784 void
786 {
787 } // completeAcc
788 // --- Inst_DS__DS_WRITE_B8 class methods ---
789
791 : Inst_DS(iFmt, "ds_write_b8")
792 {
793 setFlag(MemoryRef);
794 setFlag(Store);
795 } // Inst_DS__DS_WRITE_B8
796
798 {
799 } // ~Inst_DS__DS_WRITE_B8
800
801 // --- description from .arch file ---
802 // MEM[ADDR] = DATA[7:0].
803 // Byte write.
804 void
806 {
807 Wavefront *wf = gpuDynInst->wavefront();
808
809 if (gpuDynInst->exec_mask.none()) {
810 wf->decLGKMInstsIssued();
811 return;
812 }
813
814 gpuDynInst->execUnitId = wf->execUnitId;
815 gpuDynInst->latency.init(gpuDynInst->computeUnit());
816 gpuDynInst->latency.set(
817 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
818 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
819 ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
820
821 addr.read();
822 data.read();
823
824 calcAddr(gpuDynInst, addr);
825
826 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
827 if (gpuDynInst->exec_mask[lane]) {
828 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
829 = data[lane];
830 }
831 }
832
833 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
834 } // execute
835
836 void
838 {
839 Addr offset0 = instData.OFFSET0;
840 Addr offset1 = instData.OFFSET1;
841 Addr offset = (offset1 << 8) | offset0;
842
843 initMemWrite<VecElemU8>(gpuDynInst, offset);
844 } // initiateAcc
845
846 void
848 {
849 } // completeAcc
850 // --- Inst_DS__DS_WRITE_B8_D16_HI class methods ---
851
853 : Inst_DS(iFmt, "ds_write_b8_d16_hi")
854 {
855 setFlag(MemoryRef);
856 setFlag(Store);
857 } // Inst_DS__DS_WRITE_B8_D16_HI
858
860 {
861 } // ~Inst_DS__DS_WRITE_B8_D16_HI
862
863 // --- description from .arch file ---
864 // MEM[ADDR] = DATA[23:16].
865 // Byte write; the byte is taken from the high word of DATA.
866 void
868 {
 // Issue a one-byte LDS store whose per-lane payload is DATA0[23:16].
 // The per-lane bytes are staged in gpuDynInst->d_data and the request
 // is then handed to the compute unit's local memory pipeline.
869 Wavefront *wf = gpuDynInst->wavefront();
870
 // With no active lanes nothing is issued; only the wavefront's
 // LGKM issued-instruction counter is decremented.
871 if (gpuDynInst->exec_mask.none()) {
872 wf->decLGKMInstsIssued();
873 return;
874 }
875
876 gpuDynInst->execUnitId = wf->execUnitId;
877 gpuDynInst->latency.init(gpuDynInst->computeUnit());
878 gpuDynInst->latency.set(
879 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
880 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
 // DATA0 must be read as full 32-bit elements: the byte to store is
 // bits [23:16], which an 8-bit operand element cannot hold (reading
 // it as U8 would make the extraction below always yield 0).
881 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
882
883 addr.read();
884 data.read();
885
886 calcAddr(gpuDynInst, addr);
887
 // Stage one byte per active lane; inactive lanes are left untouched.
888 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
889 if (gpuDynInst->exec_mask[lane]) {
890 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
891 = bits(data[lane], 23, 16);
892 }
893 }
894
895 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
896 } // execute
897
898 void
900 {
901 Addr offset0 = instData.OFFSET0;
902 Addr offset1 = instData.OFFSET1;
903 Addr offset = (offset1 << 8) | offset0;
904
905 initMemWrite<VecElemU8>(gpuDynInst, offset);
906 } // initiateAcc
907
908 void
910 {
911 } // completeAcc
912 // --- Inst_DS__DS_WRITE_B16 class methods ---
913
915 : Inst_DS(iFmt, "ds_write_b16")
916 {
917 setFlag(MemoryRef);
918 setFlag(Store);
919 } // Inst_DS__DS_WRITE_B16
920
922 {
923 } // ~Inst_DS__DS_WRITE_B16
924
925 // --- description from .arch file ---
926 // MEM[ADDR] = DATA[15:0]
927 // Short write.
928 void
930 {
931 Wavefront *wf = gpuDynInst->wavefront();
932
933 if (gpuDynInst->exec_mask.none()) {
934 wf->decLGKMInstsIssued();
935 return;
936 }
937
938 gpuDynInst->execUnitId = wf->execUnitId;
939 gpuDynInst->latency.init(gpuDynInst->computeUnit());
940 gpuDynInst->latency.set(
941 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
942 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
944
945 addr.read();
946 data.read();
947
948 calcAddr(gpuDynInst, addr);
949
950 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
951 if (gpuDynInst->exec_mask[lane]) {
952 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
953 = data[lane];
954 }
955 }
956
957 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
958 } // execute
959
960 void
962 {
963 Addr offset0 = instData.OFFSET0;
964 Addr offset1 = instData.OFFSET1;
965 Addr offset = (offset1 << 8) | offset0;
966
967 initMemWrite<VecElemU16>(gpuDynInst, offset);
968 } // initiateAcc
969
970 void
972 {
973 } // completeAcc
974 // --- Inst_DS__DS_ADD_RTN_U32 class methods ---
975
977 : Inst_DS(iFmt, "ds_add_rtn_u32")
978 {
979 } // Inst_DS__DS_ADD_RTN_U32
980
982 {
983 } // ~Inst_DS__DS_ADD_RTN_U32
984
985 // --- description from .arch file ---
986 // 32b:
987 // tmp = MEM[ADDR];
988 // MEM[ADDR] += DATA;
989 // RETURN_DATA = tmp.
990 void
992 {
994 } // execute
995 // --- Inst_DS__DS_SUB_RTN_U32 class methods ---
996
998 : Inst_DS(iFmt, "ds_sub_rtn_u32")
999 {
1000 } // Inst_DS__DS_SUB_RTN_U32
1001
1003 {
1004 } // ~Inst_DS__DS_SUB_RTN_U32
1005
1006 // --- description from .arch file ---
1007 // 32b:
1008 // tmp = MEM[ADDR];
1009 // MEM[ADDR] -= DATA;
1010 // RETURN_DATA = tmp.
1011 void
1013 {
1015 } // execute
1016 // --- Inst_DS__DS_RSUB_RTN_U32 class methods ---
1017
1019 : Inst_DS(iFmt, "ds_rsub_rtn_u32")
1020 {
1021 } // Inst_DS__DS_RSUB_RTN_U32
1022
1024 {
1025 } // ~Inst_DS__DS_RSUB_RTN_U32
1026
1027 // --- description from .arch file ---
1028 // 32b:
1029 // tmp = MEM[ADDR];
1030 // MEM[ADDR] = DATA - MEM[ADDR];
1031 // RETURN_DATA = tmp.
1032 // Subtraction with reversed operands.
1033 void
1035 {
1037 } // execute
1038 // --- Inst_DS__DS_INC_RTN_U32 class methods ---
1039
1041 : Inst_DS(iFmt, "ds_inc_rtn_u32")
1042 {
1043 } // Inst_DS__DS_INC_RTN_U32
1044
1046 {
1047 } // ~Inst_DS__DS_INC_RTN_U32
1048
1049 // --- description from .arch file ---
1050 // 32b:
1051 // tmp = MEM[ADDR];
1052 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
1053 // RETURN_DATA = tmp.
1054 void
1056 {
1058 } // execute
1059 // --- Inst_DS__DS_DEC_RTN_U32 class methods ---
1060
1062 : Inst_DS(iFmt, "ds_dec_rtn_u32")
1063 {
1064 } // Inst_DS__DS_DEC_RTN_U32
1065
1067 {
1068 } // ~Inst_DS__DS_DEC_RTN_U32
1069
1070 // --- description from .arch file ---
1071 // 32b:
1072 // tmp = MEM[ADDR];
1073 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
1074 // (unsigned compare); RETURN_DATA = tmp.
1075 void
1077 {
1079 } // execute
1080 // --- Inst_DS__DS_MIN_RTN_I32 class methods ---
1081
1083 : Inst_DS(iFmt, "ds_min_rtn_i32")
1084 {
1085 } // Inst_DS__DS_MIN_RTN_I32
1086
1088 {
1089 } // ~Inst_DS__DS_MIN_RTN_I32
1090
1091 // --- description from .arch file ---
1092 // 32b:
1093 // tmp = MEM[ADDR];
1094 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
1095 // RETURN_DATA = tmp.
1096 void
1098 {
1100 } // execute
1101 // --- Inst_DS__DS_MAX_RTN_I32 class methods ---
1102
1104 : Inst_DS(iFmt, "ds_max_rtn_i32")
1105 {
1106 } // Inst_DS__DS_MAX_RTN_I32
1107
1109 {
1110 } // ~Inst_DS__DS_MAX_RTN_I32
1111
1112 // --- description from .arch file ---
1113 // 32b:
1114 // tmp = MEM[ADDR];
1115 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
1116 // RETURN_DATA = tmp.
1117 void
1119 {
1121 } // execute
1122 // --- Inst_DS__DS_MIN_RTN_U32 class methods ---
1123
1125 : Inst_DS(iFmt, "ds_min_rtn_u32")
1126 {
1127 } // Inst_DS__DS_MIN_RTN_U32
1128
1130 {
1131 } // ~Inst_DS__DS_MIN_RTN_U32
1132
1133 // --- description from .arch file ---
1134 // 32b:
1135 // tmp = MEM[ADDR];
1136 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
1137 // RETURN_DATA = tmp.
1138 void
1140 {
1142 } // execute
1143 // --- Inst_DS__DS_MAX_RTN_U32 class methods ---
1144
1146 : Inst_DS(iFmt, "ds_max_rtn_u32")
1147 {
1148 } // Inst_DS__DS_MAX_RTN_U32
1149
1151 {
1152 } // ~Inst_DS__DS_MAX_RTN_U32
1153
1154 // --- description from .arch file ---
1155 // 32b:
1156 // tmp = MEM[ADDR];
1157 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
1158 // RETURN_DATA = tmp.
1159 void
1161 {
1163 } // execute
1164 // --- Inst_DS__DS_AND_RTN_B32 class methods ---
1165
1167 : Inst_DS(iFmt, "ds_and_rtn_b32")
1168 {
1169 } // Inst_DS__DS_AND_RTN_B32
1170
1172 {
1173 } // ~Inst_DS__DS_AND_RTN_B32
1174
1175 // --- description from .arch file ---
1176 // 32b:
1177 // tmp = MEM[ADDR];
1178 // MEM[ADDR] &= DATA;
1179 // RETURN_DATA = tmp.
1180 void
1182 {
1184 } // execute
1185 // --- Inst_DS__DS_OR_RTN_B32 class methods ---
1186
1188 : Inst_DS(iFmt, "ds_or_rtn_b32")
1189 {
1190 } // Inst_DS__DS_OR_RTN_B32
1191
1193 {
1194 } // ~Inst_DS__DS_OR_RTN_B32
1195
1196 // --- description from .arch file ---
1197 // 32b:
1198 // tmp = MEM[ADDR];
1199 // MEM[ADDR] |= DATA;
1200 // RETURN_DATA = tmp.
1201 void
1203 {
1205 } // execute
1206 // --- Inst_DS__DS_XOR_RTN_B32 class methods ---
1207
1209 : Inst_DS(iFmt, "ds_xor_rtn_b32")
1210 {
1211 } // Inst_DS__DS_XOR_RTN_B32
1212
1214 {
1215 } // ~Inst_DS__DS_XOR_RTN_B32
1216
1217 // --- description from .arch file ---
1218 // 32b:
1219 // tmp = MEM[ADDR];
1220 // MEM[ADDR] ^= DATA;
1221 // RETURN_DATA = tmp.
1222 void
1224 {
1226 } // execute
1227 // --- Inst_DS__DS_MSKOR_RTN_B32 class methods ---
1228
1230 : Inst_DS(iFmt, "ds_mskor_rtn_b32")
1231 {
1232 } // Inst_DS__DS_MSKOR_RTN_B32
1233
1235 {
1236 } // ~Inst_DS__DS_MSKOR_RTN_B32
1237
1238 // --- description from .arch file ---
1239 // 32b:
1240 // tmp = MEM[ADDR];
1241 // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
1242 // RETURN_DATA = tmp.
1243 // Masked dword OR, D0 contains the mask and D1 contains the new value.
1244 void
1246 {
1248 } // execute
1249 // --- Inst_DS__DS_WRXCHG_RTN_B32 class methods ---
1250
1252 : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
1253 {
1254 } // Inst_DS__DS_WRXCHG_RTN_B32
1255
1257 {
1258 } // ~Inst_DS__DS_WRXCHG_RTN_B32
1259
1260 // --- description from .arch file ---
1261 // tmp = MEM[ADDR];
1262 // MEM[ADDR] = DATA;
1263 // RETURN_DATA = tmp.
1264 // Write-exchange operation.
1265 void
1267 {
1269 } // execute
1270 // --- Inst_DS__DS_WRXCHG2_RTN_B32 class methods ---
1271
1273 : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
1274 {
1275 } // Inst_DS__DS_WRXCHG2_RTN_B32
1276
1278 {
1279 } // ~Inst_DS__DS_WRXCHG2_RTN_B32
1280
1281 // --- description from .arch file ---
1282 // Write-exchange 2 separate dwords.
1283 void
1285 {
1287 } // execute
1288 // --- Inst_DS__DS_WRXCHG2ST64_RTN_B32 class methods ---
1289
1291 InFmt_DS *iFmt)
1292 : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
1293 {
1294 } // Inst_DS__DS_WRXCHG2ST64_RTN_B32
1295
1297 {
1298 } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32
1299
1300 // --- description from .arch file ---
1301 // Write-exchange 2 separate dwords with a stride of 64 dwords.
1302 void
1307 // --- Inst_DS__DS_CMPST_RTN_B32 class methods ---
1308
1310 : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
1311 {
1312 } // Inst_DS__DS_CMPST_RTN_B32
1313
1315 {
1316 } // ~Inst_DS__DS_CMPST_RTN_B32
1317
1318 // --- description from .arch file ---
1319 // 32b:
1320 // tmp = MEM[ADDR];
1321 // src = DATA2;
1322 // cmp = DATA;
1323 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
1324 // RETURN_DATA[0] = tmp.
1325 // Compare and store.
1326 // Caution, the order of src and cmp is the *opposite* of that of the
1327 // BUFFER_ATOMIC_CMPSWAP opcode.
1328 void
1330 {
1332 } // execute
1333 // --- Inst_DS__DS_CMPST_RTN_F32 class methods ---
1334
1336 : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
1337 {
1338 setFlag(F32);
1339 } // Inst_DS__DS_CMPST_RTN_F32
1340
1342 {
1343 } // ~Inst_DS__DS_CMPST_RTN_F32
1344
1345 // --- description from .arch file ---
1346 // 32b:
1347 // tmp = MEM[ADDR];
1348 // src = DATA2;
1349 // cmp = DATA;
1350 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
1351 // RETURN_DATA[0] = tmp.
1352 // Floating point compare and store that handles NaN/INF/denormal values.
1353 // Caution, the order of src and cmp is the *opposite* of that of the
1354 // BUFFER_ATOMIC_FCMPSWAP opcode.
1355 void
1357 {
1359 } // execute
1360 // --- Inst_DS__DS_MIN_RTN_F32 class methods ---
1361
1363 : Inst_DS(iFmt, "ds_min_rtn_f32")
1364 {
1365 setFlag(F32);
1366 } // Inst_DS__DS_MIN_RTN_F32
1367
1369 {
1370 } // ~Inst_DS__DS_MIN_RTN_F32
1371
1372 // --- description from .arch file ---
1373 // 32b.
1374 // tmp = MEM[ADDR];
1375 // src = DATA;
1376 // cmp = DATA2;
1377 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
1378 // Floating point minimum that handles NaN/INF/denormal values.
1379 // Note that this opcode is slightly more general-purpose than
1380 // BUFFER_ATOMIC_FMIN.
1381 void
1383 {
1385 } // execute
1386 // --- Inst_DS__DS_MAX_RTN_F32 class methods ---
1387
1389 : Inst_DS(iFmt, "ds_max_rtn_f32")
1390 {
1391 setFlag(F32);
1392 } // Inst_DS__DS_MAX_RTN_F32
1393
1395 {
1396 } // ~Inst_DS__DS_MAX_RTN_F32
1397
1398 // --- description from .arch file ---
1399 // 32b.
1400 // tmp = MEM[ADDR];
1401 // src = DATA;
1402 // cmp = DATA2;
1403 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
1404 // Floating point maximum that handles NaN/INF/denormal values.
1405 // Note that this opcode is slightly more general-purpose than
1406 // BUFFER_ATOMIC_FMAX.
1407 void
1409 {
1411 } // execute
1412 // --- Inst_DS__DS_WRAP_RTN_B32 class methods ---
1413
1415 : Inst_DS(iFmt, "ds_wrap_rtn_b32")
1416 {
1417 } // Inst_DS__DS_WRAP_RTN_B32
1418
1420 {
1421 } // ~Inst_DS__DS_WRAP_RTN_B32
1422
1423 // --- description from .arch file ---
1424 // tmp = MEM[ADDR];
1425 // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
1426 // RETURN_DATA = tmp.
1427 void
1429 {
1431 } // execute
1432 // --- Inst_DS__DS_ADD_RTN_F32 class methods ---
1433
1435 : Inst_DS(iFmt, "ds_add_rtn_f32")
1436 {
1437 setFlag(F32);
1438 } // Inst_DS__DS_ADD_RTN_F32
1439
1441 {
1442 } // ~Inst_DS__DS_ADD_RTN_F32
1443
1444 // --- description from .arch file ---
1445 // 32b:
1446 // tmp = MEM[ADDR];
1447 // MEM[ADDR] += DATA;
1448 // RETURN_DATA = tmp.
1449 // Floating point add that handles NaN/INF/denormal values.
1450 void
1452 {
1454 } // execute
1455 // --- Inst_DS__DS_READ_B32 class methods ---
1456
1458 : Inst_DS(iFmt, "ds_read_b32")
1459 {
1460 setFlag(MemoryRef);
1461 setFlag(Load);
1462 } // Inst_DS__DS_READ_B32
1463
1465 {
1466 } // ~Inst_DS__DS_READ_B32
1467
1468 // --- description from .arch file ---
1469 // RETURN_DATA = MEM[ADDR].
1470 // Dword read.
1471 void
1473 {
1474 Wavefront *wf = gpuDynInst->wavefront();
1475
1476 if (gpuDynInst->exec_mask.none()) {
1477 wf->decLGKMInstsIssued();
1478 return;
1479 }
1480
1481 gpuDynInst->execUnitId = wf->execUnitId;
1482 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1483 gpuDynInst->latency.set(
1484 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1485 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1486
1487 addr.read();
1488
1489 calcAddr(gpuDynInst, addr);
1490
1491 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1492 } // execute
1493
1494 void
1496 {
1497 Addr offset0 = instData.OFFSET0;
1498 Addr offset1 = instData.OFFSET1;
1499 Addr offset = (offset1 << 8) | offset0;
1500
1501 initMemRead<VecElemU32>(gpuDynInst, offset);
1502 } // initiateAcc
1503
1504 void
1506 {
1507 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1508
1509 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1510 if (gpuDynInst->exec_mask[lane]) {
1511 vdst[lane] = (reinterpret_cast<VecElemU32*>(
1512 gpuDynInst->d_data))[lane];
1513 }
1514 }
1515
1516 vdst.write();
1517 } // completeAcc
1518 // --- Inst_DS__DS_READ2_B32 class methods ---
1519
// Constructor: passes the mnemonic "ds_read2_b32" to the Inst_DS base and
// flags the instruction as a memory reference and a load.
1521 : Inst_DS(iFmt, "ds_read2_b32")
1522 {
1523 setFlag(MemoryRef);
1524 setFlag(Load);
1525 } // Inst_DS__DS_READ2_B32
1526
// Destructor: empty body.
1528 {
1529 } // ~Inst_DS__DS_READ2_B32
1530
1531 // --- description from .arch file ---
1532 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
1533 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
1534 // Read 2 dwords.
// execute: reads the ADDR vector operand, passes it to calcAddr(), and
// hands the instruction to the compute unit's localMemoryPipe.
1535 void
1537 {
1538 Wavefront *wf = gpuDynInst->wavefront();
1539
// No lane is active: call decLGKMInstsIssued() and return without issuing.
1540 if (gpuDynInst->exec_mask.none()) {
1541 wf->decLGKMInstsIssued();
1542 return;
1543 }
1544
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
1545 gpuDynInst->execUnitId = wf->execUnitId;
1546 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1547 gpuDynInst->latency.set(
1548 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1549 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1550
1551 addr.read();
1552
1553 calcAddr(gpuDynInst, addr);
1554
1555 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1556 } // execute
1557
// initiateAcc: each offset field is scaled by 4 (matching the
// "OFFSETn * 4" terms in the description above) and both are passed to
// initDualMemRead with VecElemU32 elements.
1558 void
1560 {
1561 Addr offset0 = instData.OFFSET0 * 4;
1562 Addr offset1 = instData.OFFSET1 * 4;
1563
1564 initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
1565 } // initiateAcc
1566
// completeAcc: d_data is read as a VecElemU32 array holding two consecutive
// elements per lane. For each active lane, element [2*lane] goes to VDST
// and element [2*lane + 1] goes to VDST + 1; both operands are then written.
1567 void
1569 {
1570 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
1571 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
1572
1573 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1574 if (gpuDynInst->exec_mask[lane]) {
1575 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1576 gpuDynInst->d_data))[lane * 2];
1577 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1578 gpuDynInst->d_data))[lane * 2 + 1];
1579 }
1580 }
1581
1582 vdst0.write();
1583 vdst1.write();
1584 } // completeAcc
1585 // --- Inst_DS__DS_READ2ST64_B32 class methods ---
1586
// Constructor: passes the mnemonic "ds_read2st64_b32" to the Inst_DS base
// and flags the instruction as a memory reference and a load.
1588 : Inst_DS(iFmt, "ds_read2st64_b32")
1589 {
1590 setFlag(MemoryRef);
1591 setFlag(Load);
1592 } // Inst_DS__DS_READ2ST64_B32
1593
// Destructor: empty body.
1595 {
1596 } // ~Inst_DS__DS_READ2ST64_B32
1597
1598 // --- description from .arch file ---
1599 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
1600 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
1601 // Read 2 dwords.
// execute: reads the ADDR vector operand, passes it to calcAddr(), and
// hands the instruction to the compute unit's localMemoryPipe.
1602 void
1604 {
1605 Wavefront *wf = gpuDynInst->wavefront();
1606
// No lane is active: call decLGKMInstsIssued() and return without issuing.
1607 if (gpuDynInst->exec_mask.none()) {
1608 wf->decLGKMInstsIssued();
1609 return;
1610 }
1611
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
1612 gpuDynInst->execUnitId = wf->execUnitId;
1613 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1614 gpuDynInst->latency.set(
1615 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1616 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1617
1618 addr.read();
1619
1620 calcAddr(gpuDynInst, addr);
1621
1622 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1623 } // execute
1624
// initiateAcc: each offset field is scaled by 4 * 64 (matching the
// description above) and both are passed to initDualMemRead with
// VecElemU32 elements.
1625 void
1627 {
1628 Addr offset0 = (instData.OFFSET0 * 4 * 64);
1629 Addr offset1 = (instData.OFFSET1 * 4 * 64);
1630
1631 initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
1632 }
1633
// completeAcc: d_data is read as a VecElemU32 array holding two consecutive
// elements per lane. For each active lane, element [2*lane] goes to VDST
// and element [2*lane + 1] goes to VDST + 1; both operands are then written.
1634 void
1636 {
1637 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
1638 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
1639
1640 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1641 if (gpuDynInst->exec_mask[lane]) {
1642 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
1643 gpuDynInst->d_data))[lane * 2];
1644 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
1645 gpuDynInst->d_data))[lane * 2 + 1];
1646 }
1647 }
1648
1649 vdst0.write();
1650 vdst1.write();
1651 }
1652 // --- Inst_DS__DS_READ_I8 class methods ---
1653
// Constructor: passes the mnemonic "ds_read_i8" to the Inst_DS base and
// flags the instruction as a memory reference and a load.
1655 : Inst_DS(iFmt, "ds_read_i8")
1656 {
1657 setFlag(MemoryRef);
1658 setFlag(Load);
1659 } // Inst_DS__DS_READ_I8
1660
// Destructor: empty body.
1662 {
1663 } // ~Inst_DS__DS_READ_I8
1664
1665 // --- description from .arch file ---
1666 // RETURN_DATA = signext(MEM[ADDR][7:0]).
1667 // Signed byte read.
// execute: reads the ADDR vector operand, passes it to calcAddr(), and
// hands the instruction to the compute unit's localMemoryPipe.
1668 void
1670 {
1671 Wavefront *wf = gpuDynInst->wavefront();
1672
// No lane is active: call decLGKMInstsIssued() and return without issuing.
1673 if (gpuDynInst->exec_mask.none()) {
1674 wf->decLGKMInstsIssued();
1675 return;
1676 }
1677
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
1678 gpuDynInst->execUnitId = wf->execUnitId;
1679 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1680 gpuDynInst->latency.set(
1681 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1682 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1683
1684 addr.read();
1685
1686 calcAddr(gpuDynInst, addr);
1687
1688 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1689 } // execute
1690
// initiateAcc: forms one offset from the two offset fields (OFFSET1 shifted
// left by 8, OR-ed with OFFSET0) and calls initMemRead with VecElemI8
// elements.
1691 void
1693 {
1694 Addr offset0 = instData.OFFSET0;
1695 Addr offset1 = instData.OFFSET1;
1696 Addr offset = (offset1 << 8) | offset0;
1697
1698 initMemRead<VecElemI8>(gpuDynInst, offset);
1699 } // initiateAcc
1700
// completeAcc: for every active lane, takes element [lane] of d_data
// (viewed as a VecElemI8 array), passes it through sext<8>, casts the result
// to VecElemU32 and stores it in VDST; VDST is then written.
1701 void
1703 {
1704 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1705
1706 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1707 if (gpuDynInst->exec_mask[lane]) {
1708 vdst[lane] = (VecElemU32)sext<8>((reinterpret_cast<VecElemI8*>(
1709 gpuDynInst->d_data))[lane]);
1710 }
1711 }
1712
1713 vdst.write();
1714 } // completeAcc
1715 // --- Inst_DS__DS_READ_U8 class methods ---
1716
// Constructor: passes the mnemonic "ds_read_u8" to the Inst_DS base and
// flags the instruction as a memory reference and a load.
1718 : Inst_DS(iFmt, "ds_read_u8")
1719 {
1720 setFlag(MemoryRef);
1721 setFlag(Load);
1722 } // Inst_DS__DS_READ_U8
1723
// Destructor: empty body.
1725 {
1726 } // ~Inst_DS__DS_READ_U8
1727
1728 // --- description from .arch file ---
1729 // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}.
1730 // Unsigned byte read.
// execute: reads the ADDR vector operand, passes it to calcAddr(), and
// hands the instruction to the compute unit's localMemoryPipe.
1731 void
1733 {
1734 Wavefront *wf = gpuDynInst->wavefront();
1735
// No lane is active: call decLGKMInstsIssued() and return without issuing.
1736 if (gpuDynInst->exec_mask.none()) {
1737 wf->decLGKMInstsIssued();
1738 return;
1739 }
1740
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
1741 gpuDynInst->execUnitId = wf->execUnitId;
1742 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1743 gpuDynInst->latency.set(
1744 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1745 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1746
1747 addr.read();
1748
1749 calcAddr(gpuDynInst, addr);
1750
1751 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1752 } // execute
1753
// initiateAcc: forms one offset from the two offset fields (OFFSET1 shifted
// left by 8, OR-ed with OFFSET0) and calls initMemRead with VecElemU8
// elements.
1754 void
1756 {
1757 Addr offset0 = instData.OFFSET0;
1758 Addr offset1 = instData.OFFSET1;
1759 Addr offset = (offset1 << 8) | offset0;
1760
1761 initMemRead<VecElemU8>(gpuDynInst, offset);
1762 } // initiateAcc
1763
// completeAcc: for every active lane, converts element [lane] of d_data
// (viewed as a VecElemU8 array) to VecElemU32 and stores it in VDST; VDST
// is then written.
1764 void
1766 {
1767 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1768
1769 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1770 if (gpuDynInst->exec_mask[lane]) {
1771 vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>(
1772 gpuDynInst->d_data))[lane];
1773 }
1774 }
1775
1776 vdst.write();
1777 } // completeAcc
1778 // --- Inst_DS__DS_READ_I16 class methods ---
1779
1781 : Inst_DS(iFmt, "ds_read_i16")
1782 {
1783 setFlag(MemoryRef);
1784 setFlag(Load);
1785 } // Inst_DS__DS_READ_I16
1786
1788 {
1789 } // ~Inst_DS__DS_READ_I16
1790
1791 // --- description from .arch file ---
1792 // RETURN_DATA = signext(MEM[ADDR][15:0]).
1793 // Signed short read.
1794 void
1796 {
1798 } // execute
1799 // --- Inst_DS__DS_READ_U16 class methods ---
1800
// Constructor: passes the mnemonic "ds_read_u16" to the Inst_DS base and
// flags the instruction as a memory reference and a load.
1802 : Inst_DS(iFmt, "ds_read_u16")
1803 {
1804 setFlag(MemoryRef);
1805 setFlag(Load);
1806 } // Inst_DS__DS_READ_U16
1807
// Destructor: empty body.
1809 {
1810 } // ~Inst_DS__DS_READ_U16
1811
1812 // --- description from .arch file ---
1813 // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}.
1814 // Unsigned short read.
// execute: reads the ADDR vector operand, passes it to calcAddr(), and
// hands the instruction to the compute unit's localMemoryPipe.
1815 void
1817 {
1818 Wavefront *wf = gpuDynInst->wavefront();
1819
// No lane is active: call decLGKMInstsIssued() and return without issuing.
1820 if (gpuDynInst->exec_mask.none()) {
1821 wf->decLGKMInstsIssued();
1822 return;
1823 }
1824
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
1825 gpuDynInst->execUnitId = wf->execUnitId;
1826 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1827 gpuDynInst->latency.set(
1828 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1829 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1830
1831 addr.read();
1832
1833 calcAddr(gpuDynInst, addr);
1834
1835 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1836 } // execute
// initiateAcc: forms one offset from the two offset fields (OFFSET1 shifted
// left by 8, OR-ed with OFFSET0) and calls initMemRead with VecElemU16
// elements.
1837 void
1839 {
1840 Addr offset0 = instData.OFFSET0;
1841 Addr offset1 = instData.OFFSET1;
1842 Addr offset = (offset1 << 8) | offset0;
1843
1844 initMemRead<VecElemU16>(gpuDynInst, offset);
1845 } // initiateAcc
1846
// completeAcc: for every active lane, converts element [lane] of d_data
// (viewed as a VecElemU16 array) to VecElemU32 and stores it in VDST; VDST
// is then written.
1847 void
1849 {
1850 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1851
1852 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1853 if (gpuDynInst->exec_mask[lane]) {
1854 vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>(
1855 gpuDynInst->d_data))[lane];
1856 }
1857 }
1858
1859 vdst.write();
1860 } // completeAcc
1861 // --- Inst_DS__DS_READ_U16_D16 class methods ---
1862
// Constructor: passes the mnemonic to the Inst_DS base and flags the
// instruction as a memory reference and a load.
// The mnemonic is "ds_read_u16_d16" (the low-half variant). It previously
// read "ds_read_u16_d16_hi", the same string Inst_DS__DS_READ_U16_D16_HI
// passes, so the two opcodes were given the same name.
1865 : Inst_DS(iFmt, "ds_read_u16_d16")
1866 {
1867 setFlag(MemoryRef);
1868 setFlag(Load);
1869 } // Inst_DS__DS_READ_U16_D16
1870
1872 {
1873 } // ~Inst_DS__DS_READ_U16_D16
1874
1875 // --- description from .arch file ---
1876 // RETURN_DATA[15 : 0].u16 = MEM[ADDR].u16;
1877 // // RETURN_DATA[31:16] is preserved.
// execute: reads the ADDR vector operand, passes it to calcAddr(), and
// hands the instruction to the compute unit's localMemoryPipe.
1878 void
1880 {
1881 Wavefront *wf = gpuDynInst->wavefront();
1882
// No lane is active: call decLGKMInstsIssued() and return without issuing.
1883 if (gpuDynInst->exec_mask.none()) {
1884 wf->decLGKMInstsIssued();
1885 return;
1886 }
1887
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
1888 gpuDynInst->execUnitId = wf->execUnitId;
1889 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1890 gpuDynInst->latency.set(
1891 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1892 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1893
1894 addr.read();
1895
1896 calcAddr(gpuDynInst, addr);
1897
1898 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1899 } // execute
// initiateAcc: forms one offset from the two offset fields (OFFSET1 shifted
// left by 8, OR-ed with OFFSET0) and calls initMemRead with VecElemU16
// elements.
1900 void
1902 {
1903 Addr offset0 = instData.OFFSET0;
1904 Addr offset1 = instData.OFFSET1;
1905 Addr offset = (offset1 << 8) | offset0;
1906
1907 initMemRead<VecElemU16>(gpuDynInst, offset);
1908 } // initiateAcc
1909
// completeAcc: for every active lane, replaces bits [15:0] of vdst[lane]
// with element [lane] of d_data (viewed as a VecElemU16 array), then writes
// VDST.
// NOTE(review): vdst is never read() in this method, so whether bits
// [31:16] still hold the register's earlier contents at write() time
// depends on how VecOperandU32 is initialised -- confirm.
1910 void
1912 {
1913 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1914
1915 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1916 if (gpuDynInst->exec_mask[lane]) {
1917 VecElemU16 ds_val = reinterpret_cast<VecElemU16*>(
1918 gpuDynInst->d_data)[lane];
1919 replaceBits(vdst[lane], 15, 0, ds_val);
1920 }
1921 }
1922
1923 vdst.write();
1924 } // completeAcc
1925 // --- Inst_DS__DS_READ_U16_D16_HI class methods ---
1926
// Constructor: passes the mnemonic "ds_read_u16_d16_hi" to the Inst_DS base
// and flags the instruction as a memory reference and a load.
1929 : Inst_DS(iFmt, "ds_read_u16_d16_hi")
1930 {
1931 setFlag(MemoryRef);
1932 setFlag(Load);
1933 } // Inst_DS__DS_READ_U16_D16_HI
1934
// Destructor: empty body.
1936 {
1937 } // ~Inst_DS__DS_READ_U16_D16_HI
1938
1939 // --- description from .arch file ---
1940 // RETURN_DATA[31 : 16].u16 = MEM[ADDR].u16;
1941 // // RETURN_DATA[15:0] is preserved.
// execute: reads the ADDR vector operand, passes it to calcAddr(), and
// hands the instruction to the compute unit's localMemoryPipe.
1942 void
1944 {
1945 Wavefront *wf = gpuDynInst->wavefront();
1946
// No lane is active: call decLGKMInstsIssued() and return without issuing.
1947 if (gpuDynInst->exec_mask.none()) {
1948 wf->decLGKMInstsIssued();
1949 return;
1950 }
1951
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
1952 gpuDynInst->execUnitId = wf->execUnitId;
1953 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1954 gpuDynInst->latency.set(
1955 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
1956 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
1957
1958 addr.read();
1959
1960 calcAddr(gpuDynInst, addr);
1961
1962 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
1963 } // execute
// initiateAcc: forms one offset from the two offset fields (OFFSET1 shifted
// left by 8, OR-ed with OFFSET0) and calls initMemRead with VecElemU16
// elements.
1964 void
1966 {
1967 Addr offset0 = instData.OFFSET0;
1968 Addr offset1 = instData.OFFSET1;
1969 Addr offset = (offset1 << 8) | offset0;
1970
1971 initMemRead<VecElemU16>(gpuDynInst, offset);
1972 } // initiateAcc
1973
// completeAcc: for every active lane, replaces bits [31:16] of vdst[lane]
// with element [lane] of d_data (viewed as a VecElemU16 array), then writes
// VDST.
// NOTE(review): vdst is never read() in this method, so whether bits
// [15:0] still hold the register's earlier contents at write() time
// depends on how VecOperandU32 is initialised -- confirm.
1974 void
1976 {
1977 VecOperandU32 vdst(gpuDynInst, extData.VDST);
1978
1979 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1980 if (gpuDynInst->exec_mask[lane]) {
1981 VecElemU16 ds_val = reinterpret_cast<VecElemU16*>(
1982 gpuDynInst->d_data)[lane];
1983 replaceBits(vdst[lane], 31, 16, ds_val);
1984 }
1985 }
1986
1987 vdst.write();
1988 } // completeAcc
1989 // --- Inst_DS__DS_SWIZZLE_B32 class methods ---
1990
// Constructor: passes the mnemonic "ds_swizzle_b32" to the Inst_DS base.
// The flag calls visible in this listing are Load and ALU.
1992 : Inst_DS(iFmt, "ds_swizzle_b32")
1993 {
1999 setFlag(Load);
2000 setFlag(ALU);
2001 } // Inst_DS__DS_SWIZZLE_B32
2002
// Destructor: empty body.
2004 {
2005 } // ~Inst_DS__DS_SWIZZLE_B32
2006
2007 // --- description from .arch file ---
2008 // RETURN_DATA = swizzle(vgpr_data, offset1:offset0).
2009 // Dword swizzle, no data is written to LDS memory; See ds_opcodes.docx for
2010 // --- details.
// execute: performs the whole swizzle here. It reads DATA0, selects a
// source lane for every active destination lane according to the 16-bit
// pattern built from OFFSET1:OFFSET0, writes VDST, and never calls
// localMemoryPipe.issueRequest().
2011 void
2013 {
2014 Wavefront *wf = gpuDynInst->wavefront();
// Unlike the DS loads above, decLGKMInstsIssued() is called unconditionally,
// before the exec-mask check.
2015 wf->decLGKMInstsIssued();
2016
2017 if (gpuDynInst->exec_mask.none()) {
2018 return;
2019 }
2020
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
2021 gpuDynInst->execUnitId = wf->execUnitId;
2022 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2023 gpuDynInst->latency.set(gpuDynInst->computeUnit()
2024 ->cyclesToTicks(Cycles(24)));
2025
2026 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
2027 VecOperandU32 vdst(gpuDynInst, extData.VDST);
// Swizzle pattern: OFFSET1 shifted left by 8, OR-ed with OFFSET0.
2045 VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);
2046
2047 data.read();
2048
// Bit 15 of the pattern selects between the two modes below.
2049 if (bits(ds_pattern, 15)) {
2050 // QDMode
// Lanes are handled in groups of four. For position k (0..3) in a group,
// the source index is the group's first lane plus the 2-bit pattern field
// [2k+1:2k]. A destination lane is written only if it is active; it
// receives data[index] when the source lane is active and 0 otherwise.
2051 for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
2057 if (gpuDynInst->exec_mask[lane]) {
2058 int index0 = lane + bits(ds_pattern, 1, 0);
2059 panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
2060 "is out of bounds.\n", gpuDynInst->disassemble(),
2061 index0);
2062 vdst[lane]
2063 = gpuDynInst->exec_mask[index0] ? data[index0]: 0;
2064 }
2065 if (gpuDynInst->exec_mask[lane + 1]) {
2066 int index1 = lane + bits(ds_pattern, 3, 2);
2067 panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
2068 "is out of bounds.\n", gpuDynInst->disassemble(),
2069 index1);
2070 vdst[lane + 1]
2071 = gpuDynInst->exec_mask[index1] ? data[index1]: 0;
2072 }
2073 if (gpuDynInst->exec_mask[lane + 2]) {
2074 int index2 = lane + bits(ds_pattern, 5, 4);
2075 panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
2076 "is out of bounds.\n", gpuDynInst->disassemble(),
2077 index2);
2078 vdst[lane + 2]
2079 = gpuDynInst->exec_mask[index2] ? data[index2]: 0;
2080 }
2081 if (gpuDynInst->exec_mask[lane + 3]) {
2082 int index3 = lane + bits(ds_pattern, 7, 6);
2083 panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
2084 "is out of bounds.\n", gpuDynInst->disassemble(),
2085 index3);
2086 vdst[lane + 3]
2087 = gpuDynInst->exec_mask[index3] ? data[index3]: 0;
2088 }
2089 }
2090 } else {
2091 // Bit Mode
// Three 5-bit masks are taken from the pattern: AND = bits [4:0],
// OR = bits [9:5], XOR = bits [14:10]. The source index is
// ((lane & AND) | OR) ^ XOR, which is a 5-bit value; lanes above 31 add 32
// so their source stays in the upper 32 lanes.
2092 int and_mask = bits(ds_pattern, 4, 0);
2093 int or_mask = bits(ds_pattern, 9, 5);
2094 int xor_mask = bits(ds_pattern, 14, 10);
2095 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2096 if (gpuDynInst->exec_mask[lane]) {
2097 int index = (((lane & and_mask) | or_mask) ^ xor_mask);
2098 // Adjust for the next 32 lanes.
2099 if (lane > 31) {
2100 index += 32;
2101 }
2102 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
2103 "out of bounds.\n", gpuDynInst->disassemble(),
2104 index);
// An inactive source lane contributes 0.
2105 vdst[lane]
2106 = gpuDynInst->exec_mask[index] ? data[index] : 0;
2107 }
2108 }
2109 }
2110
2111 vdst.write();
2112
// Calls scheduleWriteOperandsFromLoad() on the wavefront's vector register
// file and decrements rdLmReqsInPipe directly.
2119 wf->computeUnit->vrf[wf->simdId]->
2120 scheduleWriteOperandsFromLoad(wf, gpuDynInst);
2125 wf->rdLmReqsInPipe--;
2126 } // execute
2127 // --- Inst_DS__DS_PERMUTE_B32 class methods ---
2128
// Constructor: passes the mnemonic "ds_permute_b32" to the Inst_DS base and
// flags the instruction as a memory reference and a load.
2130 : Inst_DS(iFmt, "ds_permute_b32")
2131 {
2132 setFlag(MemoryRef);
2138 setFlag(Load);
2139 } // Inst_DS__DS_PERMUTE_B32
2140
// Destructor: empty body.
2142 {
2143 } // ~Inst_DS__DS_PERMUTE_B32
2144
2145 // --- description from .arch file ---
2146 // Forward permute.
// execute: performs the whole permute here, without calling
// localMemoryPipe.issueRequest(). Each active lane pushes its DATA0 value
// to the destination lane selected by its ADDR value.
2147 void
2149 {
2150 Wavefront *wf = gpuDynInst->wavefront();
// decLGKMInstsIssued() is called unconditionally, before the exec-mask
// check.
2151 wf->decLGKMInstsIssued();
2152
2153 if (gpuDynInst->exec_mask.none()) {
2154 return;
2155 }
2156
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
2157 gpuDynInst->execUnitId = wf->execUnitId;
2158 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2159 gpuDynInst->latency.set(gpuDynInst->computeUnit()
2160 ->cyclesToTicks(Cycles(24)));
2161 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2162 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
2163 VecOperandU32 vdst(gpuDynInst, extData.VDST);
2164
2165 addr.read();
2166 data.read();
2167
2168 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2169 if (gpuDynInst->exec_mask[lane]) {
// OFFSET1 is asserted to be zero; only OFFSET0 is added to the address.
2176 assert(!instData.OFFSET1);
// Destination lane = bits [7:2] of (addr + OFFSET0).
2183 int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
2184 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
2185 "of bounds.\n", gpuDynInst->disassemble(), index);
// The destination receives this lane's data if wf->execMask(index) is set
// and 0 otherwise.
// NOTE(review): vdst lanes that no active lane targets are not assigned in
// this loop; their value at write() depends on VecOperandU32's initial
// contents -- confirm.
2191 if (wf->execMask(index)) {
2192 vdst[index] = data[lane];
2193 } else {
2194 vdst[index] = 0;
2195 }
2196 }
2197 }
2198
2199 vdst.write();
2200
// Calls scheduleWriteOperandsFromLoad() on the wavefront's vector register
// file and decrements rdLmReqsInPipe directly.
2207 wf->computeUnit->vrf[wf->simdId]->
2208 scheduleWriteOperandsFromLoad(wf, gpuDynInst);
2213 wf->rdLmReqsInPipe--;
2214 } // execute
2215 // --- Inst_DS__DS_BPERMUTE_B32 class methods ---
2216
// Constructor: passes the mnemonic "ds_bpermute_b32" to the Inst_DS base and
// flags the instruction as a memory reference and a load.
2218 : Inst_DS(iFmt, "ds_bpermute_b32")
2219 {
2220 setFlag(MemoryRef);
2226 setFlag(Load);
2227 } // Inst_DS__DS_BPERMUTE_B32
2228
// Destructor: empty body.
2230 {
2231 } // ~Inst_DS__DS_BPERMUTE_B32
2232
2233 // --- description from .arch file ---
2234 // Backward permute.
// execute: performs the whole permute here, without calling
// localMemoryPipe.issueRequest(). Each active lane pulls the DATA0 value of
// the source lane selected by its own ADDR value.
2235 void
2237 {
2238 Wavefront *wf = gpuDynInst->wavefront();
// decLGKMInstsIssued() is called unconditionally, before the exec-mask
// check.
2239 wf->decLGKMInstsIssued();
2240
2241 if (gpuDynInst->exec_mask.none()) {
2242 return;
2243 }
2244
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
2245 gpuDynInst->execUnitId = wf->execUnitId;
2246 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2247 gpuDynInst->latency.set(gpuDynInst->computeUnit()
2248 ->cyclesToTicks(Cycles(24)));
2249 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2250 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
2251 VecOperandU32 vdst(gpuDynInst, extData.VDST);
2252
2253 addr.read();
2254 data.read();
2255
2256 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2257 if (gpuDynInst->exec_mask[lane]) {
// OFFSET1 is asserted to be zero; only OFFSET0 is added to the address.
2264 assert(!instData.OFFSET1);
// Source lane = bits [7:2] of (addr + OFFSET0).
2271 int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
2272 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
2273 "of bounds.\n", gpuDynInst->disassemble(), index);
// This lane receives the source lane's data if wf->execMask(index) is set
// and 0 otherwise.
2279 if (wf->execMask(index)) {
2280 vdst[lane] = data[index];
2281 } else {
2282 vdst[lane] = 0;
2283 }
2284 }
2285 }
2286
2287 vdst.write();
2288
// Calls scheduleWriteOperandsFromLoad() on the wavefront's vector register
// file and decrements rdLmReqsInPipe directly.
2295 wf->computeUnit->vrf[wf->simdId]->
2296 scheduleWriteOperandsFromLoad(wf, gpuDynInst);
2301 wf->rdLmReqsInPipe--;
2302 } // execute
2303
2304 // --- Inst_DS__DS_ADD_U64 class methods ---
2305
// Constructor: passes the mnemonic "ds_add_u64" to the Inst_DS base and sets
// the MemoryRef, GroupSegment, AtomicAdd and AtomicNoReturn flags.
2307 : Inst_DS(iFmt, "ds_add_u64")
2308 {
2309 setFlag(MemoryRef);
2310 setFlag(GroupSegment);
2311 setFlag(AtomicAdd);
2312 setFlag(AtomicNoReturn);
2313 } // Inst_DS__DS_ADD_U64
2314
// Destructor: empty body.
2316 {
2317 } // ~Inst_DS__DS_ADD_U64
2318
2319 // --- description from .arch file ---
2320 // 64b:
2321 // MEM[ADDR] += DATA[0:1];
// execute: reads the ADDR and 64-bit DATA0 operands, passes the address to
// calcAddr(), stages the per-lane operand in a_data and hands the
// instruction to the compute unit's localMemoryPipe.
2322 void
2324 {
2325 Wavefront *wf = gpuDynInst->wavefront();
2326
// No lane is active: call decLGKMInstsIssued() and return without issuing.
2327 if (gpuDynInst->exec_mask.none()) {
2328 wf->decLGKMInstsIssued();
2329 return;
2330 }
2331
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
2332 gpuDynInst->execUnitId = wf->execUnitId;
2333 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2334 gpuDynInst->latency.set(
2335 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2336 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2337 ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
2338
2339 addr.read();
2340 data.read();
2341
2342 calcAddr(gpuDynInst, addr);
2343
// Copy each active lane's 64-bit operand into a_data (viewed as a
// VecElemU64 array); entries for inactive lanes are left untouched.
2344 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2345 if (gpuDynInst->exec_mask[lane]) {
2346 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
2347 = data[lane];
2348 }
2349 }
2350
2351 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2352 } // execute
2353
// initiateAcc: forms one offset from the two offset fields (OFFSET1 shifted
// left by 8, OR-ed with OFFSET0).
// NOTE(review): the statement at listing line 2361, which presumably
// consumes 'offset', is not present in this listing -- check the real
// source file.
2354 void
2356 {
2357 Addr offset0 = instData.OFFSET0;
2358 Addr offset1 = instData.OFFSET1;
2359 Addr offset = (offset1 << 8) | offset0;
2360
2362 } // initiateAcc
2363
// completeAcc: empty body.
2364 void
2366 {
2367 } // completeAcc
2368 // --- Inst_DS__DS_SUB_U64 class methods ---
2369
2371 : Inst_DS(iFmt, "ds_sub_u64")
2372 {
2373 } // Inst_DS__DS_SUB_U64
2374
2376 {
2377 } // ~Inst_DS__DS_SUB_U64
2378
2379 // --- description from .arch file ---
2380 // 64b:
2381 // tmp = MEM[ADDR];
2382 // MEM[ADDR] -= DATA[0:1];
2383 // RETURN_DATA[0:1] = tmp.
2384 void
2386 {
2388 } // execute
2389 // --- Inst_DS__DS_RSUB_U64 class methods ---
2390
2392 : Inst_DS(iFmt, "ds_rsub_u64")
2393 {
2394 } // Inst_DS__DS_RSUB_U64
2395
2397 {
2398 } // ~Inst_DS__DS_RSUB_U64
2399
2400 // --- description from .arch file ---
2401 // 64b:
2402 // tmp = MEM[ADDR];
2403 // MEM[ADDR] = DATA - MEM[ADDR];
2404 // RETURN_DATA = tmp.
2405 // Subtraction with reversed operands.
2406 void
2408 {
2410 } // execute
2411 // --- Inst_DS__DS_INC_U64 class methods ---
2412
2414 : Inst_DS(iFmt, "ds_inc_u64")
2415 {
2416 } // Inst_DS__DS_INC_U64
2417
2419 {
2420 } // ~Inst_DS__DS_INC_U64
2421
2422 // --- description from .arch file ---
2423 // 64b:
2424 // tmp = MEM[ADDR];
2425 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
2426 // RETURN_DATA[0:1] = tmp.
2427 void
2429 {
2431 } // execute
2432 // --- Inst_DS__DS_DEC_U64 class methods ---
2433
2435 : Inst_DS(iFmt, "ds_dec_u64")
2436 {
2437 } // Inst_DS__DS_DEC_U64
2438
2440 {
2441 } // ~Inst_DS__DS_DEC_U64
2442
2443 // --- description from .arch file ---
2444 // 64b:
2445 // tmp = MEM[ADDR];
2446 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
2447 // (unsigned compare);
2448 // RETURN_DATA[0:1] = tmp.
2449 void
2451 {
2453 } // execute
2454 // --- Inst_DS__DS_MIN_I64 class methods ---
2455
2457 : Inst_DS(iFmt, "ds_min_i64")
2458 {
2459 } // Inst_DS__DS_MIN_I64
2460
2462 {
2463 } // ~Inst_DS__DS_MIN_I64
2464
2465 // --- description from .arch file ---
2466 // 64b:
2467 // tmp = MEM[ADDR];
2468 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
2469 // RETURN_DATA[0:1] = tmp.
2470 void
2472 {
2474 } // execute
2475 // --- Inst_DS__DS_MAX_I64 class methods ---
2476
2478 : Inst_DS(iFmt, "ds_max_i64")
2479 {
2480 } // Inst_DS__DS_MAX_I64
2481
2483 {
2484 } // ~Inst_DS__DS_MAX_I64
2485
2486 // --- description from .arch file ---
2487 // 64b:
2488 // tmp = MEM[ADDR];
2489 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
2490 // RETURN_DATA[0:1] = tmp.
2491 void
2493 {
2495 } // execute
2496 // --- Inst_DS__DS_MIN_U64 class methods ---
2497
2499 : Inst_DS(iFmt, "ds_min_u64")
2500 {
2501 } // Inst_DS__DS_MIN_U64
2502
2504 {
2505 } // ~Inst_DS__DS_MIN_U64
2506
2507 // --- description from .arch file ---
2508 // 64b:
2509 // tmp = MEM[ADDR];
2510 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
2511 // RETURN_DATA[0:1] = tmp.
2512 void
2514 {
2516 } // execute
2517 // --- Inst_DS__DS_MAX_U64 class methods ---
2518
2520 : Inst_DS(iFmt, "ds_max_u64")
2521 {
2522 } // Inst_DS__DS_MAX_U64
2523
2525 {
2526 } // ~Inst_DS__DS_MAX_U64
2527
2528 // --- description from .arch file ---
2529 // 64b:
2530 // tmp = MEM[ADDR];
2531 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
2532 // RETURN_DATA[0:1] = tmp.
2533 void
2535 {
2537 } // execute
2538 // --- Inst_DS__DS_AND_B64 class methods ---
2539
2541 : Inst_DS(iFmt, "ds_and_b64")
2542 {
2543 } // Inst_DS__DS_AND_B64
2544
2546 {
2547 } // ~Inst_DS__DS_AND_B64
2548
2549 // --- description from .arch file ---
2550 // 64b:
2551 // tmp = MEM[ADDR];
2552 // MEM[ADDR] &= DATA[0:1];
2553 // RETURN_DATA[0:1] = tmp.
2554 void
2556 {
2558 } // execute
2559 // --- Inst_DS__DS_OR_B64 class methods ---
2560
2562 : Inst_DS(iFmt, "ds_or_b64")
2563 {
2564 } // Inst_DS__DS_OR_B64
2565
2567 {
2568 } // ~Inst_DS__DS_OR_B64
2569
2570 // --- description from .arch file ---
2571 // 64b:
2572 // tmp = MEM[ADDR];
2573 // MEM[ADDR] |= DATA[0:1];
2574 // RETURN_DATA[0:1] = tmp.
2575 void
2577 {
2579 } // execute
2580 // --- Inst_DS__DS_XOR_B64 class methods ---
2581
2583 : Inst_DS(iFmt, "ds_xor_b64")
2584 {
2585 } // Inst_DS__DS_XOR_B64
2586
2588 {
2589 } // ~Inst_DS__DS_XOR_B64
2590
2591 // --- description from .arch file ---
2592 // 64b:
2593 // tmp = MEM[ADDR];
2594 // MEM[ADDR] ^= DATA[0:1];
2595 // RETURN_DATA[0:1] = tmp.
2596 void
2598 {
2600 } // execute
2601 // --- Inst_DS__DS_MSKOR_B64 class methods ---
2602
2604 : Inst_DS(iFmt, "ds_mskor_b64")
2605 {
2606 } // Inst_DS__DS_MSKOR_B64
2607
2609 {
2610 } // ~Inst_DS__DS_MSKOR_B64
2611
2612 // --- description from .arch file ---
2613 // 64b:
2614 // tmp = MEM[ADDR];
2615 // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
2616 // RETURN_DATA = tmp.
2617 // Masked dword OR, D0 contains the mask and D1 contains the new value.
2618 void
2620 {
2622 } // execute
2623 // --- Inst_DS__DS_WRITE_B64 class methods ---
2624
// Constructor: passes the mnemonic "ds_write_b64" to the Inst_DS base and
// flags the instruction as a memory reference and a store.
2626 : Inst_DS(iFmt, "ds_write_b64")
2627 {
2628 setFlag(MemoryRef);
2629 setFlag(Store);
2630 } // Inst_DS__DS_WRITE_B64
2631
// Destructor: empty body.
2633 {
2634 } // ~Inst_DS__DS_WRITE_B64
2635
2636 // --- description from .arch file ---
2637 // 64b:
2638 // MEM[ADDR] = DATA.
2639 // Write qword.
// execute: reads the ADDR and 64-bit DATA0 operands, passes the address to
// calcAddr(), stages the per-lane store data in d_data and hands the
// instruction to the compute unit's localMemoryPipe.
2640 void
2642 {
2643 Wavefront *wf = gpuDynInst->wavefront();
2644
// No lane is active: call decLGKMInstsIssued() and return without issuing.
2645 if (gpuDynInst->exec_mask.none()) {
2646 wf->decLGKMInstsIssued();
2647 return;
2648 }
2649
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
2650 gpuDynInst->execUnitId = wf->execUnitId;
2651 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2652 gpuDynInst->latency.set(
2653 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2654 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2655 ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
2656
2657 addr.read();
2658 data.read();
2659
2660 calcAddr(gpuDynInst, addr);
2661
// Copy each active lane's 64-bit value into d_data (viewed as a VecElemU64
// array); entries for inactive lanes are left untouched.
2662 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2663 if (gpuDynInst->exec_mask[lane]) {
2664 (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
2665 = data[lane];
2666 }
2667 }
2668
2669 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2670 } // execute
2671
// initiateAcc: forms one offset from the two offset fields (OFFSET1 shifted
// left by 8, OR-ed with OFFSET0) and calls initMemWrite with VecElemU64
// elements.
2672 void
2674 {
2675 Addr offset0 = instData.OFFSET0;
2676 Addr offset1 = instData.OFFSET1;
2677 Addr offset = (offset1 << 8) | offset0;
2678
2679 initMemWrite<VecElemU64>(gpuDynInst, offset);
2680 } // initiateAcc
2681
// completeAcc: empty body.
2682 void
2684 {
2685 } // completeAcc
2686 // --- Inst_DS__DS_WRITE2_B64 class methods ---
2687
// Constructor: passes the mnemonic "ds_write2_b64" to the Inst_DS base and
// flags the instruction as a memory reference and a store.
2689 : Inst_DS(iFmt, "ds_write2_b64")
2690 {
2691 setFlag(MemoryRef);
2692 setFlag(Store);
2693 } // Inst_DS__DS_WRITE2_B64
2694
// Destructor: empty body.
2696 {
2697 } // ~Inst_DS__DS_WRITE2_B64
2698
2699 // --- description from .arch file ---
2700 // 64b:
2701 // MEM[ADDR_BASE + OFFSET0 * 8] = DATA;
2702 // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2.
2703 // Write 2 qwords.
// execute: reads ADDR plus the two 64-bit data operands (DATA0, DATA1),
// passes the address to calcAddr(), stages both values per lane in d_data
// and hands the instruction to the compute unit's localMemoryPipe.
2704 void
2706 {
2707 Wavefront *wf = gpuDynInst->wavefront();
2708
// No lane is active: call decLGKMInstsIssued() and return without issuing.
2709 if (gpuDynInst->exec_mask.none()) {
2710 wf->decLGKMInstsIssued();
2711 return;
2712 }
2713
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
2714 gpuDynInst->execUnitId = wf->execUnitId;
2715 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2716 gpuDynInst->latency.set(
2717 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2718 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2719 ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
2720 ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);
2721
2722 addr.read();
2723 data0.read();
2724 data1.read();
2725
2726 calcAddr(gpuDynInst, addr);
2727
// d_data is filled as a VecElemU64 array with two consecutive entries per
// lane: [2*lane] takes DATA0 and [2*lane + 1] takes DATA1. Entries for
// inactive lanes are left untouched.
2728 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2729 if (gpuDynInst->exec_mask[lane]) {
2730 (reinterpret_cast<VecElemU64*>(
2731 gpuDynInst->d_data))[lane * 2] = data0[lane];
2732 (reinterpret_cast<VecElemU64*>(
2733 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
2734 }
2735 }
2736
2737 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2738 } // execute
2739
// initiateAcc: each offset field is scaled by 8 (matching the
// "OFFSETn * 8" terms in the description above) and both are passed to
// initDualMemWrite with VecElemU64 elements.
2740 void
2742 {
2743 Addr offset0 = instData.OFFSET0 * 8;
2744 Addr offset1 = instData.OFFSET1 * 8;
2745
2746 initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
2747 }
2748
2749 void
2753 // --- Inst_DS__DS_WRITE2ST64_B64 class methods ---
2754
// Constructor: passes the mnemonic "ds_write2st64_b64" to the Inst_DS base
// and flags the instruction as a memory reference and a store.
2756 : Inst_DS(iFmt, "ds_write2st64_b64")
2757 {
2758 setFlag(MemoryRef);
2759 setFlag(Store);
2760 } // Inst_DS__DS_WRITE2ST64_B64
2761
// Destructor: empty body.
2763 {
2764 } // ~Inst_DS__DS_WRITE2ST64_B64
2765
2766 // --- description from .arch file ---
2767 // 64b:
2768 // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA;
2769 // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2;
2770 // Write 2 qwords.
// execute: reads ADDR plus the two 64-bit data operands (DATA0, DATA1),
// passes the address to calcAddr(), stages both values per lane in d_data
// and hands the instruction to the compute unit's localMemoryPipe.
2771 void
2773 {
2774 Wavefront *wf = gpuDynInst->wavefront();
2775
// No lane is active: call decLGKMInstsIssued() and return without issuing.
2776 if (gpuDynInst->exec_mask.none()) {
2777 wf->decLGKMInstsIssued();
2778 return;
2779 }
2780
// Copy the wavefront's execUnitId and set the latency to 24 compute-unit
// cycles, converted to ticks.
2781 gpuDynInst->execUnitId = wf->execUnitId;
2782 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2783 gpuDynInst->latency.set(
2784 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
2785 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
2786 ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
2787 ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);
2788
2789 addr.read();
2790 data0.read();
2791 data1.read();
2792
2793 calcAddr(gpuDynInst, addr);
2794
// d_data is filled as a VecElemU64 array with two consecutive entries per
// lane: [2*lane] takes DATA0 and [2*lane + 1] takes DATA1. Entries for
// inactive lanes are left untouched.
2795 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2796 if (gpuDynInst->exec_mask[lane]) {
2797 (reinterpret_cast<VecElemU64*>(
2798 gpuDynInst->d_data))[lane * 2] = data0[lane];
2799 (reinterpret_cast<VecElemU64*>(
2800 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
2801 }
2802 }
2803
2804 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
2805 } // execute
2806
// initiateAcc: each offset field is scaled by 8 * 64 (matching the
// description above) and both are passed to initDualMemWrite with
// VecElemU64 elements.
2807 void
2809 {
2810 Addr offset0 = instData.OFFSET0 * 8 * 64;
2811 Addr offset1 = instData.OFFSET1 * 8 * 64;
2812
2813 initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
2814 }
2815
2816 void
2820 // --- Inst_DS__DS_CMPST_B64 class methods ---
2821
2823 : Inst_DS(iFmt, "ds_cmpst_b64")
2824 {
2825 } // Inst_DS__DS_CMPST_B64
2826
2828 {
2829 } // ~Inst_DS__DS_CMPST_B64
2830
2831 // --- description from .arch file ---
2832 // 64b:
2833 // tmp = MEM[ADDR];
2834 // src = DATA2;
2835 // cmp = DATA;
2836 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
2837 // RETURN_DATA[0] = tmp.
2838 // Compare and store.
2839 // Caution, the order of src and cmp are the *opposite* of the
2840 // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode.
2841 void
2843 {
2845 } // execute
2846 // --- Inst_DS__DS_CMPST_F64 class methods ---
2847
2849 : Inst_DS(iFmt, "ds_cmpst_f64")
2850 {
2851 setFlag(F64);
2852 } // Inst_DS__DS_CMPST_F64
2853
2855 {
2856 } // ~Inst_DS__DS_CMPST_F64
2857
2858 // --- description from .arch file ---
2859 // 64b:
2860 // tmp = MEM[ADDR];
2861 // src = DATA2;
2862 // cmp = DATA;
2863 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
2864 // RETURN_DATA[0] = tmp.
2865 // Floating point compare and store that handles NaN/INF/denormal values.
2866 // Caution, the order of src and cmp are the *opposite* of the
2867 // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode.
2868 void
2870 {
2872 } // execute
2873 // --- Inst_DS__DS_MIN_F64 class methods ---
2874
2876 : Inst_DS(iFmt, "ds_min_f64")
2877 {
2878 setFlag(F64);
2879 } // Inst_DS__DS_MIN_F64
2880
2882 {
2883 } // ~Inst_DS__DS_MIN_F64
2884
2885 // --- description from .arch file ---
2886 // 64b.
2887 // tmp = MEM[ADDR];
2888 // src = DATA;
2889 // cmp = DATA2;
2890 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
2891 // Floating point minimum that handles NaN/INF/denormal values.
2892 // Note that this opcode is slightly more general-purpose than
2893 // --- BUFFER_ATOMIC_FMIN_X2.
2894 void
2896 {
2898 } // execute
2899 // --- Inst_DS__DS_MAX_F64 class methods ---
2900
2902 : Inst_DS(iFmt, "ds_max_f64")
2903 {
2904 setFlag(F64);
2905 } // Inst_DS__DS_MAX_F64
2906
2908 {
2909 } // ~Inst_DS__DS_MAX_F64
2910
2911 // --- description from .arch file ---
2912 // 64b.
2913 // tmp = MEM[ADDR];
2914 // src = DATA;
2915 // cmp = DATA2;
2916 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
2917 // Floating point maximum that handles NaN/INF/denormal values.
2918 // Note that this opcode is slightly more general-purpose than
2919 // --- BUFFER_ATOMIC_FMAX_X2.
2920 void
2922 {
2924 } // execute
2925 // --- Inst_DS__DS_ADD_RTN_U64 class methods ---
2926
2928 : Inst_DS(iFmt, "ds_add_rtn_u64")
2929 {
2930 } // Inst_DS__DS_ADD_RTN_U64
2931
2933 {
2934 } // ~Inst_DS__DS_ADD_RTN_U64
2935
2936 // --- description from .arch file ---
2937 // 64b:
2938 // tmp = MEM[ADDR];
2939 // MEM[ADDR] += DATA[0:1];
2940 // RETURN_DATA[0:1] = tmp.
2941 void
2943 {
2945 } // execute
2946 // --- Inst_DS__DS_SUB_RTN_U64 class methods ---
2947
2949 : Inst_DS(iFmt, "ds_sub_rtn_u64")
2950 {
2951 } // Inst_DS__DS_SUB_RTN_U64
2952
2954 {
2955 } // ~Inst_DS__DS_SUB_RTN_U64
2956
2957 // --- description from .arch file ---
2958 // 64b:
2959 // tmp = MEM[ADDR];
2960 // MEM[ADDR] -= DATA[0:1];
2961 // RETURN_DATA[0:1] = tmp.
2962 void
2964 {
2966 } // execute
2967 // --- Inst_DS__DS_RSUB_RTN_U64 class methods ---
2968
2970 : Inst_DS(iFmt, "ds_rsub_rtn_u64")
2971 {
2972 } // Inst_DS__DS_RSUB_RTN_U64
2973
2975 {
2976 } // ~Inst_DS__DS_RSUB_RTN_U64
2977
2978 // --- description from .arch file ---
2979 // 64b:
2980 // tmp = MEM[ADDR];
2981 // MEM[ADDR] = DATA - MEM[ADDR];
2982 // RETURN_DATA = tmp.
2983 // Subtraction with reversed operands.
2984 void
2986 {
2988 } // execute
2989 // --- Inst_DS__DS_INC_RTN_U64 class methods ---
2990
2992 : Inst_DS(iFmt, "ds_inc_rtn_u64")
2993 {
2994 } // Inst_DS__DS_INC_RTN_U64
2995
2997 {
2998 } // ~Inst_DS__DS_INC_RTN_U64
2999
3000 // --- description from .arch file ---
3001 // 64b:
3002 // tmp = MEM[ADDR];
3003 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
3004 // RETURN_DATA[0:1] = tmp.
3005 void
3007 {
3009 } // execute
3010 // --- Inst_DS__DS_DEC_RTN_U64 class methods ---
3011
3013 : Inst_DS(iFmt, "ds_dec_rtn_u64")
3014 {
3015 } // Inst_DS__DS_DEC_RTN_U64
3016
3018 {
3019 } // ~Inst_DS__DS_DEC_RTN_U64
3020
3021 // --- description from .arch file ---
3022 // 64b:
3023 // tmp = MEM[ADDR];
3024 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
3025 // (unsigned compare);
3026 // RETURN_DATA[0:1] = tmp.
3027 void
3029 {
3031 } // execute
3032 // --- Inst_DS__DS_MIN_RTN_I64 class methods ---
3033
3035 : Inst_DS(iFmt, "ds_min_rtn_i64")
3036 {
3037 } // Inst_DS__DS_MIN_RTN_I64
3038
3040 {
3041 } // ~Inst_DS__DS_MIN_RTN_I64
3042
3043 // --- description from .arch file ---
3044 // 64b:
3045 // tmp = MEM[ADDR];
3046 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
3047 // RETURN_DATA[0:1] = tmp.
3048 void
3050 {
3052 } // execute
3053 // --- Inst_DS__DS_MAX_RTN_I64 class methods ---
3054
3056 : Inst_DS(iFmt, "ds_max_rtn_i64")
3057 {
3058 } // Inst_DS__DS_MAX_RTN_I64
3059
3061 {
3062 } // ~Inst_DS__DS_MAX_RTN_I64
3063
3064 // --- description from .arch file ---
3065 // 64b:
3066 // tmp = MEM[ADDR];
3067 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
3068 // RETURN_DATA[0:1] = tmp.
3069 void
3071 {
3073 } // execute
3074 // --- Inst_DS__DS_MIN_RTN_U64 class methods ---
3075
3077 : Inst_DS(iFmt, "ds_min_rtn_u64")
3078 {
3079 } // Inst_DS__DS_MIN_RTN_U64
3080
3082 {
3083 } // ~Inst_DS__DS_MIN_RTN_U64
3084
3085 // --- description from .arch file ---
3086 // 64b:
3087 // tmp = MEM[ADDR];
3088 // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
3089 // RETURN_DATA[0:1] = tmp.
3090 void
3092 {
3094 } // execute
3095 // --- Inst_DS__DS_MAX_RTN_U64 class methods ---
3096
3098 : Inst_DS(iFmt, "ds_max_rtn_u64")
3099 {
3100 } // Inst_DS__DS_MAX_RTN_U64
3101
3103 {
3104 } // ~Inst_DS__DS_MAX_RTN_U64
3105
3106 // --- description from .arch file ---
3107 // 64b:
3108 // tmp = MEM[ADDR];
3109 // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
3110 // RETURN_DATA[0:1] = tmp.
3111 void
3113 {
3115 } // execute
3116 // --- Inst_DS__DS_AND_RTN_B64 class methods ---
3117
3119 : Inst_DS(iFmt, "ds_and_rtn_b64")
3120 {
3121 } // Inst_DS__DS_AND_RTN_B64
3122
3124 {
3125 } // ~Inst_DS__DS_AND_RTN_B64
3126
3127 // --- description from .arch file ---
3128 // 64b:
3129 // tmp = MEM[ADDR];
3130 // MEM[ADDR] &= DATA[0:1];
3131 // RETURN_DATA[0:1] = tmp.
3132 void
3134 {
3136 } // execute
3137 // --- Inst_DS__DS_OR_RTN_B64 class methods ---
3138
3140 : Inst_DS(iFmt, "ds_or_rtn_b64")
3141 {
3142 } // Inst_DS__DS_OR_RTN_B64
3143
3145 {
3146 } // ~Inst_DS__DS_OR_RTN_B64
3147
3148 // --- description from .arch file ---
3149 // 64b:
3150 // tmp = MEM[ADDR];
3151 // MEM[ADDR] |= DATA[0:1];
3152 // RETURN_DATA[0:1] = tmp.
3153 void
3155 {
3157 } // execute
3158 // --- Inst_DS__DS_XOR_RTN_B64 class methods ---
3159
3161 : Inst_DS(iFmt, "ds_xor_rtn_b64")
3162 {
3163 } // Inst_DS__DS_XOR_RTN_B64
3164
3166 {
3167 } // ~Inst_DS__DS_XOR_RTN_B64
3168
3169 // --- description from .arch file ---
3170 // 64b:
3171 // tmp = MEM[ADDR];
3172 // MEM[ADDR] ^= DATA[0:1];
3173 // RETURN_DATA[0:1] = tmp.
3174 void
3176 {
3178 } // execute
3179 // --- Inst_DS__DS_MSKOR_RTN_B64 class methods ---
3180
3182 : Inst_DS(iFmt, "ds_mskor_rtn_b64")
3183 {
3184 } // Inst_DS__DS_MSKOR_RTN_B64
3185
3187 {
3188 } // ~Inst_DS__DS_MSKOR_RTN_B64
3189
3190 // --- description from .arch file ---
3191 // 64b:
3192 // tmp = MEM[ADDR];
3193 // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
3194 // RETURN_DATA = tmp.
3195 // Masked dword OR, D0 contains the mask and D1 contains the new value.
3196 void
3198 {
3200 } // execute
3201 // --- Inst_DS__DS_WRXCHG_RTN_B64 class methods ---
3202
3204 : Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
3205 {
3206 } // Inst_DS__DS_WRXCHG_RTN_B64
3207
3209 {
3210 } // ~Inst_DS__DS_WRXCHG_RTN_B64
3211
3212 // --- description from .arch file ---
3213 // tmp = MEM[ADDR];
3214 // MEM[ADDR] = DATA;
3215 // RETURN_DATA = tmp.
3216 // Write-exchange operation.
3217 void
3219 {
3221 } // execute
3222 // --- Inst_DS__DS_WRXCHG2_RTN_B64 class methods ---
3223
3225 : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
3226 {
3227 } // Inst_DS__DS_WRXCHG2_RTN_B64
3228
3230 {
3231 } // ~Inst_DS__DS_WRXCHG2_RTN_B64
3232
3233 // --- description from .arch file ---
3234 // Write-exchange 2 separate qwords.
3235 void
3237 {
3239 } // execute
3240 // --- Inst_DS__DS_WRXCHG2ST64_RTN_B64 class methods ---
3241
3243 InFmt_DS *iFmt)
3244 : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
3245 {
3246 } // Inst_DS__DS_WRXCHG2ST64_RTN_B64
3247
3249 {
3250 } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64
3251
3252 // --- description from .arch file ---
3253 // Write-exchange 2 qwords with a stride of 64 qwords.
3254 void
3259 // --- Inst_DS__DS_CMPST_RTN_B64 class methods ---
3260
3262 : Inst_DS(iFmt, "ds_cmpst_rtn_b64")
3263 {
3264 } // Inst_DS__DS_CMPST_RTN_B64
3265
3267 {
3268 } // ~Inst_DS__DS_CMPST_RTN_B64
3269
3270 // --- description from .arch file ---
3271 // 64b:
3272 // tmp = MEM[ADDR];
3273 // src = DATA2;
3274 // cmp = DATA;
3275 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
3276 // RETURN_DATA[0] = tmp.
3277 // Compare and store.
3278 // Caution, the order of src and cmp are the *opposite* of the
3279 // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode.
3280 void
3282 {
3284 } // execute
3285 // --- Inst_DS__DS_CMPST_RTN_F64 class methods ---
3286
3288 : Inst_DS(iFmt, "ds_cmpst_rtn_f64")
3289 {
3290 setFlag(F64);
3291 } // Inst_DS__DS_CMPST_RTN_F64
3292
3294 {
3295 } // ~Inst_DS__DS_CMPST_RTN_F64
3296
3297 // --- description from .arch file ---
3298 // 64b:
3299 // tmp = MEM[ADDR];
3300 // src = DATA2;
3301 // cmp = DATA;
3302 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
3303 // RETURN_DATA[0] = tmp.
3304 // Floating point compare and store that handles NaN/INF/denormal values.
3305 // Caution, the order of src and cmp are the *opposite* of the
3306 // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode.
3307 void
3309 {
3311 } // execute
3312 // --- Inst_DS__DS_MIN_RTN_F64 class methods ---
3313
3315 : Inst_DS(iFmt, "ds_min_rtn_f64")
3316 {
3317 setFlag(F64);
3318 } // Inst_DS__DS_MIN_RTN_F64
3319
3321 {
3322 } // ~Inst_DS__DS_MIN_RTN_F64
3323
3324 // --- description from .arch file ---
3325 // 64b.
3326 // tmp = MEM[ADDR];
3327 // src = DATA;
3328 // cmp = DATA2;
3329 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
3330 // Floating point minimum that handles NaN/INF/denormal values.
3331 // Note that this opcode is slightly more general-purpose than
3332 // --- BUFFER_ATOMIC_FMIN_X2.
3333 void
3335 {
3337 } // execute
3338 // --- Inst_DS__DS_MAX_RTN_F64 class methods ---
3339
3341 : Inst_DS(iFmt, "ds_max_rtn_f64")
3342 {
3343 setFlag(F64);
3344 } // Inst_DS__DS_MAX_RTN_F64
3345
3347 {
3348 } // ~Inst_DS__DS_MAX_RTN_F64
3349
3350 // --- description from .arch file ---
3351 // 64b.
3352 // tmp = MEM[ADDR];
3353 // src = DATA;
3354 // cmp = DATA2;
3355 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
3356 // Floating point maximum that handles NaN/INF/denormal values.
3357 // Note that this opcode is slightly more general-purpose than
3358 // --- BUFFER_ATOMIC_FMAX_X2.
3359 void
3361 {
3363 } // execute
3364 // --- Inst_DS__DS_READ_B64 class methods ---
3365
3367 : Inst_DS(iFmt, "ds_read_b64")
3368 {
3369 setFlag(MemoryRef);
3370 setFlag(Load);
3371 } // Inst_DS__DS_READ_B64
3372
3374 {
3375 } // ~Inst_DS__DS_READ_B64
3376
3377 // --- description from .arch file ---
3378 // RETURN_DATA = MEM[ADDR].
3379 // Read 1 qword.
// execute() for Inst_DS__DS_READ_B64 (name taken from the section header and
// the closing "// execute" comment; the signature line itself is missing from
// this listing).
// Reads the ADDR vector operand, computes the access addresses with
// calcAddr(), and issues the instruction to the compute unit's
// localMemoryPipe. Returns early when no lane is enabled.
3380 void
3382 {
3383 Wavefront *wf = gpuDynInst->wavefront();
3384
// Nothing to do when the exec mask has no bits set: call
// decLGKMInstsIssued() on the wavefront and skip issuing the request.
3385 if (gpuDynInst->exec_mask.none()) {
3386 wf->decLGKMInstsIssued();
3387 return;
3388 }
3389
// Record the wavefront's execution unit on the dynamic instruction and set
// the instruction latency to 24 cycles, converted to ticks.
3390 gpuDynInst->execUnitId = wf->execUnitId;
3391 gpuDynInst->latency.init(gpuDynInst->computeUnit());
3392 gpuDynInst->latency.set(
3393 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
// ADDR is read as a 32-bit-per-lane vector operand.
3394 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
3395
3396 addr.read();
3397
3398 calcAddr(gpuDynInst, addr);
3399
3400 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
3401 } // execute
3402
// initiateAcc() for Inst_DS__DS_READ_B64 (signature line missing from this
// listing). Builds the instruction offset from the two OFFSET fields and
// starts a 64-bit-per-lane read via initMemRead<VecElemU64>().
3403 void
3405 {
3406 Addr offset0 = instData.OFFSET0;
3407 Addr offset1 = instData.OFFSET1;
// OFFSET1 supplies the bits above bit 7 and OFFSET0 the low bits of a single
// combined offset.
3408 Addr offset = (offset1 << 8) | offset0;
3409
3410 initMemRead<VecElemU64>(gpuDynInst, offset);
3411 } // initiateAcc
3412
// completeAcc() for Inst_DS__DS_READ_B64 (signature line missing from this
// listing). Copies the returned data from gpuDynInst->d_data into the VDST
// vector operand and writes the operand back.
3413 void
3415 {
3416 VecOperandU64 vdst(gpuDynInst, extData.VDST);
3417
// d_data is viewed as an array of VecElemU64 indexed by lane; only lanes
// enabled in the exec mask are copied.
3418 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3419 if (gpuDynInst->exec_mask[lane]) {
3420 vdst[lane] = (reinterpret_cast<VecElemU64*>(
3421 gpuDynInst->d_data))[lane];
3422 }
3423 }
3424
3425 vdst.write();
3426 } // completeAcc
3427 // --- Inst_DS__DS_READ2_B64 class methods ---
3428
3430 : Inst_DS(iFmt, "ds_read2_b64")
3431 {
3432 setFlag(MemoryRef);
3433 setFlag(Load);
3434 } // Inst_DS__DS_READ2_B64
3435
3437 {
3438 } // ~Inst_DS__DS_READ2_B64
3439
3440 // --- description from .arch file ---
3441 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8];
3442 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8].
3443 // Read 2 qwords.
// execute() for Inst_DS__DS_READ2_B64 (name taken from the section header and
// the closing "// execute" comment; the signature line itself is missing from
// this listing).
// Reads the ADDR vector operand, computes the access addresses with
// calcAddr(), and issues the instruction to the compute unit's
// localMemoryPipe. Returns early when no lane is enabled.
3444 void
3446 {
3447 Wavefront *wf = gpuDynInst->wavefront();
3448
// Nothing to do when the exec mask has no bits set: call
// decLGKMInstsIssued() on the wavefront and skip issuing the request.
3449 if (gpuDynInst->exec_mask.none()) {
3450 wf->decLGKMInstsIssued();
3451 return;
3452 }
3453
// Record the wavefront's execution unit on the dynamic instruction and set
// the instruction latency to 24 cycles, converted to ticks.
3454 gpuDynInst->execUnitId = wf->execUnitId;
3455 gpuDynInst->latency.init(gpuDynInst->computeUnit());
3456 gpuDynInst->latency.set(
3457 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
// ADDR is read as a 32-bit-per-lane vector operand.
3458 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
3459
3460 addr.read();
3461
3462 calcAddr(gpuDynInst, addr);
3463
3464 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
3465 } // execute
3466
// initiateAcc() for Inst_DS__DS_READ2_B64 (signature line missing from this
// listing). Starts a dual 64-bit-per-lane read via
// initDualMemRead<VecElemU64>() using two independent offsets.
3467 void
3469 {
// Each OFFSET field is scaled by 8, matching the "OFFSETn * 8" terms in the
// .arch description of this instruction.
3470 Addr offset0 = instData.OFFSET0 * 8;
3471 Addr offset1 = instData.OFFSET1 * 8;
3472
3473 initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
3474 } // initiateAcc
3475
// completeAcc() for Inst_DS__DS_READ2_B64 (signature line missing from this
// listing). Copies the two returned values per lane from gpuDynInst->d_data
// into two destination vector operands and writes both back.
3476 void
3478 {
3479 VecOperandU64 vdst0(gpuDynInst, extData.VDST);
// NOTE(review): the second destination starts at VDST + 2, presumably because
// each 64-bit operand occupies two consecutive 32-bit registers -- confirm.
3480 VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
3481
// d_data is viewed as an array of VecElemU64 in which each lane's two values
// are adjacent: index lane * 2 feeds vdst0, index lane * 2 + 1 feeds vdst1.
// Only lanes enabled in the exec mask are copied.
3482 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3483 if (gpuDynInst->exec_mask[lane]) {
3484 vdst0[lane] = (reinterpret_cast<VecElemU64*>(
3485 gpuDynInst->d_data))[lane * 2];
3486 vdst1[lane] = (reinterpret_cast<VecElemU64*>(
3487 gpuDynInst->d_data))[lane * 2 + 1];
3488 }
3489 }
3490
3491 vdst0.write();
3492 vdst1.write();
3493 } // completeAcc
3494 // --- Inst_DS__DS_READ2ST64_B64 class methods ---
3495
3497 : Inst_DS(iFmt, "ds_read2st64_b64")
3498 {
3499 setFlag(MemoryRef);
3500 setFlag(Load);
3501 } // Inst_DS__DS_READ2ST64_B64
3502
3504 {
3505 } // ~Inst_DS__DS_READ2ST64_B64
3506
3507 // --- description from .arch file ---
3508 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64];
3509 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64].
3510 // Read 2 qwords.
// execute() for Inst_DS__DS_READ2ST64_B64 (name taken from the section header
// and the closing "// execute" comment; the signature line itself is missing
// from this listing).
// Reads the ADDR vector operand, computes the access addresses with
// calcAddr(), and issues the instruction to the compute unit's
// localMemoryPipe. Returns early when no lane is enabled.
3511 void
3513 {
3514 Wavefront *wf = gpuDynInst->wavefront();
3515
// Nothing to do when the exec mask has no bits set: call
// decLGKMInstsIssued() on the wavefront and skip issuing the request.
3516 if (gpuDynInst->exec_mask.none()) {
3517 wf->decLGKMInstsIssued();
3518 return;
3519 }
3520
// Record the wavefront's execution unit on the dynamic instruction and set
// the instruction latency to 24 cycles, converted to ticks.
3521 gpuDynInst->execUnitId = wf->execUnitId;
3522 gpuDynInst->latency.init(gpuDynInst->computeUnit());
3523 gpuDynInst->latency.set(
3524 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
// ADDR is read as a 32-bit-per-lane vector operand.
3525 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
3526
3527 addr.read();
3528
3529 calcAddr(gpuDynInst, addr);
3530
3531 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
3532 } // execute
3533
// Second method in the Inst_DS__DS_READ2ST64_B64 section. The signature line
// is missing from this listing and the closing brace carries no name comment;
// presumably this is initiateAcc(), by analogy with the sibling read
// classes -- confirm.
// Starts a dual 64-bit-per-lane read via initDualMemRead<VecElemU64>() using
// two independent offsets.
3534 void
3536 {
// Each OFFSET field is scaled by 8 * 64, matching the "OFFSETn * 8 * 64"
// terms in the .arch description of this instruction.
3537 Addr offset0 = (instData.OFFSET0 * 8 * 64);
3538 Addr offset1 = (instData.OFFSET1 * 8 * 64);
3539
3540 initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
3541 }
3542
// Third method in the Inst_DS__DS_READ2ST64_B64 section. The signature line
// is missing from this listing and the closing brace carries no name comment;
// presumably this is completeAcc(), by analogy with the sibling read
// classes -- confirm.
// Copies the two returned values per lane from gpuDynInst->d_data into two
// destination vector operands and writes both back.
3543 void
3545 {
3546 VecOperandU64 vdst0(gpuDynInst, extData.VDST);
// NOTE(review): the second destination starts at VDST + 2, presumably because
// each 64-bit operand occupies two consecutive 32-bit registers -- confirm.
3547 VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
3548
// d_data is viewed as an array of VecElemU64 in which each lane's two values
// are adjacent: index lane * 2 feeds vdst0, index lane * 2 + 1 feeds vdst1.
// Only lanes enabled in the exec mask are copied.
3549 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3550 if (gpuDynInst->exec_mask[lane]) {
3551 vdst0[lane] = (reinterpret_cast<VecElemU64*>(
3552 gpuDynInst->d_data))[lane * 2];
3553 vdst1[lane] = (reinterpret_cast<VecElemU64*>(
3554 gpuDynInst->d_data))[lane * 2 + 1];
3555 }
3556 }
3557
3558 vdst0.write();
3559 vdst1.write();
3560 }
3561 // --- Inst_DS__DS_CONDXCHG32_RTN_B64 class methods ---
3562
3564 InFmt_DS *iFmt)
3565 : Inst_DS(iFmt, "ds_condxchg32_rtn_b64")
3566 {
3567 } // Inst_DS__DS_CONDXCHG32_RTN_B64
3568
3570 {
3571 } // ~Inst_DS__DS_CONDXCHG32_RTN_B64
3572
3573 // --- description from .arch file ---
3574 // Conditional write exchange.
3575 void
3580 // --- Inst_DS__DS_ADD_SRC2_U32 class methods ---
3581
3583 : Inst_DS(iFmt, "ds_add_src2_u32")
3584 {
3585 } // Inst_DS__DS_ADD_SRC2_U32
3586
3588 {
3589 } // ~Inst_DS__DS_ADD_SRC2_U32
3590
3591 // --- description from .arch file ---
3592 // 32b:
3593 // A = ADDR_BASE;
3594 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3595 // --- {offset1[6],offset1[6:0],offset0});
3596 // MEM[A] = MEM[A] + MEM[B].
3597 void
3599 {
3601 } // execute
3602 // --- Inst_DS__DS_SUB_SRC2_U32 class methods ---
3603
3605 : Inst_DS(iFmt, "ds_sub_src2_u32")
3606 {
3607 } // Inst_DS__DS_SUB_SRC2_U32
3608
3610 {
3611 } // ~Inst_DS__DS_SUB_SRC2_U32
3612
3613 // --- description from .arch file ---
3614 // 32b:
3615 // A = ADDR_BASE;
3616 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3617 // --- {offset1[6],offset1[6:0],offset0});
3618 // MEM[A] = MEM[A] - MEM[B].
3619 void
3621 {
3623 } // execute
3624 // --- Inst_DS__DS_RSUB_SRC2_U32 class methods ---
3625
3627 : Inst_DS(iFmt, "ds_rsub_src2_u32")
3628 {
3629 } // Inst_DS__DS_RSUB_SRC2_U32
3630
3632 {
3633 } // ~Inst_DS__DS_RSUB_SRC2_U32
3634
3635 // --- description from .arch file ---
3636 // 32b:
3637 // A = ADDR_BASE;
3638 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3639 // --- {offset1[6],offset1[6:0],offset0});
3640 // MEM[A] = MEM[B] - MEM[A].
3641 void
3643 {
3645 } // execute
3646 // --- Inst_DS__DS_INC_SRC2_U32 class methods ---
3647
3649 : Inst_DS(iFmt, "ds_inc_src2_u32")
3650 {
3651 } // Inst_DS__DS_INC_SRC2_U32
3652
3654 {
3655 } // ~Inst_DS__DS_INC_SRC2_U32
3656
3657 // --- description from .arch file ---
3658 // 32b:
3659 // A = ADDR_BASE;
3660 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3661 // --- {offset1[6],offset1[6:0],offset0});
3662 // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
3663 void
3665 {
3667 } // execute
3668 // --- Inst_DS__DS_DEC_SRC2_U32 class methods ---
3669
3671 : Inst_DS(iFmt, "ds_dec_src2_u32")
3672 {
3673 } // Inst_DS__DS_DEC_SRC2_U32
3674
3676 {
3677 } // ~Inst_DS__DS_DEC_SRC2_U32
3678
3679 // --- description from .arch file ---
3680 // 32b:
3681 // A = ADDR_BASE;
3682 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3683 // --- {offset1[6],offset1[6:0],offset0});
3684 // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
3685 // Uint decrement.
3686 void
3688 {
3690 } // execute
3691 // --- Inst_DS__DS_MIN_SRC2_I32 class methods ---
3692
3694 : Inst_DS(iFmt, "ds_min_src2_i32")
3695 {
3696 } // Inst_DS__DS_MIN_SRC2_I32
3697
3699 {
3700 } // ~Inst_DS__DS_MIN_SRC2_I32
3701
3702 // --- description from .arch file ---
3703 // 32b:
3704 // A = ADDR_BASE;
3705 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3706 // --- {offset1[6],offset1[6:0],offset0});
3707 // MEM[A] = min(MEM[A], MEM[B]).
3708 void
3710 {
3712 } // execute
3713 // --- Inst_DS__DS_MAX_SRC2_I32 class methods ---
3714
3716 : Inst_DS(iFmt, "ds_max_src2_i32")
3717 {
3718 } // Inst_DS__DS_MAX_SRC2_I32
3719
3721 {
3722 } // ~Inst_DS__DS_MAX_SRC2_I32
3723
3724 // --- description from .arch file ---
3725 // 32b:
3726 // A = ADDR_BASE;
3727 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3728 // --- {offset1[6],offset1[6:0],offset0});
3729 // MEM[A] = max(MEM[A], MEM[B]).
3730 void
3732 {
3734 } // execute
3735 // --- Inst_DS__DS_MIN_SRC2_U32 class methods ---
3736
3738 : Inst_DS(iFmt, "ds_min_src2_u32")
3739 {
3740 } // Inst_DS__DS_MIN_SRC2_U32
3741
3743 {
3744 } // ~Inst_DS__DS_MIN_SRC2_U32
3745
3746 // --- description from .arch file ---
3747 // 32b:
3748 // A = ADDR_BASE;
3749 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3750 // --- {offset1[6],offset1[6:0],offset0});
3751 // MEM[A] = min(MEM[A], MEM[B]).
3752 void
3754 {
3756 } // execute
3757 // --- Inst_DS__DS_MAX_SRC2_U32 class methods ---
3758
3760 : Inst_DS(iFmt, "ds_max_src2_u32")
3761 {
3762 } // Inst_DS__DS_MAX_SRC2_U32
3763
3765 {
3766 } // ~Inst_DS__DS_MAX_SRC2_U32
3767
3768 // --- description from .arch file ---
3769 // 32b:
3770 // A = ADDR_BASE;
3771 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3772 // --- {offset1[6],offset1[6:0],offset0});
3773 // MEM[A] = max(MEM[A], MEM[B]).
3774 void
3776 {
3778 } // execute
3779 // --- Inst_DS__DS_AND_SRC2_B32 class methods ---
3780
3782 : Inst_DS(iFmt, "ds_and_src2_b32")
3783 {
3784 } // Inst_DS__DS_AND_SRC2_B32
3785
3787 {
3788 } // ~Inst_DS__DS_AND_SRC2_B32
3789
3790 // --- description from .arch file ---
3791 // 32b:
3792 // A = ADDR_BASE;
3793 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3794 // --- {offset1[6],offset1[6:0],offset0});
3795 // MEM[A] = MEM[A] & MEM[B].
3796 void
3798 {
3800 } // execute
3801 // --- Inst_DS__DS_OR_SRC2_B32 class methods ---
3802
3804 : Inst_DS(iFmt, "ds_or_src2_b32")
3805 {
3806 } // Inst_DS__DS_OR_SRC2_B32
3807
3809 {
3810 } // ~Inst_DS__DS_OR_SRC2_B32
3811
3812 // --- description from .arch file ---
3813 // 32b:
3814 // A = ADDR_BASE;
3815 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3816 // --- {offset1[6],offset1[6:0],offset0});
3817 // MEM[A] = MEM[A] | MEM[B].
3818 void
3820 {
3822 } // execute
3823 // --- Inst_DS__DS_XOR_SRC2_B32 class methods ---
3824
3826 : Inst_DS(iFmt, "ds_xor_src2_b32")
3827 {
3828 } // Inst_DS__DS_XOR_SRC2_B32
3829
3831 {
3832 } // ~Inst_DS__DS_XOR_SRC2_B32
3833
3834 // --- description from .arch file ---
3835 // 32b:
3836 // A = ADDR_BASE;
3837 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3838 // --- {offset1[6],offset1[6:0],offset0});
3839 // MEM[A] = MEM[A] ^ MEM[B].
3840 void
3842 {
3844 } // execute
3845 // --- Inst_DS__DS_WRITE_SRC2_B32 class methods ---
3846
3848 : Inst_DS(iFmt, "ds_write_src2_b32")
3849 {
3850 setFlag(MemoryRef);
3851 setFlag(Store);
3852 } // Inst_DS__DS_WRITE_SRC2_B32
3853
3855 {
3856 } // ~Inst_DS__DS_WRITE_SRC2_B32
3857
3858 // --- description from .arch file ---
3859 // 32b:
3860 // A = ADDR_BASE;
3861 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3862 // --- {offset1[6],offset1[6:0],offset0});
3863 // MEM[A] = MEM[B].
3864 // Write dword.
3865 void
3867 {
3869 } // execute
3870 // --- Inst_DS__DS_MIN_SRC2_F32 class methods ---
3871
3873 : Inst_DS(iFmt, "ds_min_src2_f32")
3874 {
3875 setFlag(F32);
3876 } // Inst_DS__DS_MIN_SRC2_F32
3877
3879 {
3880 } // ~Inst_DS__DS_MIN_SRC2_F32
3881
3882 // --- description from .arch file ---
3883 // 32b:
3884 // A = ADDR_BASE;
3885 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3886 // --- {offset1[6],offset1[6:0],offset0});
3887 // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
3888 // Float, handles NaN/INF/denorm.
3889 void
3891 {
3893 } // execute
3894 // --- Inst_DS__DS_MAX_SRC2_F32 class methods ---
3895
3897 : Inst_DS(iFmt, "ds_max_src2_f32")
3898 {
3899 setFlag(F32);
3900 } // Inst_DS__DS_MAX_SRC2_F32
3901
3903 {
3904 } // ~Inst_DS__DS_MAX_SRC2_F32
3905
3906 // --- description from .arch file ---
3907 // 32b:
3908 // A = ADDR_BASE;
3909 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3910 // --- {offset1[6],offset1[6:0],offset0});
3911 // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
3912 // Float, handles NaN/INF/denorm.
3913 void
3915 {
3917 } // execute
3918 // --- Inst_DS__DS_ADD_SRC2_F32 class methods ---
3919
3921 : Inst_DS(iFmt, "ds_add_src2_f32")
3922 {
3923 setFlag(F32);
3924 } // Inst_DS__DS_ADD_SRC2_F32
3925
3927 {
3928 } // ~Inst_DS__DS_ADD_SRC2_F32
3929
3930 // --- description from .arch file ---
3931 // 32b:
3932 // A = ADDR_BASE;
3933 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
3934 // --- {offset1[6],offset1[6:0],offset0});
3935 // MEM[A] = MEM[B] + MEM[A].
3936 // Float, handles NaN/INF/denorm.
3937 void
3939 {
3941 } // execute
3942 // --- Inst_DS__DS_GWS_SEMA_RELEASE_ALL class methods ---
3943
3945 InFmt_DS *iFmt)
3946 : Inst_DS(iFmt, "ds_gws_sema_release_all")
3947 {
3948 } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL
3949
3951 {
3952 } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL
3953
3954 // --- description from .arch file ---
3955 // GDS Only: The GWS resource (rid) indicated will process this opcode by
3956 // updating the counter and labeling the specified resource as a semaphore.
3957 // //Determine the GWS resource to work on
3958 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
3959 // //Incr the state counter of the resource
3960 // state.counter[rid] = state.wave_in_queue;
3961 // state.type = SEMAPHORE;
3962 // return rd_done; //release calling wave
3963 // This action will release ALL queued waves; it will have no effect if no
3964 // --- waves are present.
3965 void
3970 // --- Inst_DS__DS_GWS_INIT class methods ---
3971
3973 : Inst_DS(iFmt, "ds_gws_init")
3974 {
3975 } // Inst_DS__DS_GWS_INIT
3976
3978 {
3979 } // ~Inst_DS__DS_GWS_INIT
3980
3981 // --- description from .arch file ---
3982 // GDS Only: Initialize a barrier or semaphore resource.
3983 // //Determine the GWS resource to work on
3984 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
3985 // //Get the value to use in init
3986 // index = find_first_valid(vector mask)
3987 // value = DATA[thread: index]
3988 // //Set the state of the resource
3989 // state.counter[rid] = lsb(value); //limit #waves
3990 // state.flag[rid] = 0;
3991 // return rd_done; //release calling wave
3992 void
3994 {
3996 } // execute
3997 // --- Inst_DS__DS_GWS_SEMA_V class methods ---
3998
4000 : Inst_DS(iFmt, "ds_gws_sema_v")
4001 {
4002 } // Inst_DS__DS_GWS_SEMA_V
4003
4005 {
4006 } // ~Inst_DS__DS_GWS_SEMA_V
4007
4008 // --- description from .arch file ---
4009 // GDS Only: The GWS resource indicated will process this opcode by
4010 // updating the counter and labeling the resource as a semaphore.
4011 // //Determine the GWS resource to work on
4012 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
4013 // //Incr the state counter of the resource
4014 // state.counter[rid]++;
4015 // state.type = SEMAPHORE;
4016 // return rd_done; //release calling wave
4017 // This action will release one wave if any are queued in this resource.
4018 void
4020 {
4022 } // execute
4023 // --- Inst_DS__DS_GWS_SEMA_BR class methods ---
4024
4026 : Inst_DS(iFmt, "ds_gws_sema_br")
4027 {
4028 } // Inst_DS__DS_GWS_SEMA_BR
4029
4031 {
4032 } // ~Inst_DS__DS_GWS_SEMA_BR
4033
4034 // --- description from .arch file ---
4035 // GDS Only: The GWS resource indicated will process this opcode by
4036 // updating the counter by the bulk release delivered count and labeling
4037 // the resource as a semaphore.
4038 // //Determine the GWS resource to work on
4039 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
4040 // index = find first valid (vector mask)
4041 // count = DATA[thread: index];
4042 // //Add count to the resource state counter
4043 // state.counter[rid] += count;
4044 // state.type = SEMAPHORE;
4045 // return rd_done; //release calling wave
4046 // This action will release count number of waves, immediately if queued,
4047 // or as they arrive from the noted resource.
4048 void
4050 {
4052 } // execute
4053 // --- Inst_DS__DS_GWS_SEMA_P class methods ---
4054
4056 : Inst_DS(iFmt, "ds_gws_sema_p")
4057 {
4058 } // Inst_DS__DS_GWS_SEMA_P
4059
4061 {
4062 } // ~Inst_DS__DS_GWS_SEMA_P
4063
4064 // --- description from .arch file ---
4065 // GDS Only: The GWS resource indicated will process this opcode by
4066 // queueing it until counter enables a release and then decrementing the
4067 // counter of the resource as a semaphore.
4068 // //Determine the GWS resource to work on
4069 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
4070 // state.type = SEMAPHORE;
4071 // ENQUEUE until(state[rid].counter > 0)
4072 // state[rid].counter--;
4073 // return rd_done
4074 void
4076 {
4078 } // execute
4079 // --- Inst_DS__DS_GWS_BARRIER class methods ---
4080
4082 : Inst_DS(iFmt, "ds_gws_barrier")
4083 {
4084 } // Inst_DS__DS_GWS_BARRIER
4085
4087 {
4088 } // ~Inst_DS__DS_GWS_BARRIER
4089
4090 // --- description from .arch file ---
4091 // GDS Only: The GWS resource indicated will process this opcode by
4092 // queueing it until barrier is satisfied. The number of waves needed is
4093 // passed in as DATA of first valid thread.
4094 // //Determine the GWS resource to work on
4095 // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + OFFSET0[5:0];
4096 // index = find first valid (vector mask);
4097 // value = DATA[thread: index];
4098 // // Input Decision Machine
4099 // state.type[rid] = BARRIER;
4100 // if (state[rid].counter <= 0) {
4101 // thread[rid].flag = state[rid].flag;
4102 // ENQUEUE;
4103 // state[rid].flag = !state.flag;
4104 // state[rid].counter = value;
4105 // return rd_done;
4106 // } else {
4107 // state[rid].counter--;
4108 // thread.flag = state[rid].flag;
4109 // ENQUEUE;
4110 // }
4111 // Since the waves deliver the count for the next barrier, this function
4112 // can have a different size barrier for each occurrence.
4113 // // Release Machine
4114 // if (state.type == BARRIER) {
4115 // if (state.flag != thread.flag) {
4116 // return rd_done;
4117 // }
4118 // }
4119 void
4121 {
4123 } // execute
4124 // --- Inst_DS__DS_CONSUME class methods ---
4125
4127 : Inst_DS(iFmt, "ds_consume")
4128 {
4129 } // Inst_DS__DS_CONSUME
4130
4132 {
4133 } // ~Inst_DS__DS_CONSUME
4134
4135 // --- description from .arch file ---
4136 // LDS & GDS. Subtract (count_bits(exec_mask)) from the value stored in DS
4137 // memory at (M0.base + instr_offset). Return the pre-operation value to
4138 // VGPRs.
4139 void
4141 {
4143 } // execute
4144 // --- Inst_DS__DS_APPEND class methods ---
4145
4147 : Inst_DS(iFmt, "ds_append")
4148 {
4149 } // Inst_DS__DS_APPEND
4150
4152 {
4153 } // ~Inst_DS__DS_APPEND
4154
4155 // --- description from .arch file ---
4156 // LDS & GDS. Add (count_bits(exec_mask)) to the value stored in DS memory
4157 // at (M0.base + instr_offset). Return the pre-operation value to VGPRs.
4158 void
4160 {
4162 } // execute
4163 // --- Inst_DS__DS_ORDERED_COUNT class methods ---
4164
4166 : Inst_DS(iFmt, "ds_ordered_count")
4167 {
4168 } // Inst_DS__DS_ORDERED_COUNT
4169
4171 {
4172 } // ~Inst_DS__DS_ORDERED_COUNT
4173
4174 // --- description from .arch file ---
4175 // GDS-only. Add (count_bits(exec_mask)) to one of 4 dedicated
4176 // ordered-count counters (aka 'packers'). Additional bits of instr.offset
4177 // field are overloaded to hold packer-id, 'last'.
4178 void
4180 {
4182 } // execute
4183 // --- Inst_DS__DS_ADD_SRC2_U64 class methods ---
4184
4186 : Inst_DS(iFmt, "ds_add_src2_u64")
4187 {
4188 } // Inst_DS__DS_ADD_SRC2_U64
4189
4191 {
4192 } // ~Inst_DS__DS_ADD_SRC2_U64
4193
4194 // --- description from .arch file ---
4195 // 64b:
4196 // A = ADDR_BASE;
4197 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4198 // --- {offset1[6],offset1[6:0],offset0});
4199 // MEM[A] = MEM[A] + MEM[B].
4200 void
4202 {
4204 } // execute
4205 // --- Inst_DS__DS_SUB_SRC2_U64 class methods ---
4206
4208 : Inst_DS(iFmt, "ds_sub_src2_u64")
4209 {
4210 } // Inst_DS__DS_SUB_SRC2_U64
4211
4213 {
4214 } // ~Inst_DS__DS_SUB_SRC2_U64
4215
4216 // --- description from .arch file ---
4217 // 64b:
4218 // A = ADDR_BASE;
4219 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4220 // --- {offset1[6],offset1[6:0],offset0});
4221 // MEM[A] = MEM[A] - MEM[B].
4222 void
4224 {
4226 } // execute
4227 // --- Inst_DS__DS_RSUB_SRC2_U64 class methods ---
4228
4230 : Inst_DS(iFmt, "ds_rsub_src2_u64")
4231 {
4232 } // Inst_DS__DS_RSUB_SRC2_U64
4233
4235 {
4236 } // ~Inst_DS__DS_RSUB_SRC2_U64
4237
4238 // --- description from .arch file ---
4239 // 64b:
4240 // A = ADDR_BASE;
4241 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4242 // --- {offset1[6],offset1[6:0],offset0});
4243 // MEM[A] = MEM[B] - MEM[A].
4244 void
4246 {
4248 } // execute
4249 // --- Inst_DS__DS_INC_SRC2_U64 class methods ---
4250
4252 : Inst_DS(iFmt, "ds_inc_src2_u64")
4253 {
4254 } // Inst_DS__DS_INC_SRC2_U64
4255
4257 {
4258 } // ~Inst_DS__DS_INC_SRC2_U64
4259
4260 // --- description from .arch file ---
4261 // 64b:
4262 // A = ADDR_BASE;
4263 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4264 // --- {offset1[6],offset1[6:0],offset0});
4265 // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
4266 void
4268 {
4270 } // execute
4271 // --- Inst_DS__DS_DEC_SRC2_U64 class methods ---
4272
4274 : Inst_DS(iFmt, "ds_dec_src2_u64")
4275 {
4276 } // Inst_DS__DS_DEC_SRC2_U64
4277
4279 {
4280 } // ~Inst_DS__DS_DEC_SRC2_U64
4281
4282 // --- description from .arch file ---
4283 // 64b:
4284 // A = ADDR_BASE;
4285 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4286 // --- {offset1[6],offset1[6:0],offset0});
4287 // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
4288 // Uint decrement.
4289 void
4291 {
4293 } // execute
4294 // --- Inst_DS__DS_MIN_SRC2_I64 class methods ---
4295
4297 : Inst_DS(iFmt, "ds_min_src2_i64")
4298 {
4299 } // Inst_DS__DS_MIN_SRC2_I64
4300
4302 {
4303 } // ~Inst_DS__DS_MIN_SRC2_I64
4304
4305 // --- description from .arch file ---
4306 // 64b:
4307 // A = ADDR_BASE;
4308 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4309 // --- {offset1[6],offset1[6:0],offset0});
4310 // MEM[A] = min(MEM[A], MEM[B]).
4311 void
4313 {
4315 } // execute
4316 // --- Inst_DS__DS_MAX_SRC2_I64 class methods ---
4317
4319 : Inst_DS(iFmt, "ds_max_src2_i64")
4320 {
4321 } // Inst_DS__DS_MAX_SRC2_I64
4322
4324 {
4325 } // ~Inst_DS__DS_MAX_SRC2_I64
4326
4327 // --- description from .arch file ---
4328 // 64b:
4329 // A = ADDR_BASE;
4330 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4331 // --- {offset1[6],offset1[6:0],offset0});
4332 // MEM[A] = max(MEM[A], MEM[B]).
4333 void
4335 {
4337 } // execute
4338 // --- Inst_DS__DS_MIN_SRC2_U64 class methods ---
4339
4341 : Inst_DS(iFmt, "ds_min_src2_u64")
4342 {
4343 } // Inst_DS__DS_MIN_SRC2_U64
4344
4346 {
4347 } // ~Inst_DS__DS_MIN_SRC2_U64
4348
4349 // --- description from .arch file ---
4350 // 64b:
4351 // A = ADDR_BASE;
4352 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4353 // --- {offset1[6],offset1[6:0],offset0});
4354 // MEM[A] = min(MEM[A], MEM[B]).
4355 void
4357 {
4359 } // execute
4360 // --- Inst_DS__DS_MAX_SRC2_U64 class methods ---
4361
4363 : Inst_DS(iFmt, "ds_max_src2_u64")
4364 {
4365 } // Inst_DS__DS_MAX_SRC2_U64
4366
4368 {
4369 } // ~Inst_DS__DS_MAX_SRC2_U64
4370
4371 // --- description from .arch file ---
4372 // 64b:
4373 // A = ADDR_BASE;
4374 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4375 // --- {offset1[6],offset1[6:0],offset0});
4376 // MEM[A] = max(MEM[A], MEM[B]).
4377 void
4379 {
4381 } // execute
4382 // --- Inst_DS__DS_AND_SRC2_B64 class methods ---
4383
4385 : Inst_DS(iFmt, "ds_and_src2_b64")
4386 {
4387 } // Inst_DS__DS_AND_SRC2_B64
4388
4390 {
4391 } // ~Inst_DS__DS_AND_SRC2_B64
4392
4393 // --- description from .arch file ---
4394 // 64b:
4395 // A = ADDR_BASE;
4396 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4397 // --- {offset1[6],offset1[6:0],offset0});
4398 // MEM[A] = MEM[A] & MEM[B].
4399 void
4401 {
4403 } // execute
4404 // --- Inst_DS__DS_OR_SRC2_B64 class methods ---
4405
4407 : Inst_DS(iFmt, "ds_or_src2_b64")
4408 {
4409 } // Inst_DS__DS_OR_SRC2_B64
4410
4412 {
4413 } // ~Inst_DS__DS_OR_SRC2_B64
4414
4415 // --- description from .arch file ---
4416 // 64b:
4417 // A = ADDR_BASE;
4418 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4419 // --- {offset1[6],offset1[6:0],offset0});
4420 // MEM[A] = MEM[A] | MEM[B].
4421 void
4423 {
4425 } // execute
4426 // --- Inst_DS__DS_XOR_SRC2_B64 class methods ---
4427
4429 : Inst_DS(iFmt, "ds_xor_src2_b64")
4430 {
4431 } // Inst_DS__DS_XOR_SRC2_B64
4432
4434 {
4435 } // ~Inst_DS__DS_XOR_SRC2_B64
4436
4437 // --- description from .arch file ---
4438 // 64b:
4439 // A = ADDR_BASE;
4440 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4441 // --- {offset1[6],offset1[6:0],offset0});
4442 // MEM[A] = MEM[A] ^ MEM[B].
4443 void
4445 {
4447 } // execute
4448 // --- Inst_DS__DS_WRITE_SRC2_B64 class methods ---
4449
// Constructor: hands iFmt and the mnemonic "ds_write_src2_b64" to Inst_DS,
// then tags the instruction with the MemoryRef and Store flags.
4451 : Inst_DS(iFmt, "ds_write_src2_b64")
4452 {
4453 setFlag(MemoryRef);
4454 setFlag(Store);
4455 } // Inst_DS__DS_WRITE_SRC2_B64
4456
// Destructor: empty body.
4458 {
4459 } // ~Inst_DS__DS_WRITE_SRC2_B64
4460
4461 // --- description from .arch file ---
4462 // 64b:
4463 // A = ADDR_BASE;
4464 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4465 // --- {offset1[6],offset1[6:0],offset0});
4466 // MEM[A] = MEM[B].
4467 // Write qword.
// NOTE(review): source lines 4469 (signature) and 4471 (body statement) are
// missing from this listing; the behavior of execute() cannot be confirmed
// from here.
4468 void
4470 {
4472 } // execute
4473 // --- Inst_DS__DS_MIN_SRC2_F64 class methods ---
4474
// Constructor: hands iFmt and the mnemonic "ds_min_src2_f64" to Inst_DS and
// sets the F64 flag (the only flag set for this instruction).
4476 : Inst_DS(iFmt, "ds_min_src2_f64")
4477 {
4478 setFlag(F64);
4479 } // Inst_DS__DS_MIN_SRC2_F64
4480
// Destructor: empty body.
4482 {
4483 } // ~Inst_DS__DS_MIN_SRC2_F64
4484
4485 // --- description from .arch file ---
4486 // 64b:
4487 // A = ADDR_BASE;
4488 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4489 // --- {offset1[6],offset1[6:0],offset0});
4490 // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
4491 // Float, handles NaN/INF/denorm.
// NOTE(review): source lines 4493 (signature) and 4495 (body statement) are
// missing from this listing; the behavior of execute() cannot be confirmed
// from here.
4492 void
4494 {
4496 } // execute
4497 // --- Inst_DS__DS_MAX_SRC2_F64 class methods ---
4498
// Constructor: hands iFmt and the mnemonic "ds_max_src2_f64" to Inst_DS and
// sets the F64 flag (the only flag set for this instruction).
4500 : Inst_DS(iFmt, "ds_max_src2_f64")
4501 {
4502 setFlag(F64);
4503 } // Inst_DS__DS_MAX_SRC2_F64
4504
// Destructor: empty body.
4506 {
4507 } // ~Inst_DS__DS_MAX_SRC2_F64
4508
4509 // --- description from .arch file ---
4510 // 64b:
4511 // A = ADDR_BASE;
4512 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
4513 // --- {offset1[6],offset1[6:0],offset0});
4514 // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
4515 // Float, handles NaN/INF/denorm.
// NOTE(review): source lines 4517 (signature) and 4519 (body statement) are
// missing from this listing; the behavior of execute() cannot be confirmed
// from here.
4516 void
4518 {
4520 } // execute
4521 // --- Inst_DS__DS_WRITE_B96 class methods ---
4522
// Constructor: hands iFmt and the mnemonic "ds_write_b96" to Inst_DS, then
// tags the instruction with the MemoryRef and Store flags.
4524 : Inst_DS(iFmt, "ds_write_b96")
4525 {
4526 setFlag(MemoryRef);
4527 setFlag(Store);
4528 } // Inst_DS__DS_WRITE_B96
4529
// Destructor: empty body.
4531 {
4532 } // ~Inst_DS__DS_WRITE_B96
4533
4534 // --- description from .arch file ---
4535 // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0].
4536 // Tri-dword write.
// execute(): reads the address and three data registers, stages the data in
// gpuDynInst->d_data, and hands the instruction to the local memory pipe.
// (The signature on source line 4538 is not shown in this listing.)
4537 void
4539 {
4540 Wavefront *wf = gpuDynInst->wavefront();
4541 gpuDynInst->execUnitId = wf->execUnitId;
// Latency is fixed at 24 cycles, converted to ticks by the compute unit.
4542 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4543 gpuDynInst->latency.set(
4544 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
// The 96-bit source is three consecutive 32-bit registers starting at DATA0.
4545 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
4546 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
4547 ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1);
4548 ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2);
4549
// Pull the operand values from the vector register file.
4550 addr.read();
4551 data0.read();
4552 data1.read();
4553 data2.read();
4554
// calcAddr is defined elsewhere; presumably it fills the per-lane addresses
// later consumed by initiateAcc -- TODO confirm.
4555 calcAddr(gpuDynInst, addr);
4556
// For every lane enabled in exec_mask, copy the three dwords into d_data
// (viewed as a VecElemU32 array) at indices lane*4 + {0,1,2}. Index
// lane*4 + 3 is never written here.
// NOTE(review): the per-lane stride is 4 dwords although only 3 are stored,
// and initiateAcc uses initMemWrite<3>. That helper is not visible here --
// confirm it indexes d_data with the same stride (lane*4, not lane*3).
4557 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4558 if (gpuDynInst->exec_mask[lane]) {
4559 (reinterpret_cast<VecElemU32*>(
4560 gpuDynInst->d_data))[lane * 4] = data0[lane];
4561 (reinterpret_cast<VecElemU32*>(
4562 gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
4563 (reinterpret_cast<VecElemU32*>(
4564 gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
4565 }
4566 }
4567
// Queue the instruction on the compute unit's local memory pipeline.
4568 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
4569 } // execute
4570
// initiateAcc(): builds the instruction offset and starts the 3-dword write.
// (The signature on source line 4572 is not shown in this listing.)
4571 void
4573 {
4574 Addr offset0 = instData.OFFSET0;
4575 Addr offset1 = instData.OFFSET1;
// OFFSET1 supplies the bits above bit 7, OFFSET0 the low bits.
// Presumably both are 8-bit fields forming a 16-bit offset -- TODO confirm.
4576 Addr offset = (offset1 << 8) | offset0;
4577
// The template argument 3 matches the three dwords staged by execute().
4578 initMemWrite<3>(gpuDynInst, offset);
4579 } // initiateAcc
4580
// completeAcc(): empty body -- this store does nothing on completion.
// (The signature on source line 4582 is not shown in this listing.)
4581 void
4583 {
4584 } // completeAcc
4585 // --- Inst_DS__DS_WRITE_B128 class methods ---
4586
// Constructor: hands iFmt and the mnemonic "ds_write_b128" to Inst_DS, then
// tags the instruction with the MemoryRef and Store flags.
4588 : Inst_DS(iFmt, "ds_write_b128")
4589 {
4590 setFlag(MemoryRef);
4591 setFlag(Store);
4592 } // Inst_DS__DS_WRITE_B128
4593
// Destructor: empty body.
4595 {
4596 } // ~Inst_DS__DS_WRITE_B128
4597
4598 // --- description from .arch file ---
4599 // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0].
4600 // Qword write.
// execute(): reads the address and four data registers, stages the data in
// gpuDynInst->d_data, and hands the instruction to the local memory pipe.
// (The signature on source line 4602 is not shown in this listing.)
4601 void
4603 {
4604 Wavefront *wf = gpuDynInst->wavefront();
4605 gpuDynInst->execUnitId = wf->execUnitId;
// Latency is fixed at 24 cycles, converted to ticks by the compute unit.
4606 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4607 gpuDynInst->latency.set(
4608 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
// The 128-bit source is four consecutive 32-bit registers starting at DATA0.
4609 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
4610 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
4611 ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1);
4612 ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2);
4613 ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3);
4614
// Pull the operand values from the vector register file.
4615 addr.read();
4616 data0.read();
4617 data1.read();
4618 data2.read();
4619 data3.read();
4620
// calcAddr is defined elsewhere; presumably it fills the per-lane addresses
// later consumed by initiateAcc -- TODO confirm.
4621 calcAddr(gpuDynInst, addr);
4622
// For every lane enabled in exec_mask, copy the four dwords into d_data
// (viewed as a VecElemU32 array) at indices lane*4 + {0,1,2,3}.
4623 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4624 if (gpuDynInst->exec_mask[lane]) {
4625 (reinterpret_cast<VecElemU32*>(
4626 gpuDynInst->d_data))[lane * 4] = data0[lane];
4627 (reinterpret_cast<VecElemU32*>(
4628 gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
4629 (reinterpret_cast<VecElemU32*>(
4630 gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
4631 (reinterpret_cast<VecElemU32*>(
4632 gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
4633 }
4634 }
4635
// Queue the instruction on the compute unit's local memory pipeline.
4636 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
4637 } // execute
4638
// initiateAcc(): builds the instruction offset and starts the 4-dword write.
// (The signature on source line 4640 is not shown in this listing.)
4639 void
4641 {
4642 Addr offset0 = instData.OFFSET0;
4643 Addr offset1 = instData.OFFSET1;
// OFFSET1 supplies the bits above bit 7, OFFSET0 the low bits.
// Presumably both are 8-bit fields forming a 16-bit offset -- TODO confirm.
4644 Addr offset = (offset1 << 8) | offset0;
4645
// The template argument 4 matches the four dwords staged by execute().
4646 initMemWrite<4>(gpuDynInst, offset);
4647 } // initiateAcc
4648
// completeAcc(): empty body -- this store does nothing on completion.
// (The signature on source line 4650 is not shown in this listing.)
4649 void
4651 {
4652 } // completeAcc
4653 // --- Inst_DS__DS_READ_B96 class methods ---
4654
// Constructor: hands iFmt and the mnemonic "ds_read_b96" to Inst_DS, then
// tags the instruction with the MemoryRef and Load flags.
4656 : Inst_DS(iFmt, "ds_read_b96")
4657 {
4658 setFlag(MemoryRef);
4659 setFlag(Load);
4660 } // Inst_DS__DS_READ_B96
4661
// Destructor: empty body.
4663 {
4664 } // ~Inst_DS__DS_READ_B96
4665
4666 // --- description from .arch file ---
4667 // Tri-dword read.
// execute(): reads the address operand and hands the instruction to the
// local memory pipe. No data is moved here; destination registers are
// written in completeAcc.
// (The signature on source line 4669 is not shown in this listing.)
4668 void
4670 {
4671 Wavefront *wf = gpuDynInst->wavefront();
4672 gpuDynInst->execUnitId = wf->execUnitId;
// Latency is fixed at 24 cycles, converted to ticks by the compute unit.
4673 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4674 gpuDynInst->latency.set(
4675 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
4676 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
4677
// Pull the address values from the vector register file.
4678 addr.read();
4679
// calcAddr is defined elsewhere; presumably it fills the per-lane addresses
// later consumed by initiateAcc -- TODO confirm.
4680 calcAddr(gpuDynInst, addr);
4681
// Queue the instruction on the compute unit's local memory pipeline.
4682 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
4683 } // execute
4684
// initiateAcc(): builds the instruction offset and starts the 3-dword read.
// (The signature on source line 4686 is not shown in this listing.)
4685 void
4687 {
4688 Addr offset0 = instData.OFFSET0;
4689 Addr offset1 = instData.OFFSET1;
// OFFSET1 supplies the bits above bit 7, OFFSET0 the low bits.
// Presumably both are 8-bit fields forming a 16-bit offset -- TODO confirm.
4690 Addr offset = (offset1 << 8) | offset0;
4691
// The template argument 3 matches the three dwords consumed by completeAcc.
4692 initMemRead<3>(gpuDynInst, offset);
4693 }
4694
// completeAcc(): copies the loaded data from gpuDynInst->d_data into three
// consecutive destination registers starting at VDST.
// (The signature on source line 4696 is not shown in this listing.)
4695 void
4697 {
4698 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
4699 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
4700 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
4701
// For every lane enabled in exec_mask, take the dwords at d_data indices
// lane*4 + {0,1,2} (d_data viewed as a VecElemU32 array).
// NOTE(review): the per-lane stride is 4 dwords although only 3 are read,
// and initiateAcc uses initMemRead<3>. That helper is not visible here --
// confirm it fills d_data with the same stride (lane*4, not lane*3).
4702 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4703 if (gpuDynInst->exec_mask[lane]) {
4704 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
4705 gpuDynInst->d_data))[lane * 4];
4706 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
4707 gpuDynInst->d_data))[lane * 4 + 1];
4708 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
4709 gpuDynInst->d_data))[lane * 4 + 2];
4710 }
4711 }
4712
// Commit the three destination operands to the vector register file.
4713 vdst0.write();
4714 vdst1.write();
4715 vdst2.write();
4716 }
4717 // --- Inst_DS__DS_READ_B128 class methods ---
4718
// Constructor: hands iFmt and the mnemonic "ds_read_b128" to Inst_DS, then
// tags the instruction with the MemoryRef and Load flags.
4720 : Inst_DS(iFmt, "ds_read_b128")
4721 {
4722 setFlag(MemoryRef);
4723 setFlag(Load);
4724 } // Inst_DS__DS_READ_B128
4725
// Destructor: empty body.
4727 {
4728 } // ~Inst_DS__DS_READ_B128
4729
4730 // --- description from .arch file ---
4731 // Qword read.
// execute(): reads the address operand and hands the instruction to the
// local memory pipe. No data is moved here; destination registers are
// written in completeAcc.
// (The signature on source line 4733 is not shown in this listing.)
4732 void
4734 {
4735 Wavefront *wf = gpuDynInst->wavefront();
4736 gpuDynInst->execUnitId = wf->execUnitId;
// Latency is fixed at 24 cycles, converted to ticks by the compute unit.
4737 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4738 gpuDynInst->latency.set(
4739 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
4740 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
4741
// Pull the address values from the vector register file.
4742 addr.read();
4743
// calcAddr is defined elsewhere; presumably it fills the per-lane addresses
// later consumed by initiateAcc -- TODO confirm.
4744 calcAddr(gpuDynInst, addr);
4745
// Queue the instruction on the compute unit's local memory pipeline.
4746 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
4747 } // execute
4748
// initiateAcc(): builds the instruction offset and starts the 4-dword read.
// (The signature on source line 4750 is not shown in this listing.)
4749 void
4751 {
4752 Addr offset0 = instData.OFFSET0;
4753 Addr offset1 = instData.OFFSET1;
// OFFSET1 supplies the bits above bit 7, OFFSET0 the low bits.
// Presumably both are 8-bit fields forming a 16-bit offset -- TODO confirm.
4754 Addr offset = (offset1 << 8) | offset0;
4755
// The template argument 4 matches the four dwords consumed by completeAcc.
4756 initMemRead<4>(gpuDynInst, offset);
4757 } // initiateAcc
4758
// completeAcc(): copies the loaded data from gpuDynInst->d_data into four
// consecutive destination registers starting at VDST.
// (The signature on source line 4760 is not shown in this listing.)
4759 void
4761 {
4762 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
4763 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
4764 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
4765 VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);
4766
// For every lane enabled in exec_mask, take the dwords at d_data indices
// lane*4 + {0,1,2,3} (d_data viewed as a VecElemU32 array).
4767 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4768 if (gpuDynInst->exec_mask[lane]) {
4769 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
4770 gpuDynInst->d_data))[lane * 4];
4771 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
4772 gpuDynInst->d_data))[lane * 4 + 1];
4773 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
4774 gpuDynInst->d_data))[lane * 4 + 2];
4775 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
4776 gpuDynInst->d_data))[lane * 4 + 3];
4777 }
4778 }
4779
// Commit the four destination operands to the vector register file.
4780 vdst0.write();
4781 vdst1.write();
4782 vdst2.write();
4783 vdst3.write();
4784 } // completeAcc
4785} // namespace VegaISA
4786} // namespace gem5
const char data[]
std::vector< VectorRegisterFile * > vrf
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
void setFlag(Flags flag)
Nop class.
Definition nop.hh:49
void execute(GPUDynInstPtr) override
Definition ds.cc:743
void initiateAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:775
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:785
void execute(GPUDynInstPtr) override
Definition ds.cc:1451
void execute(GPUDynInstPtr) override
Definition ds.cc:991
void execute(GPUDynInstPtr) override
Definition ds.cc:2942
void execute(GPUDynInstPtr) override
Definition ds.cc:3938
void execute(GPUDynInstPtr) override
Definition ds.cc:3598
void execute(GPUDynInstPtr) override
Definition ds.cc:4201
Inst_DS__DS_ADD_U32(InFmt_DS *)
Definition ds.cc:41
void execute(GPUDynInstPtr) override
Definition ds.cc:58
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:100
void initiateAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:90
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:2365
void execute(GPUDynInstPtr) override
Definition ds.cc:2323
void initiateAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:2355
void execute(GPUDynInstPtr) override
Definition ds.cc:289
void execute(GPUDynInstPtr) override
Definition ds.cc:2555
void execute(GPUDynInstPtr) override
Definition ds.cc:1181
void execute(GPUDynInstPtr) override
Definition ds.cc:3133
void execute(GPUDynInstPtr) override
Definition ds.cc:3797
void execute(GPUDynInstPtr) override
Definition ds.cc:4400
void execute(GPUDynInstPtr) override
Definition ds.cc:4159
void execute(GPUDynInstPtr) override
Definition ds.cc:2236
void execute(GPUDynInstPtr) override
Definition ds.cc:620
void execute(GPUDynInstPtr) override
Definition ds.cc:2842
void execute(GPUDynInstPtr) override
Definition ds.cc:647
void execute(GPUDynInstPtr) override
Definition ds.cc:2869
void execute(GPUDynInstPtr) override
Definition ds.cc:1329
void execute(GPUDynInstPtr) override
Definition ds.cc:3281
void execute(GPUDynInstPtr) override
Definition ds.cc:1356
void execute(GPUDynInstPtr) override
Definition ds.cc:3308
void execute(GPUDynInstPtr) override
Definition ds.cc:3576
void execute(GPUDynInstPtr) override
Definition ds.cc:4140
void execute(GPUDynInstPtr) override
Definition ds.cc:1076
void execute(GPUDynInstPtr) override
Definition ds.cc:3028
void execute(GPUDynInstPtr) override
Definition ds.cc:3687
void execute(GPUDynInstPtr) override
Definition ds.cc:4290
void execute(GPUDynInstPtr) override
Definition ds.cc:184
void execute(GPUDynInstPtr) override
Definition ds.cc:2450
void execute(GPUDynInstPtr) override
Definition ds.cc:4120
void execute(GPUDynInstPtr) override
Definition ds.cc:3993
void execute(GPUDynInstPtr) override
Definition ds.cc:4049
void execute(GPUDynInstPtr) override
Definition ds.cc:4075
void execute(GPUDynInstPtr) override
Definition ds.cc:3966
void execute(GPUDynInstPtr) override
Definition ds.cc:4019
void execute(GPUDynInstPtr) override
Definition ds.cc:1055
void execute(GPUDynInstPtr) override
Definition ds.cc:3006
void execute(GPUDynInstPtr) override
Definition ds.cc:3664
void execute(GPUDynInstPtr) override
Definition ds.cc:4267
void execute(GPUDynInstPtr) override
Definition ds.cc:163
void execute(GPUDynInstPtr) override
Definition ds.cc:2428
void execute(GPUDynInstPtr) override
Definition ds.cc:699
void execute(GPUDynInstPtr) override
Definition ds.cc:2921
void execute(GPUDynInstPtr) override
Definition ds.cc:226
void execute(GPUDynInstPtr) override
Definition ds.cc:2492
void execute(GPUDynInstPtr) override
Definition ds.cc:1408
void execute(GPUDynInstPtr) override
Definition ds.cc:3360
void execute(GPUDynInstPtr) override
Definition ds.cc:1118
void execute(GPUDynInstPtr) override
Definition ds.cc:3070
void execute(GPUDynInstPtr) override
Definition ds.cc:1160
void execute(GPUDynInstPtr) override
Definition ds.cc:3112
void execute(GPUDynInstPtr) override
Definition ds.cc:3914
void execute(GPUDynInstPtr) override
Definition ds.cc:4517
void execute(GPUDynInstPtr) override
Definition ds.cc:3731
void execute(GPUDynInstPtr) override
Definition ds.cc:4334
void execute(GPUDynInstPtr) override
Definition ds.cc:3775
void execute(GPUDynInstPtr) override
Definition ds.cc:4378
void execute(GPUDynInstPtr) override
Definition ds.cc:268
void execute(GPUDynInstPtr) override
Definition ds.cc:2534
void execute(GPUDynInstPtr) override
Definition ds.cc:673
void execute(GPUDynInstPtr) override
Definition ds.cc:2895
void execute(GPUDynInstPtr) override
Definition ds.cc:205
void execute(GPUDynInstPtr) override
Definition ds.cc:2471
void execute(GPUDynInstPtr) override
Definition ds.cc:1382
void execute(GPUDynInstPtr) override
Definition ds.cc:3334
void execute(GPUDynInstPtr) override
Definition ds.cc:1097
void execute(GPUDynInstPtr) override
Definition ds.cc:3049
void execute(GPUDynInstPtr) override
Definition ds.cc:1139
void execute(GPUDynInstPtr) override
Definition ds.cc:3091
void execute(GPUDynInstPtr) override
Definition ds.cc:3890
void execute(GPUDynInstPtr) override
Definition ds.cc:4493
void execute(GPUDynInstPtr) override
Definition ds.cc:3709
void execute(GPUDynInstPtr) override
Definition ds.cc:4312
void execute(GPUDynInstPtr) override
Definition ds.cc:3753
void execute(GPUDynInstPtr) override
Definition ds.cc:4356
void execute(GPUDynInstPtr) override
Definition ds.cc:247
void execute(GPUDynInstPtr) override
Definition ds.cc:2513
void execute(GPUDynInstPtr) override
Definition ds.cc:397
void execute(GPUDynInstPtr) override
Definition ds.cc:2619
void execute(GPUDynInstPtr) override
Definition ds.cc:1245
void execute(GPUDynInstPtr) override
Definition ds.cc:3197
Inst_DS__DS_NOP(InFmt_DS *)
Definition ds.cc:705
void execute(GPUDynInstPtr) override
Definition ds.cc:718
void execute(GPUDynInstPtr) override
Definition ds.cc:4179
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:354
void execute(GPUDynInstPtr) override
Definition ds.cc:312
void initiateAcc(GPUDynInstPtr gpuDynInst) override
Definition ds.cc:344
void execute(GPUDynInstPtr) override
Definition ds.cc:2576
void execute(GPUDynInstPtr) override
Definition ds.cc:1202
void execute(GPUDynInstPtr) override
Definition ds.cc:3154
void execute(GPUDynInstPtr) override
Definition ds.cc:3819
void execute(GPUDynInstPtr) override
Definition ds.cc:4422
void execute(GPUDynInstPtr) override
Definition ds.cc:2148
void execute(GPUDynInstPtr) override
Definition ds.cc:1603
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1635
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1626
void execute(GPUDynInstPtr) override
Definition ds.cc:3512
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:3535
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:3544
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1568
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1559
void execute(GPUDynInstPtr) override
Definition ds.cc:1536
void execute(GPUDynInstPtr) override
Definition ds.cc:3445
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:3468
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:3477
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:4760
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:4750
void execute(GPUDynInstPtr) override
Definition ds.cc:4733
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1505
void execute(GPUDynInstPtr) override
Definition ds.cc:1472
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1495
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:3414
void execute(GPUDynInstPtr) override
Definition ds.cc:3381
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:3404
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:4686
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:4696
void execute(GPUDynInstPtr) override
Definition ds.cc:4669
void execute(GPUDynInstPtr) override
Definition ds.cc:1795
void execute(GPUDynInstPtr) override
Definition ds.cc:1669
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1702
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1692
void execute(GPUDynInstPtr) override
Definition ds.cc:1943
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1965
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1975
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1911
void execute(GPUDynInstPtr) override
Definition ds.cc:1879
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1901
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1848
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1838
void execute(GPUDynInstPtr) override
Definition ds.cc:1816
void execute(GPUDynInstPtr) override
Definition ds.cc:1732
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:1765
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:1755
void execute(GPUDynInstPtr) override
Definition ds.cc:1034
void execute(GPUDynInstPtr) override
Definition ds.cc:2985
void execute(GPUDynInstPtr) override
Definition ds.cc:3642
void execute(GPUDynInstPtr) override
Definition ds.cc:4245
void execute(GPUDynInstPtr) override
Definition ds.cc:142
void execute(GPUDynInstPtr) override
Definition ds.cc:2407
void execute(GPUDynInstPtr) override
Definition ds.cc:1012
void execute(GPUDynInstPtr) override
Definition ds.cc:2963
void execute(GPUDynInstPtr) override
Definition ds.cc:3620
void execute(GPUDynInstPtr) override
Definition ds.cc:4223
void execute(GPUDynInstPtr) override
Definition ds.cc:120
void execute(GPUDynInstPtr) override
Definition ds.cc:2385
void execute(GPUDynInstPtr) override
Definition ds.cc:2012
void execute(GPUDynInstPtr) override
Definition ds.cc:1428
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:586
void execute(GPUDynInstPtr) override
Definition ds.cc:550
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:595
void execute(GPUDynInstPtr) override
Definition ds.cc:2772
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:2817
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:2808
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:519
void execute(GPUDynInstPtr) override
Definition ds.cc:483
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:528
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:2741
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:2750
void execute(GPUDynInstPtr) override
Definition ds.cc:2705
void execute(GPUDynInstPtr) override
Definition ds.cc:4602
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:4640
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:4650
void execute(GPUDynInstPtr) override
Definition ds.cc:929
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:971
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:961
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:451
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:461
void execute(GPUDynInstPtr) override
Definition ds.cc:419
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:2673
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:2683
void execute(GPUDynInstPtr) override
Definition ds.cc:2641
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:909
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:899
void execute(GPUDynInstPtr) override
Definition ds.cc:867
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:837
void execute(GPUDynInstPtr) override
Definition ds.cc:805
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:847
void initiateAcc(GPUDynInstPtr) override
Definition ds.cc:4572
void completeAcc(GPUDynInstPtr) override
Definition ds.cc:4582
void execute(GPUDynInstPtr) override
Definition ds.cc:4538
void execute(GPUDynInstPtr) override
Definition ds.cc:3866
void execute(GPUDynInstPtr) override
Definition ds.cc:4469
void execute(GPUDynInstPtr) override
Definition ds.cc:1303
void execute(GPUDynInstPtr) override
Definition ds.cc:3255
void execute(GPUDynInstPtr) override
Definition ds.cc:1284
void execute(GPUDynInstPtr) override
Definition ds.cc:3236
void execute(GPUDynInstPtr) override
Definition ds.cc:1266
void execute(GPUDynInstPtr) override
Definition ds.cc:3218
void execute(GPUDynInstPtr) override
Definition ds.cc:375
void execute(GPUDynInstPtr) override
Definition ds.cc:2597
void execute(GPUDynInstPtr) override
Definition ds.cc:1223
void execute(GPUDynInstPtr) override
Definition ds.cc:3175
void execute(GPUDynInstPtr) override
Definition ds.cc:3841
void execute(GPUDynInstPtr) override
Definition ds.cc:4444
void initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
void initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
void initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
void initAtomicAccess(GPUDynInstPtr gpuDynInst, Addr offset)
void initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
void read() override
read from the vrf.
Definition operand.hh:147
void write() override
write to the vrf.
Definition operand.hh:199
const int simdId
Definition wavefront.hh:101
ComputeUnit * computeUnit
Definition wavefront.hh:108
void decLGKMInstsIssued()
VectorMask & execMask()
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr uint64_t sext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
Definition bitfield.hh:129
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition bitfield.hh:216
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214
Bitfield< 23, 0 > offset
Definition types.hh:144
constexpr unsigned NumVecElemPerVecReg
Definition vec.hh:61
Bitfield< 30, 0 > index
uint16_t VecElemU16
uint32_t VecElemU32
uint64_t VecElemU64
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147

Generated on Tue Jun 18 2024 16:23:41 for gem5 by doxygen 1.11.0