gem5 v24.0.0.0
Loading...
Searching...
No Matches
sop2.cc
Go to the documentation of this file.
/*
 * Copyright (c) 2024 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
31
33
34namespace gem5
35{
36
37namespace VegaISA
38{
39 // --- Inst_SOP2__S_ADD_U32 class methods ---
40
42 : Inst_SOP2(iFmt, "s_add_u32")
43 {
44 setFlag(ALU);
45 } // Inst_SOP2__S_ADD_U32
46
48 {
49 } // ~Inst_SOP2__S_ADD_U32
50
51 // --- description from .arch file ---
52 // D.u = S0.u + S1.u;
53 // SCC = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an unsigned
54 // --- overflow/carry-out for S_ADDC_U32.
55 void
57 {
58 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
59 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
60 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
61 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
62
63 src0.read();
64 src1.read();
65
66 sdst = src0.rawData() + src1.rawData();
67 scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData())
68 >= 0x100000000ULL ? 1 : 0;
69
70 sdst.write();
71 scc.write();
72 } // execute
73 // --- Inst_SOP2__S_SUB_U32 class methods ---
74
76 : Inst_SOP2(iFmt, "s_sub_u32")
77 {
78 setFlag(ALU);
79 } // Inst_SOP2__S_SUB_U32
80
82 {
83 } // ~Inst_SOP2__S_SUB_U32
84
85 // --- description from .arch file ---
86 // D.u = S0.u - S1.u;
87 // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out for
88 // --- S_SUBB_U32.
89 void
91 {
92 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
93 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
94 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
95 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
96
97 src0.read();
98 src1.read();
99
100 sdst = src0.rawData() - src1.rawData();
101 scc = (src1.rawData() > src0.rawData()) ? 1 : 0;
102
103 sdst.write();
104 scc.write();
105 } // execute
106 // --- Inst_SOP2__S_ADD_I32 class methods ---
107
109 : Inst_SOP2(iFmt, "s_add_i32")
110 {
111 setFlag(ALU);
112 } // Inst_SOP2__S_ADD_I32
113
115 {
116 } // ~Inst_SOP2__S_ADD_I32
117
118 // --- description from .arch file ---
119 // D.i = S0.i + S1.i;
120 // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed
121 // overflow.
122 // This opcode is not suitable for use with S_ADDC_U32 for implementing
123 // 64-bit operations.
124 void
126 {
127 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
128 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
129 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
130 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
131
132 src0.read();
133 src1.read();
134
135 sdst = src0.rawData() + src1.rawData();
136 scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31)
137 && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31))
138 ? 1 : 0;
139
140 sdst.write();
141 scc.write();
142 } // execute
143 // --- Inst_SOP2__S_SUB_I32 class methods ---
144
146 : Inst_SOP2(iFmt, "s_sub_i32")
147 {
148 setFlag(ALU);
149 } // Inst_SOP2__S_SUB_I32
150
152 {
153 } // ~Inst_SOP2__S_SUB_I32
154
155 // --- description from .arch file ---
156 // D.i = S0.i - S1.i;
157 // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed
158 // overflow.
159 // CAUTION: The condition code behaviour for this opcode is inconsistent
160 // with V_SUB_I32; see V_SUB_I32 for further details.
161 // This opcode is not suitable for use with S_SUBB_U32 for implementing
162 // 64-bit operations.
163 void
165 {
166 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
167 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
168 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
169 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
170
171 src0.read();
172 src1.read();
173
174 sdst = src0.rawData() - src1.rawData();
175 scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31)
176 && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
177
178 sdst.write();
179 scc.write();
180 } // execute
181 // --- Inst_SOP2__S_ADDC_U32 class methods ---
182
184 : Inst_SOP2(iFmt, "s_addc_u32")
185 {
186 setFlag(ALU);
187 } // Inst_SOP2__S_ADDC_U32
188
190 {
191 } // ~Inst_SOP2__S_ADDC_U32
192
193 // --- description from .arch file ---
194 // D.u = S0.u + S1.u + SCC;
195 // SCC = (S0.u + S1.u + SCC >= 0x800000000ULL ? 1 : 0) is an unsigned
196 // overflow.
197 void
199 {
200 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
201 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
202 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
203 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
204
205 src0.read();
206 src1.read();
207 scc.read();
208
209 sdst = src0.rawData() + src1.rawData() + scc.rawData();
210 scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()
211 + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0;
212
213 sdst.write();
214 scc.write();
215 } // execute
216 // --- Inst_SOP2__S_SUBB_U32 class methods ---
217
219 : Inst_SOP2(iFmt, "s_subb_u32")
220 {
221 setFlag(ALU);
222 } // Inst_SOP2__S_SUBB_U32
223
225 {
226 } // ~Inst_SOP2__S_SUBB_U32
227
228 // --- description from .arch file ---
229 // D.u = S0.u - S1.u - SCC;
230 // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow.
231 void
233 {
234 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
235 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
236 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
237 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
238
239 src0.read();
240 src1.read();
241 scc.read();
242
243 sdst = src0.rawData() - src1.rawData() - scc.rawData();
244 scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0;
245
246 sdst.write();
247 scc.write();
248 } // execute
249 // --- Inst_SOP2__S_MIN_I32 class methods ---
250
252 : Inst_SOP2(iFmt, "s_min_i32")
253 {
254 setFlag(ALU);
255 } // Inst_SOP2__S_MIN_I32
256
258 {
259 } // ~Inst_SOP2__S_MIN_I32
260
261 // --- description from .arch file ---
262 // D.i = (S0.i < S1.i) ? S0.i : S1.i;
263 // SCC = 1 if S0 is chosen as the minimum value.
264 void
266 {
267 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
268 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
269 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
270 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
271
272 src0.read();
273 src1.read();
274
275 sdst = std::min(src0.rawData(), src1.rawData());
276 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
277
278 sdst.write();
279 scc.write();
280 } // execute
281 // --- Inst_SOP2__S_MIN_U32 class methods ---
282
284 : Inst_SOP2(iFmt, "s_min_u32")
285 {
286 setFlag(ALU);
287 } // Inst_SOP2__S_MIN_U32
288
290 {
291 } // ~Inst_SOP2__S_MIN_U32
292
293 // --- description from .arch file ---
294 // D.u = (S0.u < S1.u) ? S0.u : S1.u;
295 // SCC = 1 if S0 is chosen as the minimum value.
296 void
298 {
299 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
300 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
301 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
302 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
303
304 src0.read();
305 src1.read();
306
307 sdst = std::min(src0.rawData(), src1.rawData());
308 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
309
310 sdst.write();
311 scc.write();
312 } // execute
313 // --- Inst_SOP2__S_MAX_I32 class methods ---
314
316 : Inst_SOP2(iFmt, "s_max_i32")
317 {
318 setFlag(ALU);
319 } // Inst_SOP2__S_MAX_I32
320
322 {
323 } // ~Inst_SOP2__S_MAX_I32
324
325 // --- description from .arch file ---
326 // D.i = (S0.i > S1.i) ? S0.i : S1.i;
327 // SCC = 1 if S0 is chosen as the maximum value.
328 void
330 {
331 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
332 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
333 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
334 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
335
336 src0.read();
337 src1.read();
338
339 sdst = std::max(src0.rawData(), src1.rawData());
340 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
341
342 sdst.write();
343 scc.write();
344 } // execute
345 // --- Inst_SOP2__S_MAX_U32 class methods ---
346
348 : Inst_SOP2(iFmt, "s_max_u32")
349 {
350 setFlag(ALU);
351 } // Inst_SOP2__S_MAX_U32
352
354 {
355 } // ~Inst_SOP2__S_MAX_U32
356
357 // --- description from .arch file ---
358 // D.u = (S0.u > S1.u) ? S0.u : S1.u;
359 // SCC = 1 if S0 is chosen as the maximum value.
360 void
362 {
363 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
364 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
365 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
366 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
367
368 src0.read();
369 src1.read();
370
371 sdst = std::max(src0.rawData(), src1.rawData());
372 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
373
374 sdst.write();
375 scc.write();
376 } // execute
377 // --- Inst_SOP2__S_CSELECT_B32 class methods ---
378
380 : Inst_SOP2(iFmt, "s_cselect_b32")
381 {
382 setFlag(ALU);
383 } // Inst_SOP2__S_CSELECT_B32
384
386 {
387 } // ~Inst_SOP2__S_CSELECT_B32
388
389 // --- description from .arch file ---
390 // D.u = SCC ? S0.u : S1.u (conditional select).
391 void
393 {
394 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
395 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
396 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
397 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
398
399 src0.read();
400 src1.read();
401 scc.read();
402
403 sdst = scc.rawData() ? src0.rawData() : src1.rawData();
404
405 sdst.write();
406 } // execute
407 // --- Inst_SOP2__S_CSELECT_B64 class methods ---
408
410 : Inst_SOP2(iFmt, "s_cselect_b64")
411 {
412 setFlag(ALU);
413 } // Inst_SOP2__S_CSELECT_B64
414
416 {
417 } // ~Inst_SOP2__S_CSELECT_B64
418
419 // --- description from .arch file ---
420 // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select).
421 void
423 {
424 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
425 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
426 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
427 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
428
429 src0.read();
430 src1.read();
431 scc.read();
432
433 sdst = scc.rawData() ? src0.rawData() : src1.rawData();
434
435 sdst.write();
436 } // execute
437 // --- Inst_SOP2__S_AND_B32 class methods ---
438
440 : Inst_SOP2(iFmt, "s_and_b32")
441 {
442 setFlag(ALU);
443 } // Inst_SOP2__S_AND_B32
444
446 {
447 } // ~Inst_SOP2__S_AND_B32
448
449 // --- description from .arch file ---
450 // D.u = S0.u & S1.u;
451 // SCC = 1 if result is non-zero.
452 void
454 {
455 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
456 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
457 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
458 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
459
460 src0.read();
461 src1.read();
462
463 sdst = src0.rawData() & src1.rawData();
464 scc = sdst.rawData() ? 1 : 0;
465
466 sdst.write();
467 scc.write();
468 } // execute
469 // --- Inst_SOP2__S_AND_B64 class methods ---
470
472 : Inst_SOP2(iFmt, "s_and_b64")
473 {
474 setFlag(ALU);
475 } // Inst_SOP2__S_AND_B64
476
478 {
479 } // ~Inst_SOP2__S_AND_B64
480
481 // --- description from .arch file ---
482 // D.u64 = S0.u64 & S1.u64;
483 // SCC = 1 if result is non-zero.
484 void
486 {
487 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
488 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
489 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
490 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
491
492 src0.read();
493 src1.read();
494
495 sdst = src0.rawData() & src1.rawData();
496 scc = sdst.rawData() ? 1 : 0;
497
498 sdst.write();
499 scc.write();
500 } // execute
501 // --- Inst_SOP2__S_OR_B32 class methods ---
502
504 : Inst_SOP2(iFmt, "s_or_b32")
505 {
506 setFlag(ALU);
507 } // Inst_SOP2__S_OR_B32
508
510 {
511 } // ~Inst_SOP2__S_OR_B32
512
513 // --- description from .arch file ---
514 // D.u = S0.u | S1.u;
515 // SCC = 1 if result is non-zero.
516 void
518 {
519 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
520 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
521 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
522 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
523
524 src0.read();
525 src1.read();
526
527 sdst = src0.rawData() | src1.rawData();
528 scc = sdst.rawData() ? 1 : 0;
529
530 sdst.write();
531 scc.write();
532 } // execute
533 // --- Inst_SOP2__S_OR_B64 class methods ---
534
536 : Inst_SOP2(iFmt, "s_or_b64")
537 {
538 setFlag(ALU);
539 } // Inst_SOP2__S_OR_B64
540
542 {
543 } // ~Inst_SOP2__S_OR_B64
544
545 // --- description from .arch file ---
546 // D.u64 = S0.u64 | S1.u64;
547 // SCC = 1 if result is non-zero.
548 void
550 {
551 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
552 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
553 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
554 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
555
556 src0.read();
557 src1.read();
558
559 sdst = src0.rawData() | src1.rawData();
560 scc = sdst.rawData() ? 1 : 0;
561
562 sdst.write();
563 scc.write();
564 } // execute
565 // --- Inst_SOP2__S_XOR_B32 class methods ---
566
568 : Inst_SOP2(iFmt, "s_xor_b32")
569 {
570 setFlag(ALU);
571 } // Inst_SOP2__S_XOR_B32
572
574 {
575 } // ~Inst_SOP2__S_XOR_B32
576
577 // --- description from .arch file ---
578 // D.u = S0.u ^ S1.u;
579 // SCC = 1 if result is non-zero.
580 void
582 {
583 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
584 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
585 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
586 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
587
588 src0.read();
589 src1.read();
590
591 sdst = src0.rawData() ^ src1.rawData();
592 scc = sdst.rawData() ? 1 : 0;
593
594 sdst.write();
595 scc.write();
596 } // execute
597 // --- Inst_SOP2__S_XOR_B64 class methods ---
598
600 : Inst_SOP2(iFmt, "s_xor_b64")
601 {
602 setFlag(ALU);
603 } // Inst_SOP2__S_XOR_B64
604
606 {
607 } // ~Inst_SOP2__S_XOR_B64
608
609 // --- description from .arch file ---
610 // D.u64 = S0.u64 ^ S1.u64;
611 // SCC = 1 if result is non-zero.
612 void
614 {
615 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
616 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
617 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
618 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
619
620 src0.read();
621 src1.read();
622
623 sdst = src0.rawData() ^ src1.rawData();
624 scc = sdst.rawData() ? 1 : 0;
625
626 sdst.write();
627 scc.write();
628 } // execute
629 // --- Inst_SOP2__S_ANDN2_B32 class methods ---
630
632 : Inst_SOP2(iFmt, "s_andn2_b32")
633 {
634 setFlag(ALU);
635 } // Inst_SOP2__S_ANDN2_B32
636
638 {
639 } // ~Inst_SOP2__S_ANDN2_B32
640
641 // --- description from .arch file ---
642 // D.u = S0.u & ~S1.u;
643 // SCC = 1 if result is non-zero.
644 void
646 {
647 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
648 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
649 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
650 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
651
652 src0.read();
653 src1.read();
654
655 sdst = src0.rawData() &~ src1.rawData();
656 scc = sdst.rawData() ? 1 : 0;
657
658 sdst.write();
659 scc.write();
660 } // execute
661 // --- Inst_SOP2__S_ANDN2_B64 class methods ---
662
664 : Inst_SOP2(iFmt, "s_andn2_b64")
665 {
666 setFlag(ALU);
667 } // Inst_SOP2__S_ANDN2_B64
668
670 {
671 } // ~Inst_SOP2__S_ANDN2_B64
672
673 // --- description from .arch file ---
674 // D.u64 = S0.u64 & ~S1.u64;
675 // SCC = 1 if result is non-zero.
676 void
678 {
679 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
680 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
681 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
682 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
683
684 src0.read();
685 src1.read();
686
687 sdst = src0.rawData() &~ src1.rawData();
688 scc = sdst.rawData() ? 1 : 0;
689
690 sdst.write();
691 scc.write();
692 } // execute
693 // --- Inst_SOP2__S_ORN2_B32 class methods ---
694
696 : Inst_SOP2(iFmt, "s_orn2_b32")
697 {
698 setFlag(ALU);
699 } // Inst_SOP2__S_ORN2_B32
700
702 {
703 } // ~Inst_SOP2__S_ORN2_B32
704
705 // --- description from .arch file ---
706 // D.u = S0.u | ~S1.u;
707 // SCC = 1 if result is non-zero.
708 void
710 {
711 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
712 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
713 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
714 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
715
716 src0.read();
717 src1.read();
718
719 sdst = src0.rawData() |~ src1.rawData();
720 scc = sdst.rawData() ? 1 : 0;
721
722 sdst.write();
723 scc.write();
724 } // execute
725 // --- Inst_SOP2__S_ORN2_B64 class methods ---
726
728 : Inst_SOP2(iFmt, "s_orn2_b64")
729 {
730 setFlag(ALU);
731 } // Inst_SOP2__S_ORN2_B64
732
734 {
735 } // ~Inst_SOP2__S_ORN2_B64
736
737 // --- description from .arch file ---
738 // D.u64 = S0.u64 | ~S1.u64;
739 // SCC = 1 if result is non-zero.
740 void
742 {
743 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
744 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
745 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
746 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
747
748 src0.read();
749 src1.read();
750
751 sdst = src0.rawData() |~ src1.rawData();
752 scc = sdst.rawData() ? 1 : 0;
753
754 sdst.write();
755 scc.write();
756 } // execute
757 // --- Inst_SOP2__S_NAND_B32 class methods ---
758
760 : Inst_SOP2(iFmt, "s_nand_b32")
761 {
762 setFlag(ALU);
763 } // Inst_SOP2__S_NAND_B32
764
766 {
767 } // ~Inst_SOP2__S_NAND_B32
768
769 // --- description from .arch file ---
770 // D.u = ~(S0.u & S1.u);
771 // SCC = 1 if result is non-zero.
772 void
774 {
775 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
776 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
777 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
778 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
779
780 src0.read();
781 src1.read();
782
783 sdst = ~(src0.rawData() & src1.rawData());
784 scc = sdst.rawData() ? 1 : 0;
785
786 sdst.write();
787 scc.write();
788 } // execute
789 // --- Inst_SOP2__S_NAND_B64 class methods ---
790
792 : Inst_SOP2(iFmt, "s_nand_b64")
793 {
794 setFlag(ALU);
795 } // Inst_SOP2__S_NAND_B64
796
798 {
799 } // ~Inst_SOP2__S_NAND_B64
800
801 // --- description from .arch file ---
802 // D.u64 = ~(S0.u64 & S1.u64);
803 // SCC = 1 if result is non-zero.
804 void
806 {
807 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
808 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
809 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
810 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
811
812 src0.read();
813 src1.read();
814
815 sdst = ~(src0.rawData() & src1.rawData());
816 scc = sdst.rawData() ? 1 : 0;
817
818 sdst.write();
819 scc.write();
820 } // execute
821 // --- Inst_SOP2__S_NOR_B32 class methods ---
822
824 : Inst_SOP2(iFmt, "s_nor_b32")
825 {
826 setFlag(ALU);
827 } // Inst_SOP2__S_NOR_B32
828
830 {
831 } // ~Inst_SOP2__S_NOR_B32
832
833 // --- description from .arch file ---
834 // D.u = ~(S0.u | S1.u);
835 // SCC = 1 if result is non-zero.
836 void
838 {
839 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
840 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
841 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
842 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
843
844 src0.read();
845 src1.read();
846
847 sdst = ~(src0.rawData() | src1.rawData());
848 scc = sdst.rawData() ? 1 : 0;
849
850 sdst.write();
851 scc.write();
852 } // execute
853 // --- Inst_SOP2__S_NOR_B64 class methods ---
854
856 : Inst_SOP2(iFmt, "s_nor_b64")
857 {
858 setFlag(ALU);
859 } // Inst_SOP2__S_NOR_B64
860
862 {
863 } // ~Inst_SOP2__S_NOR_B64
864
865 // --- description from .arch file ---
866 // D.u64 = ~(S0.u64 | S1.u64);
867 // SCC = 1 if result is non-zero.
868 void
870 {
871 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
872 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
873 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
874 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
875
876 src0.read();
877 src1.read();
878
879 sdst = ~(src0.rawData() | src1.rawData());
880 scc = sdst.rawData() ? 1 : 0;
881
882 sdst.write();
883 scc.write();
884 } // execute
885 // --- Inst_SOP2__S_XNOR_B32 class methods ---
886
888 : Inst_SOP2(iFmt, "s_xnor_b32")
889 {
890 setFlag(ALU);
891 } // Inst_SOP2__S_XNOR_B32
892
894 {
895 } // ~Inst_SOP2__S_XNOR_B32
896
897 // --- description from .arch file ---
898 // D.u = ~(S0.u ^ S1.u);
899 // SCC = 1 if result is non-zero.
900 void
902 {
903 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
904 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
905 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
906 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
907
908 src0.read();
909 src1.read();
910
911 sdst = ~(src0.rawData() ^ src1.rawData());
912 scc = sdst.rawData() ? 1 : 0;
913
914 sdst.write();
915 scc.write();
916 } // execute
917 // --- Inst_SOP2__S_XNOR_B64 class methods ---
918
920 : Inst_SOP2(iFmt, "s_xnor_b64")
921 {
922 setFlag(ALU);
923 } // Inst_SOP2__S_XNOR_B64
924
926 {
927 } // ~Inst_SOP2__S_XNOR_B64
928
929 // --- description from .arch file ---
930 // D.u64 = ~(S0.u64 ^ S1.u64);
931 // SCC = 1 if result is non-zero.
932 void
934 {
935 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
936 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
937 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
938 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
939
940 src0.read();
941 src1.read();
942
943 sdst = ~(src0.rawData() ^ src1.rawData());
944 scc = sdst.rawData() ? 1 : 0;
945
946 sdst.write();
947 scc.write();
948 } // execute
949 // --- Inst_SOP2__S_LSHL_B32 class methods ---
950
952 : Inst_SOP2(iFmt, "s_lshl_b32")
953 {
954 setFlag(ALU);
955 } // Inst_SOP2__S_LSHL_B32
956
958 {
959 } // ~Inst_SOP2__S_LSHL_B32
960
961 // --- description from .arch file ---
962 // D.u = S0.u << S1.u[4:0];
963 // SCC = 1 if result is non-zero.
964 void
966 {
967 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
968 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
969 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
970 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
971
972 src0.read();
973 src1.read();
974
975 sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
976 scc = sdst.rawData() ? 1 : 0;
977
978 sdst.write();
979 scc.write();
980 } // execute
981 // --- Inst_SOP2__S_LSHL_B64 class methods ---
982
984 : Inst_SOP2(iFmt, "s_lshl_b64")
985 {
986 setFlag(ALU);
987 } // Inst_SOP2__S_LSHL_B64
988
990 {
991 } // ~Inst_SOP2__S_LSHL_B64
992
993 // --- description from .arch file ---
994 // D.u64 = S0.u64 << S1.u[5:0];
995 // SCC = 1 if result is non-zero.
996 void
998 {
999 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
1000 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1001 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1002 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1003
1004 src0.read();
1005 src1.read();
1006
1007 sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
1008 scc = sdst.rawData() ? 1 : 0;
1009
1010 sdst.write();
1011 scc.write();
1012 } // execute
1013 // --- Inst_SOP2__S_LSHR_B32 class methods ---
1014
1016 : Inst_SOP2(iFmt, "s_lshr_b32")
1017 {
1018 setFlag(ALU);
1019 } // Inst_SOP2__S_LSHR_B32
1020
1022 {
1023 } // ~Inst_SOP2__S_LSHR_B32
1024
1025 // --- description from .arch file ---
1026 // D.u = S0.u >> S1.u[4:0];
1027 // SCC = 1 if result is non-zero.
1028 // The vacated bits are set to zero.
1029 void
1031 {
1032 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1033 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1034 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1035 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1036
1037 src0.read();
1038 src1.read();
1039
1040 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
1041 scc = sdst.rawData() ? 1 : 0;
1042
1043 sdst.write();
1044 scc.write();
1045 } // execute
1046 // --- Inst_SOP2__S_LSHR_B64 class methods ---
1047
1049 : Inst_SOP2(iFmt, "s_lshr_b64")
1050 {
1051 setFlag(ALU);
1052 } // Inst_SOP2__S_LSHR_B64
1053
1055 {
1056 } // ~Inst_SOP2__S_LSHR_B64
1057
1058 // --- description from .arch file ---
1059 // D.u64 = S0.u64 >> S1.u[5:0];
1060 // SCC = 1 if result is non-zero.
1061 // The vacated bits are set to zero.
1062 void
1064 {
1065 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
1066 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1067 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1068 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1069
1070 src0.read();
1071 src1.read();
1072
1073 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
1074 scc = sdst.rawData() ? 1 : 0;
1075
1076 sdst.write();
1077 scc.write();
1078 } // execute
1079 // --- Inst_SOP2__S_ASHR_I32 class methods ---
1080
1082 : Inst_SOP2(iFmt, "s_ashr_i32")
1083 {
1084 setFlag(ALU);
1085 } // Inst_SOP2__S_ASHR_I32
1086
1088 {
1089 } // ~Inst_SOP2__S_ASHR_I32
1090
1091 // --- description from .arch file ---
1092 // D.i = signext(S0.i) >> S1.u[4:0];
1093 // SCC = 1 if result is non-zero.
1094 // The vacated bits are set to the sign bit of the input value.
1095 void
1097 {
1098 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1099 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1100 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1101 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1102
1103 src0.read();
1104 src1.read();
1105
1106 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
1107 scc = sdst.rawData() ? 1 : 0;
1108
1109 sdst.write();
1110 scc.write();
1111 } // execute
1112 // --- Inst_SOP2__S_ASHR_I64 class methods ---
1113
1115 : Inst_SOP2(iFmt, "s_ashr_i64")
1116 {
1117 setFlag(ALU);
1118 } // Inst_SOP2__S_ASHR_I64
1119
1121 {
1122 } // ~Inst_SOP2__S_ASHR_I64
1123
1124 // --- description from .arch file ---
1125 // D.i64 = signext(S0.i64) >> S1.u[5:0];
1126 // SCC = 1 if result is non-zero.
1127 // The vacated bits are set to the sign bit of the input value.
1128 void
1130 {
1131 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
1132 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1133 ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
1134 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1135
1136 src0.read();
1137 src1.read();
1138
1139 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
1140 scc = sdst.rawData() ? 1 : 0;
1141
1142 sdst.write();
1143 scc.write();
1144 } // execute
1145 // --- Inst_SOP2__S_BFM_B32 class methods ---
1146
1148 : Inst_SOP2(iFmt, "s_bfm_b32")
1149 {
1150 setFlag(ALU);
1151 } // Inst_SOP2__S_BFM_B32
1152
1154 {
1155 } // ~Inst_SOP2__S_BFM_B32
1156
1157 // --- description from .arch file ---
1158 // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
1159 void
1161 {
1162 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1163 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1164 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1165
1166 src0.read();
1167 src1.read();
1168
1169 sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
1170 << bits(src1.rawData(), 4, 0);
1171
1172 sdst.write();
1173 } // execute
1174 // --- Inst_SOP2__S_BFM_B64 class methods ---
1175
1177 : Inst_SOP2(iFmt, "s_bfm_b64")
1178 {
1179 setFlag(ALU);
1180 } // Inst_SOP2__S_BFM_B64
1181
1183 {
1184 } // ~Inst_SOP2__S_BFM_B64
1185
1186 // --- description from .arch file ---
1187 // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
1188 void
1190 {
1191 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1192 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1193 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1194
1195 src0.read();
1196 src1.read();
1197
1198 sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
1199 << bits(src1.rawData(), 5, 0);
1200
1201 sdst.write();
1202 } // execute
1203 // --- Inst_SOP2__S_MUL_I32 class methods ---
1204
1206 : Inst_SOP2(iFmt, "s_mul_i32")
1207 {
1208 setFlag(ALU);
1209 } // Inst_SOP2__S_MUL_I32
1210
1212 {
1213 } // ~Inst_SOP2__S_MUL_I32
1214
1215 // --- description from .arch file ---
1216 // D.i = S0.i * S1.i.
1217 void
1219 {
1220 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1221 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
1222 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1223
1224 src0.read();
1225 src1.read();
1226
1227 ScalarRegI64 tmp = src0.rawData() * src1.rawData();
1228 sdst = tmp & mask(32);
1229
1230 sdst.write();
1231 } // execute
1232 // --- Inst_SOP2__S_BFE_U32 class methods ---
1233
1235 : Inst_SOP2(iFmt, "s_bfe_u32")
1236 {
1237 setFlag(ALU);
1238 } // Inst_SOP2__S_BFE_U32
1239
1241 {
1242 } // ~Inst_SOP2__S_BFE_U32
1243
1244 // --- description from .arch file ---
1245 // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
1246 // field width.
1247 // D.u = (S0.u>>S1.u[4:0]) & ((1<<S1.u[22:16])-1);
1248 // SCC = 1 if result is non-zero.
1249 void
1251 {
1252 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1253 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1254 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1255 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1256
1257 src0.read();
1258 src1.read();
1259
1260 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
1261 & ((1 << bits(src1.rawData(), 22, 16)) - 1);
1262 scc = sdst.rawData() ? 1 : 0;
1263
1264 sdst.write();
1265 scc.write();
1266 } // execute
1267 // --- Inst_SOP2__S_BFE_I32 class methods ---
1268
1270 : Inst_SOP2(iFmt, "s_bfe_i32")
1271 {
1272 setFlag(ALU);
1273 } // Inst_SOP2__S_BFE_I32
1274
1276 {
1277 } // ~Inst_SOP2__S_BFE_I32
1278
1279 // --- description from .arch file ---
1280 // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
1281 // field width.
1282 // D.i = (S0.i>>S1.u[4:0]) & ((1<<S1.u[22:16])-1);
1283 // Sign-extend the result;
1284 // SCC = 1 if result is non-zero.
1285 void
1287 {
1288 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1289 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1290 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1291 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1292
1293 src0.read();
1294 src1.read();
1295
1296 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
1297 & ((1 << bits(src1.rawData(), 22, 16)) - 1);
1298
1299 // Above extracted a signed int of size src1[22:16] bits which needs
1300 // to be signed-extended. Check if the MSB of our src1[22:16]-bit
1301 // integer is 1, and sign extend it is.
1302 //
1303 // Note: The description in the Vega ISA manual does not mention to
1304 // sign-extend the result. An update description can be found in the
1305 // more recent RDNA3 manual here:
1306 // https://developer.amd.com/wp-content/resources/
1307 // RDNA3_Shader_ISA_December2022.pdf
1308 if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) {
1309 sdst = sdst.rawData()
1310 | (0xffffffff << bits(src1.rawData(), 22, 16));
1311 }
1312
1313 scc = sdst.rawData() ? 1 : 0;
1314
1315 sdst.write();
1316 scc.write();
1317 } // execute
1318 // --- Inst_SOP2__S_BFE_U64 class methods ---
1319
1321 : Inst_SOP2(iFmt, "s_bfe_u64")
1322 {
1323 setFlag(ALU);
1324 } // Inst_SOP2__S_BFE_U64
1325
1327 {
1328 } // ~Inst_SOP2__S_BFE_U64
1329
1330 // --- description from .arch file ---
1331 // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
1332 // field width.
1333 // D.u64 = (S0.u64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1);
1334 // SCC = 1 if result is non-zero.
1335 void
1337 {
1338 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
1339 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1340 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1341 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1342
1343 src0.read();
1344 src1.read();
1345
1346 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
1347 & ((1 << bits(src1.rawData(), 22, 16)) - 1);
1348 scc = sdst.rawData() ? 1 : 0;
1349
1350 sdst.write();
1351 scc.write();
1352 } // execute
1353 // --- Inst_SOP2__S_BFE_I64 class methods ---
1354
1356 : Inst_SOP2(iFmt, "s_bfe_i64")
1357 {
1358 setFlag(ALU);
1359 } // Inst_SOP2__S_BFE_I64
1360
1362 {
1363 } // ~Inst_SOP2__S_BFE_I64
1364
1365 // --- description from .arch file ---
1366 // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
1367 // field width.
1368 // D.i64 = (S0.i64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1);
1369 // Sign-extend result;
1370 // SCC = 1 if result is non-zero.
1371 void
1373 {
1374 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
1375 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1376 ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
1377 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1378
1379 src0.read();
1380 src1.read();
1381
1382 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
1383 & ((1 << bits(src1.rawData(), 22, 16)) - 1);
1384
1385 // Above extracted a signed int of size src1[22:16] bits which needs
1386 // to be signed-extended. Check if the MSB of our src1[22:16]-bit
1387 // integer is 1, and sign extend it is.
1388 if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) {
1389 sdst = sdst.rawData()
1390 | 0xffffffffffffffff << bits(src1.rawData(), 22, 16);
1391 }
1392 scc = sdst.rawData() ? 1 : 0;
1393
1394 sdst.write();
1395 scc.write();
1396 } // execute
1397 // --- Inst_SOP2__S_CBRANCH_G_FORK class methods ---
1398
1400 : Inst_SOP2(iFmt, "s_cbranch_g_fork")
1401 {
1402 setFlag(Branch);
1403 } // Inst_SOP2__S_CBRANCH_G_FORK
1404
1406 {
1407 } // ~Inst_SOP2__S_CBRANCH_G_FORK
1408
1409 // --- description from .arch file ---
1410 // mask_pass = S0.u64 & EXEC;
1411 // mask_fail = ~S0.u64 & EXEC;
1412 // if (mask_pass == EXEC)
1413 // PC = S1.u64;
1414 // elsif (mask_fail == EXEC)
1415 // PC += 4;
1416 // elsif (bitcount(mask_fail) < bitcount(mask_pass))
1417 // EXEC = mask_fail;
1418 // SGPR[CSP*4] = { S1.u64, mask_pass };
1419 // CSP++;
1420 // PC += 4;
1421 // else
1422 // EXEC = mask_pass;
1423 // SGPR[CSP*4] = { PC + 4, mask_fail };
1424 // CSP++;
1425 // PC = S1.u64;
1426 // end.
1427 // Conditional branch using branch-stack.
1428 // S0 = compare mask(vcc or any sgpr) and
1429 // S1 = 64-bit byte address of target instruction.
1430 // See also S_CBRANCH_JOIN.
1431 void
1433 {
1435 } // execute
1436 // --- Inst_SOP2__S_ABSDIFF_I32 class methods ---
1437
1439 : Inst_SOP2(iFmt, "s_absdiff_i32")
1440 {
1441 setFlag(ALU);
1442 } // Inst_SOP2__S_ABSDIFF_I32
1443
1445 {
1446 } // ~Inst_SOP2__S_ABSDIFF_I32
1447
1448 // --- description from .arch file ---
1449 // D.i = S0.i - S1.i;
1450 // if (D.i < 0) then D.i = -D.i;
1451 // SCC = 1 if result is non-zero.
1452 // Compute the absolute value of difference between two values.
1453 void
1455 {
1456 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1457 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
1458 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1459 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1460
1461 sdst = std::abs(src0.rawData() - src1.rawData());
1462 scc = sdst.rawData() ? 1 : 0;
1463
1464 sdst.write();
1465 scc.write();
1466 } // execute
1467 // --- Inst_SOP2__S_RFE_RESTORE_B64 class methods ---
1468
1470 InFmt_SOP2 *iFmt)
1471 : Inst_SOP2(iFmt, "s_rfe_restore_b64")
1472 {
1473 } // Inst_SOP2__S_RFE_RESTORE_B64
1474
1476 {
1477 } // ~Inst_SOP2__S_RFE_RESTORE_B64
1478
1479 // --- description from .arch file ---
1480 // PRIV = 0;
1481 // PC = S0.u64;
1482 // INST_ATC = S1.u32[0].
1483 // Return from exception handler and continue, possibly changing the
1484 // --- instruction ATC mode.
1485 // This instruction may only be used within a trap handler.
1486 // Use this instruction when the main program may be in a different memory
1487 // --- space than the trap handler.
1488 void
1490 {
1492 } // execute
1493 // --- Inst_SOP2__S_MUL_HI_U32 class methods ---
1494
1496 : Inst_SOP2(iFmt, "s_mul_hi_u32")
1497 {
1498 setFlag(ALU);
1499 } // Inst_SOP2__S_MUL_HI_U32
1500
1502 {
1503 } // ~Inst_SOP2__S_MUL_HI_U32
1504
1505 // --- description from .arch file ---
1506 // D.u = (S0.u * S1.u) >> 32;
1507 void
1509 {
1510 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1511 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1512 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1513
1514 src0.read();
1515 src1.read();
1516
1517 VecElemU64 tmp_dst =
1518 ((VecElemU64)src0.rawData() * (VecElemU64)src1.rawData());
1519 sdst = (tmp_dst >> 32);
1520
1521 sdst.write();
1522 } // execute
1523 // --- Inst_SOP2__S_MUL_HI_I32 class methods ---
1524
1526 : Inst_SOP2(iFmt, "s_mul_hi_i32")
1527 {
1528 setFlag(ALU);
1529 } // Inst_SOP2__S_MUL_HI_I32
1530
1532 {
1533 } // ~Inst_SOP2__S_MUL_HI_I32
1534
1535 // --- description from .arch file ---
1536 // D.u = (S0.u * S1.u) >> 32;
1537 void
1539 {
1540 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1541 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
1542 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1543
1544 src0.read();
1545 src1.read();
1546
1547 VecElemI64 tmp_src0 =
1548 sext<std::numeric_limits<VecElemI64>::digits>(src0.rawData());
1549 VecElemI64 tmp_src1 =
1550 sext<std::numeric_limits<VecElemI64>::digits>(src1.rawData());
1551 sdst = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
1552
1553 sdst.write();
1554 } // execute
1555} // namespace VegaISA
1556} // namespace gem5
void setFlag(Flags flag)
Base class for branch operations.
Definition branch.hh:49
void execute(GPUDynInstPtr) override
Definition sop2.cc:1454
void execute(GPUDynInstPtr) override
Definition sop2.cc:198
void execute(GPUDynInstPtr) override
Definition sop2.cc:125
Inst_SOP2__S_ADD_U32(InFmt_SOP2 *)
Definition sop2.cc:41
void execute(GPUDynInstPtr) override
Definition sop2.cc:56
void execute(GPUDynInstPtr) override
Definition sop2.cc:645
void execute(GPUDynInstPtr) override
Definition sop2.cc:677
void execute(GPUDynInstPtr) override
Definition sop2.cc:453
void execute(GPUDynInstPtr) override
Definition sop2.cc:485
void execute(GPUDynInstPtr) override
Definition sop2.cc:1096
void execute(GPUDynInstPtr) override
Definition sop2.cc:1129
void execute(GPUDynInstPtr) override
Definition sop2.cc:1286
void execute(GPUDynInstPtr) override
Definition sop2.cc:1372
void execute(GPUDynInstPtr) override
Definition sop2.cc:1250
void execute(GPUDynInstPtr) override
Definition sop2.cc:1336
void execute(GPUDynInstPtr) override
Definition sop2.cc:1160
void execute(GPUDynInstPtr) override
Definition sop2.cc:1189
void execute(GPUDynInstPtr) override
Definition sop2.cc:1432
void execute(GPUDynInstPtr) override
Definition sop2.cc:392
void execute(GPUDynInstPtr) override
Definition sop2.cc:422
void execute(GPUDynInstPtr) override
Definition sop2.cc:965
void execute(GPUDynInstPtr) override
Definition sop2.cc:997
void execute(GPUDynInstPtr) override
Definition sop2.cc:1030
void execute(GPUDynInstPtr) override
Definition sop2.cc:1063
void execute(GPUDynInstPtr) override
Definition sop2.cc:329
void execute(GPUDynInstPtr) override
Definition sop2.cc:361
void execute(GPUDynInstPtr) override
Definition sop2.cc:265
void execute(GPUDynInstPtr) override
Definition sop2.cc:297
void execute(GPUDynInstPtr) override
Definition sop2.cc:1538
void execute(GPUDynInstPtr) override
Definition sop2.cc:1508
void execute(GPUDynInstPtr) override
Definition sop2.cc:1218
void execute(GPUDynInstPtr) override
Definition sop2.cc:773
void execute(GPUDynInstPtr) override
Definition sop2.cc:805
void execute(GPUDynInstPtr) override
Definition sop2.cc:837
void execute(GPUDynInstPtr) override
Definition sop2.cc:869
void execute(GPUDynInstPtr) override
Definition sop2.cc:709
void execute(GPUDynInstPtr) override
Definition sop2.cc:741
Inst_SOP2__S_OR_B32(InFmt_SOP2 *)
Definition sop2.cc:503
void execute(GPUDynInstPtr) override
Definition sop2.cc:517
Inst_SOP2__S_OR_B64(InFmt_SOP2 *)
Definition sop2.cc:535
void execute(GPUDynInstPtr) override
Definition sop2.cc:549
void execute(GPUDynInstPtr) override
Definition sop2.cc:1489
void execute(GPUDynInstPtr) override
Definition sop2.cc:232
void execute(GPUDynInstPtr) override
Definition sop2.cc:164
void execute(GPUDynInstPtr) override
Definition sop2.cc:90
Inst_SOP2__S_SUB_U32(InFmt_SOP2 *)
Definition sop2.cc:75
void execute(GPUDynInstPtr) override
Definition sop2.cc:901
void execute(GPUDynInstPtr) override
Definition sop2.cc:933
void execute(GPUDynInstPtr) override
Definition sop2.cc:581
void execute(GPUDynInstPtr) override
Definition sop2.cc:613
void read() override
read from and write to the underlying register(s) that this operand is referring to.
Definition operand.hh:409
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
Definition operand.hh:392
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
Bitfield< 3, 0 > mask
Definition pcstate.hh:63
uint64_t ScalarRegU64
uint64_t VecElemU64
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49

Generated on Tue Jun 18 2024 16:23:48 for gem5 by doxygen 1.11.0