gem5 v24.1.0.1
Loading...
Searching...
No Matches
vop3.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
35
36namespace gem5
37{
38
39namespace VegaISA
40{
41 // --- Inst_VOP3__V_CNDMASK_B32 class methods ---
42
44 : Inst_VOP3A(iFmt, "v_cndmask_b32", false)
45 {
46 setFlag(ALU);
47 setFlag(ReadsVCC);
48 } // Inst_VOP3__V_CNDMASK_B32
49
51 {
52 } // ~Inst_VOP3__V_CNDMASK_B32
53
54 // --- description from .arch file ---
55 // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
56 // as a scalar GPR in S2.
57 void
59 {
60 Wavefront *wf = gpuDynInst->wavefront();
61 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
62 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
63 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
64 VecOperandU32 vdst(gpuDynInst, instData.VDST);
65
66 src0.readSrc();
67 src1.readSrc();
68 vcc.read();
69
70 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
71 if (wf->execMask(lane)) {
72 vdst[lane] = bits(vcc.rawData(), lane)
73 ? src1[lane] : src0[lane];
74 }
75 }
76
77 vdst.write();
78 } // execute
79 // --- Inst_VOP3__V_ADD_F32 class methods ---
80
82 : Inst_VOP3A(iFmt, "v_add_f32", false)
83 {
84 setFlag(ALU);
85 setFlag(F32);
86 } // Inst_VOP3__V_ADD_F32
87
89 {
90 } // ~Inst_VOP3__V_ADD_F32
91
92 // --- description from .arch file ---
93 // D.f = S0.f + S1.f.
94 void
96 {
97 Wavefront *wf = gpuDynInst->wavefront();
98 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
99 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
100 VecOperandF32 vdst(gpuDynInst, instData.VDST);
101
102 src0.readSrc();
103 src1.readSrc();
104
105 if (instData.ABS & 0x1) {
106 src0.absModifier();
107 }
108
109 if (instData.ABS & 0x2) {
110 src1.absModifier();
111 }
112
113 if (extData.NEG & 0x1) {
114 src0.negModifier();
115 }
116
117 if (extData.NEG & 0x2) {
118 src1.negModifier();
119 }
120
124 assert(!(instData.ABS & 0x4));
125 assert(!(extData.NEG & 0x4));
126
127 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
128 if (wf->execMask(lane)) {
129 vdst[lane] = src0[lane] + src1[lane];
130 }
131 }
132
133 vdst.write();
134 } // execute
135 // --- Inst_VOP3__V_SUB_F32 class methods ---
136
138 : Inst_VOP3A(iFmt, "v_sub_f32", false)
139 {
140 setFlag(ALU);
141 setFlag(F32);
142 } // Inst_VOP3__V_SUB_F32
143
145 {
146 } // ~Inst_VOP3__V_SUB_F32
147
148 // --- description from .arch file ---
149 // D.f = S0.f - S1.f.
150 // SQ translates to V_ADD_F32.
151 void
153 {
154 Wavefront *wf = gpuDynInst->wavefront();
155 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
156 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
157 VecOperandF32 vdst(gpuDynInst, instData.VDST);
158
159 src0.readSrc();
160 src1.readSrc();
161
162 if (instData.ABS & 0x1) {
163 src0.absModifier();
164 }
165
166 if (instData.ABS & 0x2) {
167 src1.absModifier();
168 }
169
170 if (extData.NEG & 0x1) {
171 src0.negModifier();
172 }
173
174 if (extData.NEG & 0x2) {
175 src1.negModifier();
176 }
177
181 assert(!(instData.ABS & 0x4));
182 assert(!(extData.NEG & 0x4));
183
184 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
185 if (wf->execMask(lane)) {
186 vdst[lane] = src0[lane] - src1[lane];
187 }
188 }
189
190 vdst.write();
191 } // execute
192 // --- Inst_VOP3__V_SUBREV_F32 class methods ---
193
195 : Inst_VOP3A(iFmt, "v_subrev_f32", false)
196 {
197 setFlag(ALU);
198 setFlag(F32);
199 } // Inst_VOP3__V_SUBREV_F32
200
202 {
203 } // ~Inst_VOP3__V_SUBREV_F32
204
205 // --- description from .arch file ---
206 // D.f = S1.f - S0.f.
207 // SQ translates to V_ADD_F32.
208 void
210 {
211 Wavefront *wf = gpuDynInst->wavefront();
212 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
213 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
214 VecOperandF32 vdst(gpuDynInst, instData.VDST);
215
216 src0.readSrc();
217 src1.readSrc();
218
219 if (instData.ABS & 0x1) {
220 src0.absModifier();
221 }
222
223 if (instData.ABS & 0x2) {
224 src1.absModifier();
225 }
226
227 if (extData.NEG & 0x1) {
228 src0.negModifier();
229 }
230
231 if (extData.NEG & 0x2) {
232 src1.negModifier();
233 }
234
238 assert(!(instData.ABS & 0x4));
239 assert(!(extData.NEG & 0x4));
240
241 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
242 if (wf->execMask(lane)) {
243 vdst[lane] = src1[lane] - src0[lane];
244 }
245 }
246
247 vdst.write();
248 } // execute
249 // --- Inst_VOP3__V_MUL_LEGACY_F32 class methods ---
250
252 : Inst_VOP3A(iFmt, "v_mul_legacy_f32", false)
253 {
254 setFlag(ALU);
255 setFlag(F32);
256 } // Inst_VOP3__V_MUL_LEGACY_F32
257
259 {
260 } // ~Inst_VOP3__V_MUL_LEGACY_F32
261
262 // --- description from .arch file ---
263 // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0).
264 void
266 {
267 Wavefront *wf = gpuDynInst->wavefront();
268 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
269 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
270 VecOperandF32 vdst(gpuDynInst, instData.VDST);
271
272 src0.readSrc();
273 src1.readSrc();
274
275 if (instData.ABS & 0x1) {
276 src0.absModifier();
277 }
278
279 if (instData.ABS & 0x2) {
280 src1.absModifier();
281 }
282
283 if (extData.NEG & 0x1) {
284 src0.negModifier();
285 }
286
287 if (extData.NEG & 0x2) {
288 src1.negModifier();
289 }
290
294 assert(!(instData.ABS & 0x4));
295 assert(!(extData.NEG & 0x4));
296
297 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
298 if (wf->execMask(lane)) {
299 if (std::isnan(src0[lane]) ||
300 std::isnan(src1[lane])) {
301 vdst[lane] = NAN;
302 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
303 std::fpclassify(src0[lane]) == FP_ZERO) &&
304 !std::signbit(src0[lane])) {
305 if (std::isinf(src1[lane])) {
306 vdst[lane] = NAN;
307 } else if (!std::signbit(src1[lane])) {
308 vdst[lane] = +0.0;
309 } else {
310 vdst[lane] = -0.0;
311 }
312 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
313 std::fpclassify(src0[lane]) == FP_ZERO) &&
314 std::signbit(src0[lane])) {
315 if (std::isinf(src1[lane])) {
316 vdst[lane] = NAN;
317 } else if (std::signbit(src1[lane])) {
318 vdst[lane] = +0.0;
319 } else {
320 vdst[lane] = -0.0;
321 }
322 } else if (std::isinf(src0[lane]) &&
323 !std::signbit(src0[lane])) {
324 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
325 std::fpclassify(src1[lane]) == FP_ZERO) {
326 vdst[lane] = NAN;
327 } else if (!std::signbit(src1[lane])) {
328 vdst[lane] = +INFINITY;
329 } else {
330 vdst[lane] = -INFINITY;
331 }
332 } else if (std::isinf(src0[lane]) &&
333 std::signbit(src0[lane])) {
334 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
335 std::fpclassify(src1[lane]) == FP_ZERO) {
336 vdst[lane] = NAN;
337 } else if (std::signbit(src1[lane])) {
338 vdst[lane] = +INFINITY;
339 } else {
340 vdst[lane] = -INFINITY;
341 }
342 } else {
343 vdst[lane] = src0[lane] * src1[lane];
344 }
345 }
346 }
347
348 vdst.write();
349 } // execute
350 // --- Inst_VOP3__V_MUL_F32 class methods ---
351
353 : Inst_VOP3A(iFmt, "v_mul_f32", false)
354 {
355 setFlag(ALU);
356 setFlag(F32);
357 } // Inst_VOP3__V_MUL_F32
358
360 {
361 } // ~Inst_VOP3__V_MUL_F32
362
363 // --- description from .arch file ---
364 // D.f = S0.f * S1.f.
365 void
367 {
368 Wavefront *wf = gpuDynInst->wavefront();
369 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
370 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
371 VecOperandF32 vdst(gpuDynInst, instData.VDST);
372
373 src0.readSrc();
374 src1.readSrc();
375
376 if (instData.ABS & 0x1) {
377 src0.absModifier();
378 }
379
380 if (instData.ABS & 0x2) {
381 src1.absModifier();
382 }
383
384 if (extData.NEG & 0x1) {
385 src0.negModifier();
386 }
387
388 if (extData.NEG & 0x2) {
389 src1.negModifier();
390 }
391
395 assert(!(instData.ABS & 0x4));
396 assert(!(extData.NEG & 0x4));
397
398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
399 if (wf->execMask(lane)) {
400 if (std::isnan(src0[lane]) ||
401 std::isnan(src1[lane])) {
402 vdst[lane] = NAN;
403 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
404 std::fpclassify(src0[lane]) == FP_ZERO) &&
405 !std::signbit(src0[lane])) {
406 if (std::isinf(src1[lane])) {
407 vdst[lane] = NAN;
408 } else if (!std::signbit(src1[lane])) {
409 vdst[lane] = +0.0;
410 } else {
411 vdst[lane] = -0.0;
412 }
413 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
414 std::fpclassify(src0[lane]) == FP_ZERO) &&
415 std::signbit(src0[lane])) {
416 if (std::isinf(src1[lane])) {
417 vdst[lane] = NAN;
418 } else if (std::signbit(src1[lane])) {
419 vdst[lane] = +0.0;
420 } else {
421 vdst[lane] = -0.0;
422 }
423 } else if (std::isinf(src0[lane]) &&
424 !std::signbit(src0[lane])) {
425 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
426 std::fpclassify(src1[lane]) == FP_ZERO) {
427 vdst[lane] = NAN;
428 } else if (!std::signbit(src1[lane])) {
429 vdst[lane] = +INFINITY;
430 } else {
431 vdst[lane] = -INFINITY;
432 }
433 } else if (std::isinf(src0[lane]) &&
434 std::signbit(src0[lane])) {
435 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
436 std::fpclassify(src1[lane]) == FP_ZERO) {
437 vdst[lane] = NAN;
438 } else if (std::signbit(src1[lane])) {
439 vdst[lane] = +INFINITY;
440 } else {
441 vdst[lane] = -INFINITY;
442 }
443 } else {
444 vdst[lane] = src0[lane] * src1[lane];
445 }
446 }
447 }
448
449 vdst.write();
450 } // execute
451 // --- Inst_VOP3__V_MUL_I32_I24 class methods ---
452
454 : Inst_VOP3A(iFmt, "v_mul_i32_i24", false)
455 {
456 setFlag(ALU);
457 } // Inst_VOP3__V_MUL_I32_I24
458
460 {
461 } // ~Inst_VOP3__V_MUL_I32_I24
462
463 // --- description from .arch file ---
464 // D.i = S0.i[23:0] * S1.i[23:0].
465 void
467 {
468 Wavefront *wf = gpuDynInst->wavefront();
469 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
470 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
471 VecOperandI32 vdst(gpuDynInst, instData.VDST);
472
473 src0.readSrc();
474 src1.read();
475
479 assert(!(instData.ABS & 0x1));
480 assert(!(instData.ABS & 0x2));
481 assert(!(instData.ABS & 0x4));
482 assert(!(extData.NEG & 0x1));
483 assert(!(extData.NEG & 0x2));
484 assert(!(extData.NEG & 0x4));
485
486 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
487 if (wf->execMask(lane)) {
488 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
489 * sext<24>(bits(src1[lane], 23, 0));
490 }
491 }
492
493 vdst.write();
494 } // execute
495 // --- Inst_VOP3__V_MUL_HI_I32_I24 class methods ---
496
498 : Inst_VOP3A(iFmt, "v_mul_hi_i32_i24", false)
499 {
500 setFlag(ALU);
501 } // Inst_VOP3__V_MUL_HI_I32_I24
502
504 {
505 } // ~Inst_VOP3__V_MUL_HI_I32_I24
506
507 // --- description from .arch file ---
508 // D.i = (S0.i[23:0] * S1.i[23:0])>>32.
509 void
511 {
512 Wavefront *wf = gpuDynInst->wavefront();
513 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
514 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
515 VecOperandI32 vdst(gpuDynInst, instData.VDST);
516
517 src0.readSrc();
518 src1.readSrc();
519
523 assert(!(instData.ABS & 0x1));
524 assert(!(instData.ABS & 0x2));
525 assert(!(instData.ABS & 0x4));
526 assert(!(extData.NEG & 0x1));
527 assert(!(extData.NEG & 0x2));
528 assert(!(extData.NEG & 0x4));
529
530 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
531 if (wf->execMask(lane)) {
532 VecElemI64 tmp_src0
533 = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
534 VecElemI64 tmp_src1
535 = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
536
537 vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
538 }
539 }
540
541 vdst.write();
542 } // execute
543 // --- Inst_VOP3__V_MUL_U32_U24 class methods ---
544
546 : Inst_VOP3A(iFmt, "v_mul_u32_u24", false)
547 {
548 setFlag(ALU);
549 } // Inst_VOP3__V_MUL_U32_U24
550
552 {
553 } // ~Inst_VOP3__V_MUL_U32_U24
554
555 // --- description from .arch file ---
556 // D.u = S0.u[23:0] * S1.u[23:0].
557 void
559 {
560 Wavefront *wf = gpuDynInst->wavefront();
561 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
562 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
563 VecOperandU32 vdst(gpuDynInst, instData.VDST);
564
565 src0.readSrc();
566 src1.readSrc();
567
571 assert(!(instData.ABS & 0x1));
572 assert(!(instData.ABS & 0x2));
573 assert(!(instData.ABS & 0x4));
574 assert(!(extData.NEG & 0x1));
575 assert(!(extData.NEG & 0x2));
576 assert(!(extData.NEG & 0x4));
577
578 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
579 if (wf->execMask(lane)) {
580 vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0);
581 }
582 }
583
584 vdst.write();
585 } // execute
586 // --- Inst_VOP3__V_MUL_HI_U32_U24 class methods ---
587
589 : Inst_VOP3A(iFmt, "v_mul_hi_u32_u24", false)
590 {
591 setFlag(ALU);
592 } // Inst_VOP3__V_MUL_HI_U32_U24
593
595 {
596 } // ~Inst_VOP3__V_MUL_HI_U32_U24
597
598 // --- description from .arch file ---
599 // D.i = (S0.u[23:0] * S1.u[23:0])>>32.
600 void
602 {
603 Wavefront *wf = gpuDynInst->wavefront();
604 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
605 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
606 VecOperandU32 vdst(gpuDynInst, instData.VDST);
607
608 src0.readSrc();
609 src1.readSrc();
610
614 assert(!(instData.ABS & 0x1));
615 assert(!(instData.ABS & 0x2));
616 assert(!(instData.ABS & 0x4));
617 assert(!(extData.NEG & 0x1));
618 assert(!(extData.NEG & 0x2));
619 assert(!(extData.NEG & 0x4));
620
621 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
622 if (wf->execMask(lane)) {
623 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
624 VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
625 vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
626 }
627 }
628
629 vdst.write();
630 } // execute
631 // --- Inst_VOP3__V_MIN_F32 class methods ---
632
634 : Inst_VOP3A(iFmt, "v_min_f32", false)
635 {
636 setFlag(ALU);
637 setFlag(F32);
638 } // Inst_VOP3__V_MIN_F32
639
641 {
642 } // ~Inst_VOP3__V_MIN_F32
643
644 // --- description from .arch file ---
645 // D.f = (S0.f < S1.f ? S0.f : S1.f).
646 void
648 {
649 Wavefront *wf = gpuDynInst->wavefront();
650 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
651 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
652 VecOperandF32 vdst(gpuDynInst, instData.VDST);
653
654 src0.readSrc();
655 src1.readSrc();
656
657 if (instData.ABS & 0x1) {
658 src0.absModifier();
659 }
660
661 if (instData.ABS & 0x2) {
662 src1.absModifier();
663 }
664
665 if (extData.NEG & 0x1) {
666 src0.negModifier();
667 }
668
669 if (extData.NEG & 0x2) {
670 src1.negModifier();
671 }
672
676 assert(!(instData.ABS & 0x4));
677 assert(!(extData.NEG & 0x4));
678
679 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
680 if (wf->execMask(lane)) {
681 vdst[lane] = std::fmin(src0[lane], src1[lane]);
682 }
683 }
684
685 vdst.write();
686 } // execute
687 // --- Inst_VOP3__V_MAX_F32 class methods ---
688
690 : Inst_VOP3A(iFmt, "v_max_f32", false)
691 {
692 setFlag(ALU);
693 setFlag(F32);
694 } // Inst_VOP3__V_MAX_F32
695
697 {
698 } // ~Inst_VOP3__V_MAX_F32
699
700 // --- description from .arch file ---
701 // D.f = (S0.f >= S1.f ? S0.f : S1.f).
702 void
704 {
705 Wavefront *wf = gpuDynInst->wavefront();
706 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
707 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
708 VecOperandF32 vdst(gpuDynInst, instData.VDST);
709
710 src0.readSrc();
711 src1.readSrc();
712
713 if (instData.ABS & 0x1) {
714 src0.absModifier();
715 }
716
717 if (instData.ABS & 0x2) {
718 src1.absModifier();
719 }
720
721 if (extData.NEG & 0x1) {
722 src0.negModifier();
723 }
724
725 if (extData.NEG & 0x2) {
726 src1.negModifier();
727 }
728
732 assert(!(instData.ABS & 0x4));
733 assert(!(extData.NEG & 0x4));
734
735 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
736 if (wf->execMask(lane)) {
737 vdst[lane] = std::fmax(src0[lane], src1[lane]);
738 }
739 }
740
741 vdst.write();
742 } // execute
743 // --- Inst_VOP3__V_MIN_I32 class methods ---
744
746 : Inst_VOP3A(iFmt, "v_min_i32", false)
747 {
748 setFlag(ALU);
749 } // Inst_VOP3__V_MIN_I32
750
752 {
753 } // ~Inst_VOP3__V_MIN_I32
754
755 // --- description from .arch file ---
756 // D.i = min(S0.i, S1.i).
757 void
759 {
760 Wavefront *wf = gpuDynInst->wavefront();
761 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
762 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
763 VecOperandI32 vdst(gpuDynInst, instData.VDST);
764
765 src0.readSrc();
766 src1.readSrc();
767
771 assert(!(instData.ABS & 0x1));
772 assert(!(instData.ABS & 0x2));
773 assert(!(instData.ABS & 0x4));
774 assert(!(extData.NEG & 0x1));
775 assert(!(extData.NEG & 0x2));
776 assert(!(extData.NEG & 0x4));
777
778 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
779 if (wf->execMask(lane)) {
780 vdst[lane] = std::min(src0[lane], src1[lane]);
781 }
782 }
783
784 vdst.write();
785 } // execute
786 // --- Inst_VOP3__V_MAX_I32 class methods ---
787
789 : Inst_VOP3A(iFmt, "v_max_i32", false)
790 {
791 setFlag(ALU);
792 } // Inst_VOP3__V_MAX_I32
793
795 {
796 } // ~Inst_VOP3__V_MAX_I32
797
798 // --- description from .arch file ---
799 // D.i = max(S0.i, S1.i).
800 void
802 {
803 Wavefront *wf = gpuDynInst->wavefront();
804 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
805 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
806 VecOperandI32 vdst(gpuDynInst, instData.VDST);
807
808 src0.readSrc();
809 src1.readSrc();
810
814 assert(!(instData.ABS & 0x1));
815 assert(!(instData.ABS & 0x2));
816 assert(!(instData.ABS & 0x4));
817 assert(!(extData.NEG & 0x1));
818 assert(!(extData.NEG & 0x2));
819 assert(!(extData.NEG & 0x4));
820
821 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
822 if (wf->execMask(lane)) {
823 vdst[lane] = std::max(src0[lane], src1[lane]);
824 }
825 }
826
827 vdst.write();
828 } // execute
829 // --- Inst_VOP3__V_MIN_U32 class methods ---
830
832 : Inst_VOP3A(iFmt, "v_min_u32", false)
833 {
834 setFlag(ALU);
835 } // Inst_VOP3__V_MIN_U32
836
838 {
839 } // ~Inst_VOP3__V_MIN_U32
840
841 // --- description from .arch file ---
842 // D.u = min(S0.u, S1.u).
843 void
845 {
846 Wavefront *wf = gpuDynInst->wavefront();
847 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
848 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
849 VecOperandU32 vdst(gpuDynInst, instData.VDST);
850
851 src0.readSrc();
852 src1.readSrc();
853
857 assert(!(instData.ABS & 0x1));
858 assert(!(instData.ABS & 0x2));
859 assert(!(instData.ABS & 0x4));
860 assert(!(extData.NEG & 0x1));
861 assert(!(extData.NEG & 0x2));
862 assert(!(extData.NEG & 0x4));
863
864 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
865 if (wf->execMask(lane)) {
866 vdst[lane] = std::min(src0[lane], src1[lane]);
867 }
868 }
869
870 vdst.write();
871 } // execute
872 // --- Inst_VOP3__V_MAX_U32 class methods ---
873
875 : Inst_VOP3A(iFmt, "v_max_u32", false)
876 {
877 setFlag(ALU);
878 } // Inst_VOP3__V_MAX_U32
879
881 {
882 } // ~Inst_VOP3__V_MAX_U32
883
884 // --- description from .arch file ---
885 // D.u = max(S0.u, S1.u).
886 void
888 {
889 Wavefront *wf = gpuDynInst->wavefront();
890 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
891 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
892 VecOperandU32 vdst(gpuDynInst, instData.VDST);
893
894 src0.readSrc();
895 src1.readSrc();
896
900 assert(!(instData.ABS & 0x1));
901 assert(!(instData.ABS & 0x2));
902 assert(!(instData.ABS & 0x4));
903 assert(!(extData.NEG & 0x1));
904 assert(!(extData.NEG & 0x2));
905 assert(!(extData.NEG & 0x4));
906
907 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
908 if (wf->execMask(lane)) {
909 vdst[lane] = std::max(src0[lane], src1[lane]);
910 }
911 }
912
913 vdst.write();
914 } // execute
915 // --- Inst_VOP3__V_LSHRREV_B32 class methods ---
916
918 : Inst_VOP3A(iFmt, "v_lshrrev_b32", false)
919 {
920 setFlag(ALU);
921 } // Inst_VOP3__V_LSHRREV_B32
922
924 {
925 } // ~Inst_VOP3__V_LSHRREV_B32
926
927 // --- description from .arch file ---
928 // D.u = S1.u >> S0.u[4:0].
929 // The vacated bits are set to zero.
930 // SQ translates this to an internal SP opcode.
931 void
933 {
934 Wavefront *wf = gpuDynInst->wavefront();
935 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
936 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
937 VecOperandU32 vdst(gpuDynInst, instData.VDST);
938
939 src0.readSrc();
940 src1.readSrc();
941
945 assert(!(instData.ABS & 0x1));
946 assert(!(instData.ABS & 0x2));
947 assert(!(instData.ABS & 0x4));
948 assert(!(extData.NEG & 0x1));
949 assert(!(extData.NEG & 0x2));
950 assert(!(extData.NEG & 0x4));
951
952 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
953 if (wf->execMask(lane)) {
954 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
955 }
956 }
957
958 vdst.write();
959 } // execute
960 // --- Inst_VOP3__V_ASHRREV_I32 class methods ---
961
963 : Inst_VOP3A(iFmt, "v_ashrrev_i32", false)
964 {
965 setFlag(ALU);
966 } // Inst_VOP3__V_ASHRREV_I32
967
969 {
970 } // ~Inst_VOP3__V_ASHRREV_I32
971
972 // --- description from .arch file ---
973 // D.i = signext(S1.i) >> S0.i[4:0].
974 // The vacated bits are set to the sign bit of the input value.
975 // SQ translates this to an internal SP opcode.
976 void
978 {
979 Wavefront *wf = gpuDynInst->wavefront();
980 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
981 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
982 VecOperandI32 vdst(gpuDynInst, instData.VDST);
983
984 src0.readSrc();
985 src1.readSrc();
986
990 assert(!(instData.ABS & 0x1));
991 assert(!(instData.ABS & 0x2));
992 assert(!(instData.ABS & 0x4));
993 assert(!(extData.NEG & 0x1));
994 assert(!(extData.NEG & 0x2));
995 assert(!(extData.NEG & 0x4));
996
997 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
998 if (wf->execMask(lane)) {
999 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
1000 }
1001 }
1002
1003 vdst.write();
1004 } // execute
1005 // --- Inst_VOP3__V_LSHLREV_B32 class methods ---
1006
1008 : Inst_VOP3A(iFmt, "v_lshlrev_b32", false)
1009 {
1010 setFlag(ALU);
1011 } // Inst_VOP3__V_LSHLREV_B32
1012
1014 {
1015 } // ~Inst_VOP3__V_LSHLREV_B32
1016
1017 // --- description from .arch file ---
1018 // D.u = S1.u << S0.u[4:0].
1019 // SQ translates this to an internal SP opcode.
1020 void
1022 {
1023 Wavefront *wf = gpuDynInst->wavefront();
1024 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1025 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1026 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1027
1028 src0.readSrc();
1029 src1.readSrc();
1030
1034 assert(!(instData.ABS & 0x1));
1035 assert(!(instData.ABS & 0x2));
1036 assert(!(instData.ABS & 0x4));
1037 assert(!(extData.NEG & 0x1));
1038 assert(!(extData.NEG & 0x2));
1039 assert(!(extData.NEG & 0x4));
1040
1041 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1042 if (wf->execMask(lane)) {
1043 vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
1044 }
1045 }
1046
1047 vdst.write();
1048 } // execute
1049 // --- Inst_VOP3__V_AND_B32 class methods ---
1050
1052 : Inst_VOP3A(iFmt, "v_and_b32", false)
1053 {
1054 setFlag(ALU);
1055 } // Inst_VOP3__V_AND_B32
1056
1058 {
1059 } // ~Inst_VOP3__V_AND_B32
1060
1061 // --- description from .arch file ---
1062 // D.u = S0.u & S1.u.
1063 // Input and output modifiers not supported.
1064 void
1066 {
1067 Wavefront *wf = gpuDynInst->wavefront();
1068 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1069 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1070 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1071
1072 src0.readSrc();
1073 src1.readSrc();
1074
1078 assert(!(instData.ABS & 0x1));
1079 assert(!(instData.ABS & 0x2));
1080 assert(!(instData.ABS & 0x4));
1081 assert(!(extData.NEG & 0x1));
1082 assert(!(extData.NEG & 0x2));
1083 assert(!(extData.NEG & 0x4));
1084
1085 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1086 if (wf->execMask(lane)) {
1087 vdst[lane] = src0[lane] & src1[lane];
1088 }
1089 }
1090
1091 vdst.write();
1092 } // execute
1093 // --- Inst_VOP3__V_OR_B32 class methods ---
1094
1096 : Inst_VOP3A(iFmt, "v_or_b32", false)
1097 {
1098 setFlag(ALU);
1099 } // Inst_VOP3__V_OR_B32
1100
1102 {
1103 } // ~Inst_VOP3__V_OR_B32
1104
1105 // --- description from .arch file ---
1106 // D.u = S0.u | S1.u.
1107 // Input and output modifiers not supported.
1108 void
1110 {
1111 Wavefront *wf = gpuDynInst->wavefront();
1112 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1113 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1114 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1115
1116 src0.readSrc();
1117 src1.readSrc();
1118
1122 assert(!(instData.ABS & 0x1));
1123 assert(!(instData.ABS & 0x2));
1124 assert(!(instData.ABS & 0x4));
1125 assert(!(extData.NEG & 0x1));
1126 assert(!(extData.NEG & 0x2));
1127 assert(!(extData.NEG & 0x4));
1128
1129 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1130 if (wf->execMask(lane)) {
1131 vdst[lane] = src0[lane] | src1[lane];
1132 }
1133 }
1134
1135 vdst.write();
1136 } // execute
1137 // --- Inst_VOP3__V_OR3_B32 class methods ---
1138
1140 : Inst_VOP3A(iFmt, "v_or3_b32", false)
1141 {
1142 setFlag(ALU);
1143 } // Inst_VOP3__V_OR3_B32
1144
1146 {
1147 } // ~Inst_VOP3__V_OR3_B32
1148
1149 // --- description from .arch file ---
1150 // D.u = S0.u | S1.u | S2.u.
1151 // Input and output modifiers not supported.
1152 void
1154 {
1155 Wavefront *wf = gpuDynInst->wavefront();
1156 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1157 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1158 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
1159 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1160
1161 src0.readSrc();
1162 src1.readSrc();
1163 src2.readSrc();
1164
1168 assert(!(instData.ABS & 0x1));
1169 assert(!(instData.ABS & 0x2));
1170 assert(!(instData.ABS & 0x4));
1171 assert(!(extData.NEG & 0x1));
1172 assert(!(extData.NEG & 0x2));
1173 assert(!(extData.NEG & 0x4));
1174
1175 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1176 if (wf->execMask(lane)) {
1177 vdst[lane] = src0[lane] | src1[lane] | src2[lane];
1178 }
1179 }
1180
1181 vdst.write();
1182 } // execute
1183 // --- Inst_VOP3__V_XOR_B32 class methods ---
1184
1186 : Inst_VOP3A(iFmt, "v_xor_b32", false)
1187 {
1188 setFlag(ALU);
1189 } // Inst_VOP3__V_XOR_B32
1190
1192 {
1193 } // ~Inst_VOP3__V_XOR_B32
1194
1195 // --- description from .arch file ---
1196 // D.u = S0.u ^ S1.u.
1197 // Input and output modifiers not supported.
1198 void
1200 {
1201 Wavefront *wf = gpuDynInst->wavefront();
1202 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1203 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1204 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1205
1206 src0.readSrc();
1207 src1.readSrc();
1208
1212 assert(!(instData.ABS & 0x1));
1213 assert(!(instData.ABS & 0x2));
1214 assert(!(instData.ABS & 0x4));
1215 assert(!(extData.NEG & 0x1));
1216 assert(!(extData.NEG & 0x2));
1217 assert(!(extData.NEG & 0x4));
1218
1219 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1220 if (wf->execMask(lane)) {
1221 vdst[lane] = src0[lane] ^ src1[lane];
1222 }
1223 }
1224
1225 vdst.write();
1226 } // execute
1227 // --- Inst_VOP3__V_MAC_F32 class methods ---
1228
1230 : Inst_VOP3A(iFmt, "v_mac_f32", false)
1231 {
1232 setFlag(ALU);
1233 setFlag(F32);
1234 setFlag(MAC);
1235 } // Inst_VOP3__V_MAC_F32
1236
1238 {
1239 } // ~Inst_VOP3__V_MAC_F32
1240
1241 // --- description from .arch file ---
1242 // D.f = S0.f * S1.f + D.f.
1243 // SQ translates to V_MAD_F32.
1244 void
1246 {
1247 Wavefront *wf = gpuDynInst->wavefront();
1248 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1249 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1250 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1251
1252 src0.readSrc();
1253 src1.readSrc();
1254 vdst.read();
1255
1256 if (instData.ABS & 0x1) {
1257 src0.absModifier();
1258 }
1259
1260 if (instData.ABS & 0x2) {
1261 src1.absModifier();
1262 }
1263
1264 if (extData.NEG & 0x1) {
1265 src0.negModifier();
1266 }
1267
1268 if (extData.NEG & 0x2) {
1269 src1.negModifier();
1270 }
1271
1275 assert(!(instData.ABS & 0x4));
1276 assert(!(extData.NEG & 0x4));
1277
1278 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1279 if (wf->execMask(lane)) {
1280 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
1281 }
1282 }
1283
1284 vdst.write();
1285 } // execute
1286 // --- Inst_VOP3__V_ADD_CO_U32 class methods ---
1287
1289 : Inst_VOP3B(iFmt, "v_add_co_u32")
1290 {
1291 setFlag(ALU);
1292 setFlag(WritesVCC);
1293 } // Inst_VOP3__V_ADD_CO_U32
1294
1296 {
1297 } // ~Inst_VOP3__V_ADD_CO_U32
1298
1299 // --- description from .arch file ---
1300 // D.u = S0.u + S1.u;
1301 // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED
1302 // --- overflow or carry-out for V_ADDC_U32.
1303 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
1304 void
1306 {
1307 Wavefront *wf = gpuDynInst->wavefront();
1308 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1309 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1310 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1311 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
1312
1313 src0.readSrc();
1314 src1.readSrc();
1315
1319 assert(!(extData.NEG & 0x1));
1320 assert(!(extData.NEG & 0x2));
1321 assert(!(extData.NEG & 0x4));
1322
1323 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1324 if (wf->execMask(lane)) {
1325 vdst[lane] = src0[lane] + src1[lane];
1326 vcc.setBit(lane, ((VecElemU64)src0[lane]
1327 + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
1328 }
1329 }
1330
1331 vdst.write();
1332 vcc.write();
1333 } // execute
1334 // --- Inst_VOP3__V_SUB_CO_U32 class methods ---
1335
1337 : Inst_VOP3B(iFmt, "v_sub_co_u32")
1338 {
1339 setFlag(ALU);
1340 setFlag(WritesVCC);
1341 } // Inst_VOP3__V_SUB_CO_U32
1342
1344 {
1345 } // ~Inst_VOP3__V_SUB_CO_U32
1346
1347 // --- description from .arch file ---
1348 // D.u = S0.u - S1.u;
1349 // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
1350 // carry-out for V_SUBB_U32.
1351 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
1352 void
1354 {
// Per lane: D.u32 = S0.u32 - S1.u32 (wraps mod 2^32). The borrow-out of
// each lane (S1 > S0) is accumulated into bit 'lane' of the scalar
// carry destination (an arbitrary SGPR pair in VOP3B).
1355 Wavefront *wf = gpuDynInst->wavefront();
1356 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1357 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1358 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1359 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
1360
1361 src0.readSrc();
1362 src1.readSrc();
1363
// VOP3B carries no ABS field; NEG input modifiers are not supported here.
1367 assert(!(extData.NEG & 0x1));
1368 assert(!(extData.NEG & 0x2));
1369 assert(!(extData.NEG & 0x4));
1370
1371 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1372 if (wf->execMask(lane)) {
1373 vdst[lane] = src0[lane] - src1[lane];
// Unsigned underflow occurs exactly when the subtrahend exceeds
// the minuend.
1374 vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
1375 }
1376 }
1377
1378 vdst.write();
1379 vcc.write();
1380 } // execute
1381 // --- Inst_VOP3__V_SUBREV_CO_U32 class methods ---
1382
1384 InFmt_VOP3B *iFmt)
1385 : Inst_VOP3B(iFmt, "v_subrev_co_u32")
1386 {
1387 setFlag(ALU);
1388 setFlag(WritesVCC);
1389 } // Inst_VOP3__V_SUBREV_CO_U32
1390
1392 {
1393 } // ~Inst_VOP3__V_SUBREV_CO_U32
1394
1395 // --- description from .arch file ---
1396 // D.u = S1.u - S0.u;
1397 // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
1398 // carry-out for V_SUBB_U32.
1399 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
1400 // SQ translates this to V_SUB_U32 with reversed operands.
1401 void
1403 {
// Per lane: D.u32 = S1.u32 - S0.u32 (operands reversed relative to
// V_SUB_CO_U32); borrow-out (S0 > S1) goes to bit 'lane' of the scalar
// carry destination.
1404 Wavefront *wf = gpuDynInst->wavefront();
1405 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1406 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1407 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1408 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
1409
1410 src0.readSrc();
1411 src1.readSrc();
1412
// VOP3B carries no ABS field; NEG input modifiers are not supported here.
1416 assert(!(extData.NEG & 0x1));
1417 assert(!(extData.NEG & 0x2));
1418 assert(!(extData.NEG & 0x4));
1419
1420 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1421 if (wf->execMask(lane)) {
1422 vdst[lane] = src1[lane] - src0[lane];
1423 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
1424 }
1425 }
1426
1427 vdst.write();
1428 vcc.write();
1429 } // execute
1430 // --- Inst_VOP3__V_ADDC_CO_U32 class methods ---
1431
1433 : Inst_VOP3B(iFmt, "v_addc_co_u32")
1434 {
1435 setFlag(ALU);
1436 setFlag(WritesVCC);
1437 setFlag(ReadsVCC);
1438 } // Inst_VOP3__V_ADDC_CO_U32
1439
1441 {
1442 } // ~Inst_VOP3__V_ADDC_CO_U32
1443
1444 // --- description from .arch file ---
1445 // D.u = S0.u + S1.u + VCC[threadId];
1446 // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0)
1447 // is an UNSIGNED overflow.
1448 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1449 // source comes from the SGPR-pair at S2.u.
1450 void
1452 {
// Per lane: D.u32 = S0.u32 + S1.u32 + carry-in, where the carry-in bit
// for each lane comes from the SGPR pair named by SRC2 and the carry-out
// bit is accumulated into the SGPR pair named by SDST.
1453 Wavefront *wf = gpuDynInst->wavefront();
1454 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1455 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1456 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
1457 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1458 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1459
1460 src0.readSrc();
1461 src1.readSrc();
1462 vcc.read();
1463
// VOP3B carries no ABS field; NEG input modifiers are not supported here.
1467 assert(!(extData.NEG & 0x1));
1468 assert(!(extData.NEG & 0x2));
1469 assert(!(extData.NEG & 0x4));
1470
1471 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1472 if (wf->execMask(lane)) {
// bits(raw, lane) extracts this lane's carry-in bit (0 or 1).
1473 vdst[lane] = src0[lane] + src1[lane]
1474 + bits(vcc.rawData(), lane);
// Carry-out is computed in 64-bit arithmetic so the triple
// sum cannot wrap before the >= 2^32 comparison.
1475 sdst.setBit(lane, ((VecElemU64)src0[lane]
1476 + (VecElemU64)src1[lane]
1477 + (VecElemU64)bits(vcc.rawData(), lane))
1478 >= 0x100000000 ? 1 : 0);
1479 }
1480 }
1481
1482 vdst.write();
1483 sdst.write();
1484 } // execute
1485 // --- Inst_VOP3__V_SUBB_CO_U32 class methods ---
1486
1488 : Inst_VOP3B(iFmt, "v_subb_co_u32")
1489 {
1490 setFlag(ALU);
1491 setFlag(WritesVCC);
1492 setFlag(ReadsVCC);
1493 } // Inst_VOP3__V_SUBB_CO_U32
1494
1496 {
1497 } // ~Inst_VOP3__V_SUBB_CO_U32
1498
1499 // --- description from .arch file ---
1500 // D.u = S0.u - S1.u - VCC[threadId];
1501 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
1502 // --- overflow.
1503 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1504 // --- source comes from the SGPR-pair at S2.u.
1505 void
1507 {
// Per lane: D.u32 = S0.u32 - S1.u32 - borrow-in, with the borrow-in bit
// per lane taken from the SGPR pair at SRC2 and the borrow-out bit
// written to the SGPR pair at SDST.
1508 Wavefront *wf = gpuDynInst->wavefront();
1509 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1510 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1511 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
1512 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1513 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1514
1515 src0.readSrc();
1516 src1.readSrc();
1517 vcc.read();
1518
// VOP3B carries no ABS field; NEG input modifiers are not supported here.
1522 assert(!(extData.NEG & 0x1));
1523 assert(!(extData.NEG & 0x2));
1524 assert(!(extData.NEG & 0x4));
1525
1526 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1527 if (wf->execMask(lane)) {
1528 vdst[lane] = src0[lane] - src1[lane]
1529 - bits(vcc.rawData(), lane);
// Borrow-out of (S0 - S1 - c) is (S1 + c > S0). bits() on the
// 64-bit raw VCC keeps the sum in 64-bit arithmetic, so
// S1 + c cannot wrap even when S1 == UINT32_MAX and c == 1.
1530 sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
1531 > src0[lane] ? 1 : 0);
1532 }
1533 }
1534
1535 vdst.write();
1536 sdst.write();
1537 } // execute
1538 // --- Inst_VOP3__V_SUBBREV_CO_U32 class methods ---
1539
1541 InFmt_VOP3B *iFmt)
1542 : Inst_VOP3B(iFmt, "v_subbrev_co_u32")
1543 {
1544 setFlag(ALU);
1545 setFlag(WritesVCC);
1546 setFlag(ReadsVCC);
1547 } // Inst_VOP3__V_SUBBREV_CO_U32
1548
1550 {
1551 } // ~Inst_VOP3__V_SUBBREV_CO_U32
1552
1553 // --- description from .arch file ---
1554 // D.u = S1.u - S0.u - VCC[threadId];
1555 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
1556 // overflow.
1557 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1558 // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
1559 void
1561 {
1562 Wavefront *wf = gpuDynInst->wavefront();
1563 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1564 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1565 ConstScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1566 ScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
1567 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1568
1569 src0.readSrc();
1570 src1.readSrc();
1571 vcc.read();
1572
1576 assert(!(extData.NEG & 0x1));
1577 assert(!(extData.NEG & 0x2));
1578 assert(!(extData.NEG & 0x4));
1579
1580 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1581 if (wf->execMask(lane)) {
1582 vdst[lane] = src1[lane] - src0[lane]
1583 - bits(vcc.rawData(), lane);
1584 sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
1585 > src0[lane] ? 1 : 0);
1586 }
1587 }
1588
1589 vdst.write();
1590 sdst.write();
1591 } // execute
1592 // --- Inst_VOP3__V_ADD_F16 class methods ---
1593
1595 : Inst_VOP3A(iFmt, "v_add_f16", false)
1596 {
1597 setFlag(ALU);
1598 setFlag(F16);
1599 } // Inst_VOP3__V_ADD_F16
1600
1602 {
1603 } // ~Inst_VOP3__V_ADD_F16
1604
1605 // --- description from .arch file ---
1606 // D.f16 = S0.f16 + S1.f16.
1607 // Supports denormals, round mode, exception flags, saturation.
1608 void
1610 {
1612 } // execute
1613 // --- Inst_VOP3__V_SUB_F16 class methods ---
1614
1616 : Inst_VOP3A(iFmt, "v_sub_f16", false)
1617 {
1618 setFlag(ALU);
1619 setFlag(F16);
1620 } // Inst_VOP3__V_SUB_F16
1621
1623 {
1624 } // ~Inst_VOP3__V_SUB_F16
1625
1626 // --- description from .arch file ---
1627 // D.f16 = S0.f16 - S1.f16.
1628 // Supports denormals, round mode, exception flags, saturation.
1629 // SQ translates to V_ADD_F16.
1630 void
1632 {
1634 } // execute
1635 // --- Inst_VOP3__V_SUBREV_F16 class methods ---
1636
1638 : Inst_VOP3A(iFmt, "v_subrev_f16", false)
1639 {
1640 setFlag(ALU);
1641 setFlag(F16);
1642 } // Inst_VOP3__V_SUBREV_F16
1643
1645 {
1646 } // ~Inst_VOP3__V_SUBREV_F16
1647
1648 // --- description from .arch file ---
1649 // D.f16 = S1.f16 - S0.f16.
1650 // Supports denormals, round mode, exception flags, saturation.
1651 // SQ translates to V_ADD_F16.
1652 void
1654 {
1656 } // execute
1657 // --- Inst_VOP3__V_MUL_F16 class methods ---
1658
1660 : Inst_VOP3A(iFmt, "v_mul_f16", false)
1661 {
1662 setFlag(ALU);
1663 setFlag(F16);
1664 } // Inst_VOP3__V_MUL_F16
1665
1667 {
1668 } // ~Inst_VOP3__V_MUL_F16
1669
1670 // --- description from .arch file ---
1671 // D.f16 = S0.f16 * S1.f16.
1672 // Supports denormals, round mode, exception flags, saturation.
1673 void
1675 {
1677 } // execute
1678 // --- Inst_VOP3__V_MAC_F16 class methods ---
1679
1681 : Inst_VOP3A(iFmt, "v_mac_f16", false)
1682 {
1683 setFlag(ALU);
1684 setFlag(F16);
1685 setFlag(MAC);
1686 } // Inst_VOP3__V_MAC_F16
1687
1689 {
1690 } // ~Inst_VOP3__V_MAC_F16
1691
1692 // --- description from .arch file ---
1693 // D.f16 = S0.f16 * S1.f16 + D.f16.
1694 // Supports round mode, exception flags, saturation.
1695 // SQ translates this to V_MAD_F16.
1696 void
1698 {
1700 } // execute
1701 // --- Inst_VOP3__V_ADD_U16 class methods ---
1702
1704 : Inst_VOP3A(iFmt, "v_add_u16", false)
1705 {
1706 setFlag(ALU);
1707 } // Inst_VOP3__V_ADD_U16
1708
1710 {
1711 } // ~Inst_VOP3__V_ADD_U16
1712
1713 // --- description from .arch file ---
1714 // D.u16 = S0.u16 + S1.u16.
1715 // Supports saturation (unsigned 16-bit integer domain).
1716 void
1718 {
// Per lane: D.u16 = S0.u16 + S1.u16, wrapping mod 2^16. No saturation
// is applied here even though the .arch description mentions it; CLAMP
// is not consulted in this implementation.
1719 Wavefront *wf = gpuDynInst->wavefront();
1720 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1721 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1722 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1723
1724 src0.readSrc();
1725 src1.readSrc();
1726
// ABS/NEG input modifiers are meaningless for unsigned integer operands
// and are asserted to be absent.
1730 assert(!(instData.ABS & 0x1));
1731 assert(!(instData.ABS & 0x2));
1732 assert(!(instData.ABS & 0x4));
1733 assert(!(extData.NEG & 0x1));
1734 assert(!(extData.NEG & 0x2));
1735 assert(!(extData.NEG & 0x4));
1736
1737 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1738 if (wf->execMask(lane)) {
1739 vdst[lane] = src0[lane] + src1[lane];
1740 }
1741 }
1742
1743 vdst.write();
1744 } // execute
1745 // --- Inst_VOP3__V_SUB_U16 class methods ---
1746
1748 : Inst_VOP3A(iFmt, "v_sub_u16", false)
1749 {
1750 setFlag(ALU);
1751 } // Inst_VOP3__V_SUB_U16
1752
1754 {
1755 } // ~Inst_VOP3__V_SUB_U16
1756
1757 // --- description from .arch file ---
1758 // D.u16 = S0.u16 - S1.u16.
1759 // Supports saturation (unsigned 16-bit integer domain).
1760 void
1762 {
1763 Wavefront *wf = gpuDynInst->wavefront();
1764 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1765 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1766 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1767
1768 src0.readSrc();
1769 src1.readSrc();
1770
1774 assert(!(instData.ABS & 0x1));
1775 assert(!(instData.ABS & 0x2));
1776 assert(!(instData.ABS & 0x4));
1777 assert(!(extData.NEG & 0x1));
1778 assert(!(extData.NEG & 0x2));
1779 assert(!(extData.NEG & 0x4));
1780
1781 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1782 if (wf->execMask(lane)) {
1783 vdst[lane] = src0[lane] - src1[lane];
1784 }
1785 }
1786
1787 vdst.write();
1788 } // execute
1789 // --- Inst_VOP3__V_SUBREV_U16 class methods ---
1790
1792 : Inst_VOP3A(iFmt, "v_subrev_u16", false)
1793 {
1794 setFlag(ALU);
1795 } // Inst_VOP3__V_SUBREV_U16
1796
1798 {
1799 } // ~Inst_VOP3__V_SUBREV_U16
1800
1801 // --- description from .arch file ---
1802 // D.u16 = S1.u16 - S0.u16.
1803 // Supports saturation (unsigned 16-bit integer domain).
1804 // SQ translates this to V_SUB_U16 with reversed operands.
1805 void
1807 {
1808 Wavefront *wf = gpuDynInst->wavefront();
1809 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1810 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1811 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1812
1813 src0.readSrc();
1814 src1.readSrc();
1815
1819 assert(!(instData.ABS & 0x1));
1820 assert(!(instData.ABS & 0x2));
1821 assert(!(instData.ABS & 0x4));
1822 assert(!(extData.NEG & 0x1));
1823 assert(!(extData.NEG & 0x2));
1824 assert(!(extData.NEG & 0x4));
1825
1826 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1827 if (wf->execMask(lane)) {
1828 vdst[lane] = src1[lane] - src0[lane];
1829 }
1830 }
1831
1832 vdst.write();
1833 } // execute
1834 // --- Inst_VOP3__V_MUL_LO_U16 class methods ---
1835
1837 : Inst_VOP3A(iFmt, "v_mul_lo_u16", false)
1838 {
1839 setFlag(ALU);
1840 } // Inst_VOP3__V_MUL_LO_U16
1841
1843 {
1844 } // ~Inst_VOP3__V_MUL_LO_U16
1845
1846 // --- description from .arch file ---
1847 // D.u16 = S0.u16 * S1.u16.
1848 // Supports saturation (unsigned 16-bit integer domain).
1849 void
1851 {
1852 Wavefront *wf = gpuDynInst->wavefront();
1853 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1854 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1855 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1856
1857 src0.readSrc();
1858 src1.readSrc();
1859
1863 assert(!(instData.ABS & 0x1));
1864 assert(!(instData.ABS & 0x2));
1865 assert(!(instData.ABS & 0x4));
1866 assert(!(extData.NEG & 0x1));
1867 assert(!(extData.NEG & 0x2));
1868 assert(!(extData.NEG & 0x4));
1869
1870 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1871 if (wf->execMask(lane)) {
1872 vdst[lane] = src0[lane] * src1[lane];
1873 }
1874 }
1875
1876 vdst.write();
1877 } // execute
1878 // --- Inst_VOP3__V_LSHLREV_B16 class methods ---
1879
1881 : Inst_VOP3A(iFmt, "v_lshlrev_b16", false)
1882 {
1883 setFlag(ALU);
1884 } // Inst_VOP3__V_LSHLREV_B16
1885
1887 {
1888 } // ~Inst_VOP3__V_LSHLREV_B16
1889
1890 // --- description from .arch file ---
1891 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
1892 // SQ translates this to an internal SP opcode.
1893 void
1895 {
// Per lane: D.u16 = S1.u16 << S0.u[3:0] ("rev" form: the shift amount
// is the FIRST source operand). Only the low 4 bits of the shift count
// are used, matching the 16-bit data width.
1896 Wavefront *wf = gpuDynInst->wavefront();
1897 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1898 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1899 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1900
1901 src0.readSrc();
1902 src1.readSrc();
1903
// ABS/NEG input modifiers do not apply to this untyped/integer shift.
1907 assert(!(instData.ABS & 0x1));
1908 assert(!(instData.ABS & 0x2));
1909 assert(!(instData.ABS & 0x4));
1910 assert(!(extData.NEG & 0x1));
1911 assert(!(extData.NEG & 0x2));
1912 assert(!(extData.NEG & 0x4));
1913
1914 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1915 if (wf->execMask(lane)) {
1916 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
1917 }
1918 }
1919
1920 vdst.write();
1921 } // execute
1922 // --- Inst_VOP3__V_LSHRREV_B16 class methods ---
1923
1925 : Inst_VOP3A(iFmt, "v_lshrrev_b16", false)
1926 {
1927 setFlag(ALU);
1928 } // Inst_VOP3__V_LSHRREV_B16
1929
1931 {
1932 } // ~Inst_VOP3__V_LSHRREV_B16
1933
1934 // --- description from .arch file ---
1935 // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
1936 // The vacated bits are set to zero.
1937 // SQ translates this to an internal SP opcode.
1938 void
1940 {
// Per lane: D.u16 = S1.u16 >> S0.u[3:0] ("rev" form: the shift amount
// is the FIRST source operand). Logical shift; vacated bits are zero.
1941 Wavefront *wf = gpuDynInst->wavefront();
1942 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1943 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1944 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1945
1946 src0.readSrc();
1947 src1.readSrc();
1948
// NOTE(review): unlike V_LSHLREV_B16, which asserts ABS/NEG absent, this
// opcode applies ABS/NEG modifiers to unsigned 16-bit operands. That
// inconsistency looks unintentional -- confirm against the ISA spec
// before relying on modifier behavior here.
1949 if (instData.ABS & 0x1) {
1950 src0.absModifier();
1951 }
1952
1953 if (instData.ABS & 0x2) {
1954 src1.absModifier();
1955 }
1956
1957 if (extData.NEG & 0x1) {
1958 src0.negModifier();
1959 }
1960
1961 if (extData.NEG & 0x2) {
1962 src1.negModifier();
1963 }
1964
1965 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1966 if (wf->execMask(lane)) {
1967 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
1968 }
1969 }
1970
1971 vdst.write();
1972 } // execute
1973 // --- Inst_VOP3__V_ASHRREV_I16 class methods ---
1974
1976 : Inst_VOP3A(iFmt, "v_ashrrev_i16", false)
1977 {
1978 setFlag(ALU);
1979 } // Inst_VOP3__V_ASHRREV_I16
1980
1982 {
1983 } // ~Inst_VOP3__V_ASHRREV_I16
1984
1985 // --- description from .arch file ---
1986 // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
1987 // The vacated bits are set to the sign bit of the input value.
1988 // SQ translates this to an internal SP opcode.
1989 void
1991 {
1992 Wavefront *wf = gpuDynInst->wavefront();
1993 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1994 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
1995 VecOperandI16 vdst(gpuDynInst, instData.VDST);
1996
1997 src0.readSrc();
1998 src1.readSrc();
1999
2003 assert(!(instData.ABS & 0x1));
2004 assert(!(instData.ABS & 0x2));
2005 assert(!(instData.ABS & 0x4));
2006 assert(!(extData.NEG & 0x1));
2007 assert(!(extData.NEG & 0x2));
2008 assert(!(extData.NEG & 0x4));
2009
2010 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2011 if (wf->execMask(lane)) {
2012 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
2013 }
2014 }
2015
2016 vdst.write();
2017 } // execute
2018 // --- Inst_VOP3__V_MAX_F16 class methods ---
2019
2021 : Inst_VOP3A(iFmt, "v_max_f16", false)
2022 {
2023 setFlag(ALU);
2024 setFlag(F16);
2025 } // Inst_VOP3__V_MAX_F16
2026
2028 {
2029 } // ~Inst_VOP3__V_MAX_F16
2030
2031 // --- description from .arch file ---
2032 // D.f16 = max(S0.f16, S1.f16).
2033 // IEEE compliant. Supports denormals, round mode, exception flags,
2034 // saturation.
2035 void
2037 {
2039 } // execute
2040 // --- Inst_VOP3__V_MIN_F16 class methods ---
2041
2043 : Inst_VOP3A(iFmt, "v_min_f16", false)
2044 {
2045 setFlag(ALU);
2046 setFlag(F16);
2047 } // Inst_VOP3__V_MIN_F16
2048
2050 {
2051 } // ~Inst_VOP3__V_MIN_F16
2052
2053 // --- description from .arch file ---
2054 // D.f16 = min(S0.f16, S1.f16).
2055 // IEEE compliant. Supports denormals, round mode, exception flags,
2056 // saturation.
2057 void
2059 {
2061 } // execute
2062 // --- Inst_VOP3__V_MAX_U16 class methods ---
2063
2065 : Inst_VOP3A(iFmt, "v_max_u16", false)
2066 {
2067 setFlag(ALU);
2068 } // Inst_VOP3__V_MAX_U16
2069
2071 {
2072 } // ~Inst_VOP3__V_MAX_U16
2073
2074 // --- description from .arch file ---
2075 // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
2076 void
2078 {
// Per lane: D.u16 = max(S0.u16, S1.u16).
2079 Wavefront *wf = gpuDynInst->wavefront();
2080 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
2081 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
2082 VecOperandU16 vdst(gpuDynInst, instData.VDST);
2083
2084 src0.readSrc();
2085 src1.readSrc();
2086
// NOTE(review): ABS/NEG modifiers are applied to unsigned operands here,
// while other unsigned 16-bit opcodes in this file assert them absent --
// confirm which convention is intended.
2087 if (instData.ABS & 0x1) {
2088 src0.absModifier();
2089 }
2090
2091 if (instData.ABS & 0x2) {
2092 src1.absModifier();
2093 }
2094
2095 if (extData.NEG & 0x1) {
2096 src0.negModifier();
2097 }
2098
2099 if (extData.NEG & 0x2) {
2100 src1.negModifier();
2101 }
2102
2103 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2104 if (wf->execMask(lane)) {
2105 vdst[lane] = std::max(src0[lane], src1[lane]);
2106 }
2107 }
2108
2109 vdst.write();
2110 } // execute
2111 // --- Inst_VOP3__V_MAX_I16 class methods ---
2112
2114 : Inst_VOP3A(iFmt, "v_max_i16", false)
2115 {
2116 setFlag(ALU);
2117 } // Inst_VOP3__V_MAX_I16
2118
2120 {
2121 } // ~Inst_VOP3__V_MAX_I16
2122
2123 // --- description from .arch file ---
2124 // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
2125 void
2127 {
2128 Wavefront *wf = gpuDynInst->wavefront();
2129 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
2130 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
2131 VecOperandI16 vdst(gpuDynInst, instData.VDST);
2132
2133 src0.readSrc();
2134 src1.readSrc();
2135
2136 if (instData.ABS & 0x1) {
2137 src0.absModifier();
2138 }
2139
2140 if (instData.ABS & 0x2) {
2141 src1.absModifier();
2142 }
2143
2144 if (extData.NEG & 0x1) {
2145 src0.negModifier();
2146 }
2147
2148 if (extData.NEG & 0x2) {
2149 src1.negModifier();
2150 }
2151
2152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2153 if (wf->execMask(lane)) {
2154 vdst[lane] = std::max(src0[lane], src1[lane]);
2155 }
2156 }
2157
2158 vdst.write();
2159 } // execute
2160 // --- Inst_VOP3__V_MIN_U16 class methods ---
2161
2163 : Inst_VOP3A(iFmt, "v_min_u16", false)
2164 {
2165 setFlag(ALU);
2166 } // Inst_VOP3__V_MIN_U16
2167
2169 {
2170 } // ~Inst_VOP3__V_MIN_U16
2171
2172 // --- description from .arch file ---
2173 // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
2174 void
2176 {
2177 Wavefront *wf = gpuDynInst->wavefront();
2178 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
2179 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
2180 VecOperandU16 vdst(gpuDynInst, instData.VDST);
2181
2182 src0.readSrc();
2183 src1.readSrc();
2184
2185 if (instData.ABS & 0x1) {
2186 src0.absModifier();
2187 }
2188
2189 if (instData.ABS & 0x2) {
2190 src1.absModifier();
2191 }
2192
2193 if (extData.NEG & 0x1) {
2194 src0.negModifier();
2195 }
2196
2197 if (extData.NEG & 0x2) {
2198 src1.negModifier();
2199 }
2200
2201 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2202 if (wf->execMask(lane)) {
2203 vdst[lane] = std::min(src0[lane], src1[lane]);
2204 }
2205 }
2206
2207 vdst.write();
2208 } // execute
2209 // --- Inst_VOP3__V_MIN_I16 class methods ---
2210
2212 : Inst_VOP3A(iFmt, "v_min_i16", false)
2213 {
2214 setFlag(ALU);
2215 } // Inst_VOP3__V_MIN_I16
2216
2218 {
2219 } // ~Inst_VOP3__V_MIN_I16
2220
2221 // --- description from .arch file ---
2222 // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
2223 void
2225 {
2226 Wavefront *wf = gpuDynInst->wavefront();
2227 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
2228 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
2229 VecOperandI16 vdst(gpuDynInst, instData.VDST);
2230
2231 src0.readSrc();
2232 src1.readSrc();
2233
2234 if (instData.ABS & 0x1) {
2235 src0.absModifier();
2236 }
2237
2238 if (instData.ABS & 0x2) {
2239 src1.absModifier();
2240 }
2241
2242 if (extData.NEG & 0x1) {
2243 src0.negModifier();
2244 }
2245
2246 if (extData.NEG & 0x2) {
2247 src1.negModifier();
2248 }
2249
2250 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2251 if (wf->execMask(lane)) {
2252 vdst[lane] = std::min(src0[lane], src1[lane]);
2253 }
2254 }
2255
2256 vdst.write();
2257 } // execute
2258 // --- Inst_VOP3__V_LDEXP_F16 class methods ---
2259
2261 : Inst_VOP3A(iFmt, "v_ldexp_f16", false)
2262 {
2263 setFlag(ALU);
2264 setFlag(F16);
2265 } // Inst_VOP3__V_LDEXP_F16
2266
2268 {
2269 } // ~Inst_VOP3__V_LDEXP_F16
2270
2271 // --- description from .arch file ---
2272 // D.f16 = S0.f16 * (2 ** S1.i16).
2273 void
2275 {
2277 } // execute
2278 // --- Inst_VOP3__V_ADD_U32 class methods ---
2279
2281 : Inst_VOP3A(iFmt, "v_add_u32", false)
2282 {
2283 setFlag(ALU);
2284 } // Inst_VOP3__V_ADD_U32
2285
2287 {
2288 } // ~Inst_VOP3__V_ADD_U32
2289
2290 // --- description from .arch file ---
2291 // D.u32 = S0.u32 + S1.u32.
2292 void
2294 {
// Per lane: D.u32 = S0.u32 + S1.u32, wrapping mod 2^32. No carry-out is
// produced (use V_ADD_CO_U32 for the carry-producing variant).
2295 Wavefront *wf = gpuDynInst->wavefront();
2296 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
2297 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
2298 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2299
2300 src0.readSrc();
2301 src1.readSrc();
2302
// ABS/NEG input modifiers are meaningless for unsigned integers and are
// asserted to be absent.
2306 assert(!(instData.ABS & 0x1));
2307 assert(!(instData.ABS & 0x2));
2308 assert(!(instData.ABS & 0x4));
2309 assert(!(extData.NEG & 0x1));
2310 assert(!(extData.NEG & 0x2));
2311 assert(!(extData.NEG & 0x4));
2312
2313 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2314 if (wf->execMask(lane)) {
2315 vdst[lane] = src0[lane] + src1[lane];
2316 }
2317 }
2318
2319 vdst.write();
2320 } // execute
2321 // --- Inst_VOP3__V_SUB_U32 class methods ---
2322
2324 : Inst_VOP3A(iFmt, "v_sub_u32", false)
2325 {
2326 setFlag(ALU);
2327 } // Inst_VOP3__V_SUB_U32
2328
2330 {
2331 } // ~Inst_VOP3__V_SUB_U32
2332
2333 // --- description from .arch file ---
2334 // D.u32 = S0.u32 - S1.u32.
2335 void
2337 {
2338 Wavefront *wf = gpuDynInst->wavefront();
2339 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
2340 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
2341 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2342
2343 src0.readSrc();
2344 src1.readSrc();
2345
2349 assert(!(instData.ABS & 0x1));
2350 assert(!(instData.ABS & 0x2));
2351 assert(!(instData.ABS & 0x4));
2352 assert(!(extData.NEG & 0x1));
2353 assert(!(extData.NEG & 0x2));
2354 assert(!(extData.NEG & 0x4));
2355
2356 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2357 if (wf->execMask(lane)) {
2358 vdst[lane] = src0[lane] - src1[lane];
2359 }
2360 }
2361
2362 vdst.write();
2363 } // execute
2364 // --- Inst_VOP3__V_SUBREV_U32 class methods ---
2365
2367 : Inst_VOP3A(iFmt, "v_subrev_u32", false)
2368 {
2369 setFlag(ALU);
2370 } // Inst_VOP3__V_SUBREV_U32
2371
2373 {
2374 } // ~Inst_VOP3__V_SUBREV_U32
2375
2376 // --- description from .arch file ---
2377 // D.u32 = S1.u32 - S0.u32.
2378 void
2380 {
2381 Wavefront *wf = gpuDynInst->wavefront();
2382 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
2383 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
2384 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2385
2386 src0.readSrc();
2387 src1.readSrc();
2388
2392 assert(!(instData.ABS & 0x1));
2393 assert(!(instData.ABS & 0x2));
2394 assert(!(instData.ABS & 0x4));
2395 assert(!(extData.NEG & 0x1));
2396 assert(!(extData.NEG & 0x2));
2397 assert(!(extData.NEG & 0x4));
2398
2399 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2400 if (wf->execMask(lane)) {
2401 vdst[lane] = src1[lane] - src0[lane];
2402 }
2403 }
2404
2405 vdst.write();
2406 } // execute
2407 // --- Inst_VOP3__V_FMAC_F32 class methods ---
2408
2410 : Inst_VOP3A(iFmt, "v_fmac_f32", false)
2411 {
2412 setFlag(ALU);
2413 setFlag(F32);
2414 setFlag(FMA);
2415 } // Inst_VOP3__V_FMAC_F32
2416
2418 {
2419 } // ~Inst_VOP3__V_FMAC_F32
2420
2421 // --- description from .arch file ---
2422 // D.f = S0.f * S1.f + D.f.
2423 void
2425 {
2426 Wavefront *wf = gpuDynInst->wavefront();
2427 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2428 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2429 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2430
2431 src0.readSrc();
2432 src1.readSrc();
2433 vdst.read();
2434
2435 panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
2436 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2437 panic_if(instData.OPSEL, "OPSEL not implemented for %s", _opcode);
2438
2439 if (instData.ABS & 0x1) {
2440 src0.absModifier();
2441 }
2442
2443 if (instData.ABS & 0x2) {
2444 src1.absModifier();
2445 }
2446
2447 if (instData.ABS & 0x4) {
2448 vdst.absModifier();
2449 }
2450
2451 if (extData.NEG & 0x1) {
2452 src0.negModifier();
2453 }
2454
2455 if (extData.NEG & 0x2) {
2456 src1.negModifier();
2457 }
2458
2459 if (extData.NEG & 0x4) {
2460 vdst.negModifier();
2461 }
2462
2463 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2464 if (wf->execMask(lane)) {
2465 float out = std::fma(src0[lane], src1[lane], vdst[lane]);
2466 out = omodModifier(out, extData.OMOD);
2467 if (instData.CLAMP) {
2468 out = std::clamp(vdst[lane], 0.0f, 1.0f);
2469 }
2470 vdst[lane] = out;
2471 }
2472 }
2473
2474 vdst.write();
2475 } // execute
2476 // --- Inst_VOP3__V_NOP class methods ---
2477
2479 : Inst_VOP3A(iFmt, "v_nop", false)
2480 {
2481 setFlag(Nop);
2482 setFlag(ALU);
2483 } // Inst_VOP3__V_NOP
2484
2486 {
2487 } // ~Inst_VOP3__V_NOP
2488
2489 // --- description from .arch file ---
2490 // Do nothing.
2491 void
2493 {
2494 } // execute
2495 // --- Inst_VOP3__V_MOV_B32 class methods ---
2496
2498 : Inst_VOP3A(iFmt, "v_mov_b32", false)
2499 {
2500 setFlag(ALU);
2501 } // Inst_VOP3__V_MOV_B32
2502
2504 {
2505 } // ~Inst_VOP3__V_MOV_B32
2506
2507 // --- description from .arch file ---
2508 // D.u = S0.u.
2509 // Input and output modifiers not supported; this is an untyped operation.
2510 void
2512 {
// Per lane: D.u32 = S0.u32. Untyped copy; no input/output modifiers are
// consulted, matching the .arch note that modifiers are unsupported.
2513 Wavefront *wf = gpuDynInst->wavefront();
2514 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
2515 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2516
2517 src.readSrc();
2518
2519 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2520 if (wf->execMask(lane)) {
2521 vdst[lane] = src[lane];
2522 }
2523 }
2524
2525 vdst.write();
2526 } // execute
2527 // --- Inst_VOP3__V_CVT_I32_F64 class methods ---
2528
2530 : Inst_VOP3A(iFmt, "v_cvt_i32_f64", false)
2531 {
2532 setFlag(ALU);
2533 setFlag(F64);
2534 } // Inst_VOP3__V_CVT_I32_F64
2535
2537 {
2538 } // ~Inst_VOP3__V_CVT_I32_F64
2539
2540 // --- description from .arch file ---
2541 // D.i = (int)S0.d.
2542 // Out-of-range floating point values (including infinity) saturate. NaN is
2543 // --- converted to 0.
2544 void
2546 {
2547 Wavefront *wf = gpuDynInst->wavefront();
2548 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
2549 VecOperandI32 vdst(gpuDynInst, instData.VDST);
2550
2551 src.readSrc();
2552
2553 if (instData.ABS & 0x1) {
2554 src.absModifier();
2555 }
2556
2557 if (extData.NEG & 0x1) {
2558 src.negModifier();
2559 }
2560
2561 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2562 if (wf->execMask(lane)) {
2563 int exp;
2564 std::frexp(src[lane],&exp);
2565 if (std::isnan(src[lane])) {
2566 vdst[lane] = 0;
2567 } else if (std::isinf(src[lane]) || exp > 30) {
2568 if (std::signbit(src[lane])) {
2569 vdst[lane] = INT_MIN;
2570 } else {
2571 vdst[lane] = INT_MAX;
2572 }
2573 } else {
2574 vdst[lane] = (VecElemI32)src[lane];
2575 }
2576 }
2577 }
2578
2579 vdst.write();
2580 } // execute
2581 // --- Inst_VOP3__V_CVT_F64_I32 class methods ---
2582
2584 : Inst_VOP3A(iFmt, "v_cvt_f64_i32", false)
2585 {
2586 setFlag(ALU);
2587 setFlag(F64);
2588 } // Inst_VOP3__V_CVT_F64_I32
2589
2591 {
2592 } // ~Inst_VOP3__V_CVT_F64_I32
2593
2594 // --- description from .arch file ---
2595 // D.d = (double)S0.i.
2596 void
2598 {
2599 Wavefront *wf = gpuDynInst->wavefront();
2600 ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
2601 VecOperandF64 vdst(gpuDynInst, instData.VDST);
2602
2603 src.readSrc();
2604
2605 if (instData.ABS & 0x1) {
2606 src.absModifier();
2607 }
2608
2609 if (extData.NEG & 0x1) {
2610 src.negModifier();
2611 }
2612
2613 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2614 if (wf->execMask(lane)) {
2615 vdst[lane] = (VecElemF64)src[lane];
2616 }
2617 }
2618
2619 vdst.write();
2620 } // execute
2621 // --- Inst_VOP3__V_CVT_F32_I32 class methods ---
2622
2624 : Inst_VOP3A(iFmt, "v_cvt_f32_i32", false)
2625 {
2626 setFlag(ALU);
2627 setFlag(F32);
2628 } // Inst_VOP3__V_CVT_F32_I32
2629
2631 {
2632 } // ~Inst_VOP3__V_CVT_F32_I32
2633
2634 // --- description from .arch file ---
2635 // D.f = (float)S0.i.
2636 void
2638 {
2639 Wavefront *wf = gpuDynInst->wavefront();
2640 VecOperandI32 src(gpuDynInst, extData.SRC0);
2641 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2642
2643 src.readSrc();
2644
2648 assert(!(instData.ABS & 0x1));
2649 assert(!(instData.ABS & 0x2));
2650 assert(!(instData.ABS & 0x4));
2651 assert(!(extData.NEG & 0x1));
2652 assert(!(extData.NEG & 0x2));
2653 assert(!(extData.NEG & 0x4));
2654
2655 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2656 if (wf->execMask(lane)) {
2657 vdst[lane] = (VecElemF32)src[lane];
2658 }
2659 }
2660
2661 vdst.write();
2662 } // execute
2663 // --- Inst_VOP3__V_CVT_F32_U32 class methods ---
2664
2666 : Inst_VOP3A(iFmt, "v_cvt_f32_u32", false)
2667 {
2668 setFlag(ALU);
2669 setFlag(F32);
2670 } // Inst_VOP3__V_CVT_F32_U32
2671
2673 {
2674 } // ~Inst_VOP3__V_CVT_F32_U32
2675
2676 // --- description from .arch file ---
2677 // D.f = (float)S0.u.
2678 void
2680 {
2681 Wavefront *wf = gpuDynInst->wavefront();
2682 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
2683 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2684
2685 src.readSrc();
2686
2687 if (instData.ABS & 0x1) {
2688 src.absModifier();
2689 }
2690
2691 if (extData.NEG & 0x1) {
2692 src.negModifier();
2693 }
2694
2695 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2696 if (wf->execMask(lane)) {
2697 vdst[lane] = (VecElemF32)src[lane];
2698 }
2699 }
2700
2701 vdst.write();
2702 } // execute
2703 // --- Inst_VOP3__V_CVT_U32_F32 class methods ---
2704
2706 : Inst_VOP3A(iFmt, "v_cvt_u32_f32", false)
2707 {
2708 setFlag(ALU);
2709 setFlag(F32);
2710 } // Inst_VOP3__V_CVT_U32_F32
2711
2713 {
2714 } // ~Inst_VOP3__V_CVT_U32_F32
2715
2716 // --- description from .arch file ---
2717 // D.u = (unsigned)S0.f.
2718 // Out-of-range floating point values (including infinity) saturate. NaN is
2719 // --- converted to 0.
2720 void
2722 {
2723 Wavefront *wf = gpuDynInst->wavefront();
2724 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
2725 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2726
2727 src.readSrc();
2728
2729 if (instData.ABS & 0x1) {
2730 src.absModifier();
2731 }
2732
2733 if (extData.NEG & 0x1) {
2734 src.negModifier();
2735 }
2736
2737 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2738 if (wf->execMask(lane)) {
2739 int exp;
2740 std::frexp(src[lane],&exp);
2741 if (std::isnan(src[lane])) {
2742 vdst[lane] = 0;
2743 } else if (std::isinf(src[lane])) {
2744 if (std::signbit(src[lane])) {
2745 vdst[lane] = 0;
2746 } else {
2747 vdst[lane] = UINT_MAX;
2748 }
2749 } else if (exp > 31) {
2750 vdst[lane] = UINT_MAX;
2751 } else {
2752 vdst[lane] = (VecElemU32)src[lane];
2753 }
2754 }
2755 }
2756
2757 vdst.write();
2758 } // execute
2759 // --- Inst_VOP3__V_CVT_I32_F32 class methods ---
2760
2762 : Inst_VOP3A(iFmt, "v_cvt_i32_f32", false)
2763 {
2764 setFlag(ALU);
2765 setFlag(F32);
2766 } // Inst_VOP3__V_CVT_I32_F32
2767
2769 {
2770 } // ~Inst_VOP3__V_CVT_I32_F32
2771
2772 // --- description from .arch file ---
2773 // D.i = (int)S0.f.
2774 // Out-of-range floating point values (including infinity) saturate. NaN is
2775 // --- converted to 0.
2776 void
2778 {
2779 Wavefront *wf = gpuDynInst->wavefront();
2780 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
2781 VecOperandI32 vdst(gpuDynInst, instData.VDST);
2782
2783 src.readSrc();
2784
2785 if (instData.ABS & 0x1) {
2786 src.absModifier();
2787 }
2788
2789 if (extData.NEG & 0x1) {
2790 src.negModifier();
2791 }
2792
2796 assert(!(instData.ABS & 0x2));
2797 assert(!(instData.ABS & 0x4));
2798 assert(!(extData.NEG & 0x2));
2799 assert(!(extData.NEG & 0x4));
2800
2801 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2802 if (wf->execMask(lane)) {
2803 int exp;
2804 std::frexp(src[lane],&exp);
2805 if (std::isnan(src[lane])) {
2806 vdst[lane] = 0;
2807 } else if (std::isinf(src[lane]) || exp > 30) {
2808 if (std::signbit(src[lane])) {
2809 vdst[lane] = INT_MIN;
2810 } else {
2811 vdst[lane] = INT_MAX;
2812 }
2813 } else {
2814 vdst[lane] = (VecElemI32)src[lane];
2815 }
2816 }
2817 }
2818
2819 vdst.write();
2820 } // execute
2821 // --- Inst_VOP3__V_MOV_FED_B32 class methods ---
2822
2824 : Inst_VOP3A(iFmt, "v_mov_fed_b32", false)
2825 {
2826 setFlag(ALU);
2827 } // Inst_VOP3__V_MOV_FED_B32
2828
2830 {
2831 } // ~Inst_VOP3__V_MOV_FED_B32
2832
2833 // --- description from .arch file ---
2834 // D.u = S0.u;
2835 // Introduce EDC double error upon write to dest vgpr without causing an
2836 // --- exception.
2837 // Input and output modifiers not supported; this is an untyped operation.
2838 void
2840 {
2842 } // execute
2843 // --- Inst_VOP3__V_CVT_F16_F32 class methods ---
2844
2846 : Inst_VOP3A(iFmt, "v_cvt_f16_f32", false)
2847 {
2848 setFlag(ALU);
2849 setFlag(F32);
2850 } // Inst_VOP3__V_CVT_F16_F32
2851
2853 {
2854 } // ~Inst_VOP3__V_CVT_F16_F32
2855
2856 // --- description from .arch file ---
2857 // D.f16 = flt32_to_flt16(S0.f).
2858 // Supports input modifiers and creates FP16 denormals when appropriate.
2859 void
2861 {
2862 Wavefront *wf = gpuDynInst->wavefront();
2863 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2864 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2865
2866 src0.readSrc();
2867 vdst.read();
2868
2869 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2870 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2871
2872 unsigned abs = instData.ABS;
2873 unsigned neg = extData.NEG;
2874 int opsel = instData.OPSEL;
2875
2876 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2877 if (wf->execMask(lane)) {
2878 float tmp = src0[lane];
2879
2880 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2881 if (neg & 1) tmp = -tmp;
2882
2883 tmp = omodModifier(tmp, extData.OMOD);
2884 if (instData.CLAMP) {
2885 tmp = std::clamp(tmp, 0.0f, 1.0f);
2886 }
2887
2888 AMDGPU::mxfloat16 out(tmp);
2889
2890 // If opsel[3] use upper 16-bits of dest, otherwise lower.
2891 if (opsel & 8) {
2892 replaceBits(vdst[lane], 31, 16, (out.data >> 16));
2893 } else {
2894 replaceBits(vdst[lane], 15, 0, (out.data >> 16));
2895 }
2896 }
2897 }
2898
2899 vdst.write();
2900 } // execute
2901 // --- Inst_VOP3__V_CVT_F32_F16 class methods ---
2902
2904 : Inst_VOP3A(iFmt, "v_cvt_f32_f16", false)
2905 {
2906 setFlag(ALU);
2907 setFlag(F32);
2908 } // Inst_VOP3__V_CVT_F32_F16
2909
2911 {
2912 } // ~Inst_VOP3__V_CVT_F32_F16
2913
2914 // --- description from .arch file ---
2915 // D.f = flt16_to_flt32(S0.f16).
2916 // FP16 denormal inputs are always accepted.
2917 void
2919 {
2920 Wavefront *wf = gpuDynInst->wavefront();
2921 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
2922 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2923
2924 src0.readSrc();
2925
2926 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2927 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2928 panic_if(instData.OPSEL, "OPSEL not implemented for %s", _opcode);
2929
2930 unsigned abs = instData.ABS;
2931 unsigned neg = extData.NEG;
2932
2933 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2934 if (wf->execMask(lane)) {
2935 AMDGPU::mxfloat16 tmp(src0[lane]);
2936
2937 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2938 if (neg & 1) tmp = -tmp;
2939
2940 float out = omodModifier(float(tmp), extData.OMOD);
2941 if (instData.CLAMP) {
2942 out = std::clamp(out, 0.0f, 1.0f);
2943 }
2944
2945 vdst[lane] = out;
2946 }
2947 }
2948
2949 vdst.write();
2950 } // execute
2951 // --- Inst_VOP3__V_CVT_RPI_I32_F32 class methods ---
2952
2954 InFmt_VOP3A *iFmt)
2955 : Inst_VOP3A(iFmt, "v_cvt_rpi_i32_f32", false)
2956 {
2957 setFlag(ALU);
2958 setFlag(F32);
2959 } // Inst_VOP3__V_CVT_RPI_I32_F32
2960
2962 {
2963 } // ~Inst_VOP3__V_CVT_RPI_I32_F32
2964
2965 // --- description from .arch file ---
2966 // D.i = (int)floor(S0.f + 0.5).
2967 void
2969 {
2970 Wavefront *wf = gpuDynInst->wavefront();
2971 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
2972 VecOperandI32 vdst(gpuDynInst, instData.VDST);
2973
2974 src.readSrc();
2975
2976 if (instData.ABS & 0x1) {
2977 src.absModifier();
2978 }
2979
2980 if (extData.NEG & 0x1) {
2981 src.negModifier();
2982 }
2983
2984 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2985 if (wf->execMask(lane)) {
2986 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
2987 }
2988 }
2989
2990 vdst.write();
2991 } // execute
2992 // --- Inst_VOP3__V_CVT_FLR_I32_F32 class methods ---
2993
2995 InFmt_VOP3A *iFmt)
2996 : Inst_VOP3A(iFmt, "v_cvt_flr_i32_f32", false)
2997 {
2998 setFlag(ALU);
2999 setFlag(F32);
3000 } // Inst_VOP3__V_CVT_FLR_I32_F32
3001
3003 {
3004 } // ~Inst_VOP3__V_CVT_FLR_I32_F32
3005
3006 // --- description from .arch file ---
3007 // D.i = (int)floor(S0.f).
3008 void
3010 {
3011 Wavefront *wf = gpuDynInst->wavefront();
3012 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3013 VecOperandI32 vdst(gpuDynInst, instData.VDST);
3014
3015 src.readSrc();
3016
3017 if (instData.ABS & 0x1) {
3018 src.absModifier();
3019 }
3020
3021 if (extData.NEG & 0x1) {
3022 src.negModifier();
3023 }
3024
3025 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3026 if (wf->execMask(lane)) {
3027 vdst[lane] = (VecElemI32)std::floor(src[lane]);
3028 }
3029 }
3030
3031 vdst.write();
3032 } // execute
3033 // --- Inst_VOP3__V_CVT_OFF_F32_I4 class methods ---
3034
3036 : Inst_VOP3A(iFmt, "v_cvt_off_f32_i4", false)
3037 {
3038 setFlag(ALU);
3039 setFlag(F32);
3040 } // Inst_VOP3__V_CVT_OFF_F32_I4
3041
3043 {
3044 } // ~Inst_VOP3__V_CVT_OFF_F32_I4
3045
3046 // --- description from .arch file ---
3047 // 4-bit signed int to 32-bit float. Used for interpolation in shader.
3048 void
3050 {
3051 // Could not parse sq_uc.arch desc field
3053 } // execute
3054 // --- Inst_VOP3__V_CVT_F32_F64 class methods ---
3055
3057 : Inst_VOP3A(iFmt, "v_cvt_f32_f64", false)
3058 {
3059 setFlag(ALU);
3060 setFlag(F64);
3061 } // Inst_VOP3__V_CVT_F32_F64
3062
3064 {
3065 } // ~Inst_VOP3__V_CVT_F32_F64
3066
3067 // --- description from .arch file ---
3068 // D.f = (float)S0.d.
3069 void
3071 {
3072 Wavefront *wf = gpuDynInst->wavefront();
3073 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3074 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3075
3076 src.readSrc();
3077
3078 if (instData.ABS & 0x1) {
3079 src.absModifier();
3080 }
3081
3082 if (extData.NEG & 0x1) {
3083 src.negModifier();
3084 }
3085
3089 assert(!(instData.ABS & 0x2));
3090 assert(!(instData.ABS & 0x4));
3091 assert(!(extData.NEG & 0x2));
3092 assert(!(extData.NEG & 0x4));
3093
3094 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3095 if (wf->execMask(lane)) {
3096 vdst[lane] = (VecElemF32)src[lane];
3097 }
3098 }
3099
3100 vdst.write();
3101 } // execute
3102 // --- Inst_VOP3__V_CVT_F64_F32 class methods ---
3103
3105 : Inst_VOP3A(iFmt, "v_cvt_f64_f32", false)
3106 {
3107 setFlag(ALU);
3108 setFlag(F64);
3109 } // Inst_VOP3__V_CVT_F64_F32
3110
3112 {
3113 } // ~Inst_VOP3__V_CVT_F64_F32
3114
3115 // --- description from .arch file ---
3116 // D.d = (double)S0.f.
3117 void
3119 {
3120 Wavefront *wf = gpuDynInst->wavefront();
3121 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3122 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3123
3124 src.readSrc();
3125
3126 if (instData.ABS & 0x1) {
3127 src.absModifier();
3128 }
3129
3130 if (extData.NEG & 0x1) {
3131 src.negModifier();
3132 }
3133
3137 assert(!(instData.ABS & 0x2));
3138 assert(!(instData.ABS & 0x4));
3139 assert(!(extData.NEG & 0x2));
3140 assert(!(extData.NEG & 0x4));
3141
3142 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3143 if (wf->execMask(lane)) {
3144 vdst[lane] = (VecElemF64)src[lane];
3145 }
3146 }
3147
3148 vdst.write();
3149 } // execute
3150 // --- Inst_VOP3__V_CVT_F32_UBYTE0 class methods ---
3151
3153 : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte0", false)
3154 {
3155 setFlag(ALU);
3156 setFlag(F32);
3157 } // Inst_VOP3__V_CVT_F32_UBYTE0
3158
3160 {
3161 } // ~Inst_VOP3__V_CVT_F32_UBYTE0
3162
3163 // --- description from .arch file ---
3164 // D.f = (float)(S0.u[7:0]).
3165 void
3167 {
3168 Wavefront *wf = gpuDynInst->wavefront();
3169 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3170 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3171
3172 src.readSrc();
3173
3174 if (instData.ABS & 0x1) {
3175 src.absModifier();
3176 }
3177
3178 if (extData.NEG & 0x1) {
3179 src.negModifier();
3180 }
3181
3182 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3183 if (wf->execMask(lane)) {
3184 vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
3185 }
3186 }
3187
3188 vdst.write();
3189 } // execute
3190 // --- Inst_VOP3__V_CVT_F32_UBYTE1 class methods ---
3191
3193 : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte1", false)
3194 {
3195 setFlag(ALU);
3196 setFlag(F32);
3197 } // Inst_VOP3__V_CVT_F32_UBYTE1
3198
3200 {
3201 } // ~Inst_VOP3__V_CVT_F32_UBYTE1
3202
3203 // --- description from .arch file ---
3204 // D.f = (float)(S0.u[15:8]).
3205 void
3207 {
3208 Wavefront *wf = gpuDynInst->wavefront();
3209 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3210 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3211
3212 src.readSrc();
3213
3214 if (instData.ABS & 0x1) {
3215 src.absModifier();
3216 }
3217
3218 if (extData.NEG & 0x1) {
3219 src.negModifier();
3220 }
3221
3222 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3223 if (wf->execMask(lane)) {
3224 vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
3225 }
3226 }
3227
3228 vdst.write();
3229 } // execute
3230 // --- Inst_VOP3__V_CVT_F32_UBYTE2 class methods ---
3231
3233 : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte2", false)
3234 {
3235 setFlag(ALU);
3236 setFlag(F32);
3237 } // Inst_VOP3__V_CVT_F32_UBYTE2
3238
3240 {
3241 } // ~Inst_VOP3__V_CVT_F32_UBYTE2
3242
3243 // --- description from .arch file ---
3244 // D.f = (float)(S0.u[23:16]).
3245 void
3247 {
3248 Wavefront *wf = gpuDynInst->wavefront();
3249 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3250 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3251
3252 src.readSrc();
3253
3254 if (instData.ABS & 0x1) {
3255 src.absModifier();
3256 }
3257
3258 if (extData.NEG & 0x1) {
3259 src.negModifier();
3260 }
3261
3262 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3263 if (wf->execMask(lane)) {
3264 vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
3265 }
3266 }
3267
3268 vdst.write();
3269 } // execute
3270 // --- Inst_VOP3__V_CVT_F32_UBYTE3 class methods ---
3271
3273 : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte3", false)
3274 {
3275 setFlag(ALU);
3276 setFlag(F32);
3277 } // Inst_VOP3__V_CVT_F32_UBYTE3
3278
3280 {
3281 } // ~Inst_VOP3__V_CVT_F32_UBYTE3
3282
3283 // --- description from .arch file ---
3284 // D.f = (float)(S0.u[31:24]).
3285 void
3287 {
3288 Wavefront *wf = gpuDynInst->wavefront();
3289 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3290 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3291
3292 src.readSrc();
3293
3294 if (instData.ABS & 0x1) {
3295 src.absModifier();
3296 }
3297
3298 if (extData.NEG & 0x1) {
3299 src.negModifier();
3300 }
3301
3302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3303 if (wf->execMask(lane)) {
3304 vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
3305 }
3306 }
3307
3308 vdst.write();
3309 } // execute
3310 // --- Inst_VOP3__V_CVT_U32_F64 class methods ---
3311
3313 : Inst_VOP3A(iFmt, "v_cvt_u32_f64", false)
3314 {
3315 setFlag(ALU);
3316 setFlag(F64);
3317 } // Inst_VOP3__V_CVT_U32_F64
3318
3320 {
3321 } // ~Inst_VOP3__V_CVT_U32_F64
3322
3323 // --- description from .arch file ---
3324 // D.u = (unsigned)S0.d.
3325 // Out-of-range floating point values (including infinity) saturate. NaN is
3326 // --- converted to 0.
3327 void
3329 {
3330 Wavefront *wf = gpuDynInst->wavefront();
3331 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3332 VecOperandU32 vdst(gpuDynInst, instData.VDST);
3333
3334 src.readSrc();
3335
3336 if (instData.ABS & 0x1) {
3337 src.absModifier();
3338 }
3339
3340 if (extData.NEG & 0x1) {
3341 src.negModifier();
3342 }
3343
3344 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3345 if (wf->execMask(lane)) {
3346 int exp;
3347 std::frexp(src[lane],&exp);
3348 if (std::isnan(src[lane])) {
3349 vdst[lane] = 0;
3350 } else if (std::isinf(src[lane])) {
3351 if (std::signbit(src[lane])) {
3352 vdst[lane] = 0;
3353 } else {
3354 vdst[lane] = UINT_MAX;
3355 }
3356 } else if (exp > 31) {
3357 vdst[lane] = UINT_MAX;
3358 } else {
3359 vdst[lane] = (VecElemU32)src[lane];
3360 }
3361 }
3362 }
3363
3364 vdst.write();
3365 } // execute
3366 // --- Inst_VOP3__V_CVT_F64_U32 class methods ---
3367
3369 : Inst_VOP3A(iFmt, "v_cvt_f64_u32", false)
3370 {
3371 setFlag(ALU);
3372 setFlag(F64);
3373 } // Inst_VOP3__V_CVT_F64_U32
3374
3376 {
3377 } // ~Inst_VOP3__V_CVT_F64_U32
3378
3379 // --- description from .arch file ---
3380 // D.d = (double)S0.u.
3381 void
3383 {
3384 Wavefront *wf = gpuDynInst->wavefront();
3385 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3386 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3387
3388 src.readSrc();
3389
3390 if (instData.ABS & 0x1) {
3391 src.absModifier();
3392 }
3393
3394 if (extData.NEG & 0x1) {
3395 src.negModifier();
3396 }
3397
3398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3399 if (wf->execMask(lane)) {
3400 vdst[lane] = (VecElemF64)src[lane];
3401 }
3402 }
3403
3404 vdst.write();
3405 } // execute
3406 // --- Inst_VOP3__V_TRUNC_F64 class methods ---
3407
3409 : Inst_VOP3A(iFmt, "v_trunc_f64", false)
3410 {
3411 setFlag(ALU);
3412 setFlag(F64);
3413 } // Inst_VOP3__V_TRUNC_F64
3414
3416 {
3417 } // ~Inst_VOP3__V_TRUNC_F64
3418
3419 // --- description from .arch file ---
3420 // D.d = trunc(S0.d), return integer part of S0.d.
3421 void
3423 {
3424 Wavefront *wf = gpuDynInst->wavefront();
3425 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3426 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3427
3428 src.readSrc();
3429
3430 if (instData.ABS & 0x1) {
3431 src.absModifier();
3432 }
3433
3434 if (extData.NEG & 0x1) {
3435 src.negModifier();
3436 }
3437
3438 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3439 if (wf->execMask(lane)) {
3440 vdst[lane] = std::trunc(src[lane]);
3441 }
3442 }
3443
3444 vdst.write();
3445 } // execute
3446 // --- Inst_VOP3__V_CEIL_F64 class methods ---
3447
3449 : Inst_VOP3A(iFmt, "v_ceil_f64", false)
3450 {
3451 setFlag(ALU);
3452 setFlag(F64);
3453 } // Inst_VOP3__V_CEIL_F64
3454
3456 {
3457 } // ~Inst_VOP3__V_CEIL_F64
3458
3459 // --- description from .arch file ---
3460 // D.d = trunc(S0.d);
3461 // if (S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
3462 void
3464 {
3465 Wavefront *wf = gpuDynInst->wavefront();
3466 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3467 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3468
3469 src.readSrc();
3470
3471 if (instData.ABS & 0x1) {
3472 src.absModifier();
3473 }
3474
3475 if (extData.NEG & 0x1) {
3476 src.negModifier();
3477 }
3478
3479 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3480 if (wf->execMask(lane)) {
3481 vdst[lane] = std::ceil(src[lane]);
3482 }
3483 }
3484
3485 vdst.write();
3486 } // execute
3487 // --- Inst_VOP3__V_RNDNE_F64 class methods ---
3488
3490 : Inst_VOP3A(iFmt, "v_rndne_f64", false)
3491 {
3492 setFlag(ALU);
3493 setFlag(F64);
3494 } // Inst_VOP3__V_RNDNE_F64
3495
3497 {
3498 } // ~Inst_VOP3__V_RNDNE_F64
3499
3500 // --- description from .arch file ---
3501 // D.d = round_nearest_even(S0.d).
3502 void
3504 {
3505 Wavefront *wf = gpuDynInst->wavefront();
3506 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3507 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3508
3509 src.readSrc();
3510
3511 if (instData.ABS & 0x1) {
3512 src.absModifier();
3513 }
3514
3515 if (extData.NEG & 0x1) {
3516 src.negModifier();
3517 }
3518
3519 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3520 if (wf->execMask(lane)) {
3521 vdst[lane] = roundNearestEven(src[lane]);
3522 }
3523 }
3524
3525 vdst.write();
3526 } // execute
3527 // --- Inst_VOP3__V_FLOOR_F64 class methods ---
3528
3530 : Inst_VOP3A(iFmt, "v_floor_f64", false)
3531 {
3532 setFlag(ALU);
3533 setFlag(F64);
3534 } // Inst_VOP3__V_FLOOR_F64
3535
3537 {
3538 } // ~Inst_VOP3__V_FLOOR_F64
3539
3540 // --- description from .arch file ---
3541 // D.d = trunc(S0.d);
3542 // if (S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
3543 void
3545 {
3546 Wavefront *wf = gpuDynInst->wavefront();
3547 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3548 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3549
3550 src.readSrc();
3551
3552 if (instData.ABS & 0x1) {
3553 src.absModifier();
3554 }
3555
3556 if (extData.NEG & 0x1) {
3557 src.negModifier();
3558 }
3559
3560 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3561 if (wf->execMask(lane)) {
3562 vdst[lane] = std::floor(src[lane]);
3563 }
3564 }
3565
3566 vdst.write();
3567 } // execute
3568 // --- Inst_VOP3__V_FRACT_F32 class methods ---
3569
3571 : Inst_VOP3A(iFmt, "v_fract_f32", false)
3572 {
3573 setFlag(ALU);
3574 setFlag(F32);
3575 } // Inst_VOP3__V_FRACT_F32
3576
3578 {
3579 } // ~Inst_VOP3__V_FRACT_F32
3580
3581 // --- description from .arch file ---
3582 // D.f = S0.f - floor(S0.f).
3583 void
3585 {
3586 Wavefront *wf = gpuDynInst->wavefront();
3587 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3588 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3589
3590 src.readSrc();
3591
3592 if (instData.ABS & 0x1) {
3593 src.absModifier();
3594 }
3595
3596 if (extData.NEG & 0x1) {
3597 src.negModifier();
3598 }
3599
3600 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3601 if (wf->execMask(lane)) {
3602 VecElemF32 int_part(0.0);
3603 vdst[lane] = std::modf(src[lane], &int_part);
3604 }
3605 }
3606
3607 vdst.write();
3608 } // execute
3609 // --- Inst_VOP3__V_TRUNC_F32 class methods ---
3610
3612 : Inst_VOP3A(iFmt, "v_trunc_f32", false)
3613 {
3614 setFlag(ALU);
3615 setFlag(F32);
3616 } // Inst_VOP3__V_TRUNC_F32
3617
3619 {
3620 } // ~Inst_VOP3__V_TRUNC_F32
3621
3622 // --- description from .arch file ---
3623 // D.f = trunc(S0.f), return integer part of S0.f.
3624 void
3626 {
3627 Wavefront *wf = gpuDynInst->wavefront();
3628 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3629 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3630
3631 src.readSrc();
3632
3633 if (instData.ABS & 0x1) {
3634 src.absModifier();
3635 }
3636
3637 if (extData.NEG & 0x1) {
3638 src.negModifier();
3639 }
3640
3641 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3642 if (wf->execMask(lane)) {
3643 vdst[lane] = std::trunc(src[lane]);
3644 }
3645 }
3646
3647 vdst.write();
3648 } // execute
3649 // --- Inst_VOP3__V_CEIL_F32 class methods ---
3650
3652 : Inst_VOP3A(iFmt, "v_ceil_f32", false)
3653 {
3654 setFlag(ALU);
3655 setFlag(F32);
3656 } // Inst_VOP3__V_CEIL_F32
3657
3659 {
3660 } // ~Inst_VOP3__V_CEIL_F32
3661
3662 // --- description from .arch file ---
3663 // D.f = trunc(S0.f);
3664 // if (S0.f > 0.0 && S0.f != D.f) then D.f += 1.0.
3665 void
3667 {
3668 Wavefront *wf = gpuDynInst->wavefront();
3669 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3670 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3671
3672 src.readSrc();
3673
3674 if (instData.ABS & 0x1) {
3675 src.absModifier();
3676 }
3677
3678 if (extData.NEG & 0x1) {
3679 src.negModifier();
3680 }
3681
3682 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3683 if (wf->execMask(lane)) {
3684 vdst[lane] = std::ceil(src[lane]);
3685 }
3686 }
3687
3688 vdst.write();
3689 } // execute
3690 // --- Inst_VOP3__V_RNDNE_F32 class methods ---
3691
3693 : Inst_VOP3A(iFmt, "v_rndne_f32", false)
3694 {
3695 setFlag(ALU);
3696 setFlag(F32);
3697 } // Inst_VOP3__V_RNDNE_F32
3698
3700 {
3701 } // ~Inst_VOP3__V_RNDNE_F32
3702
3703 // --- description from .arch file ---
3704 // D.f = round_nearest_even(S0.f).
3705 void
3707 {
3708 Wavefront *wf = gpuDynInst->wavefront();
3709 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3710 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3711
3712 src.readSrc();
3713
3714 if (instData.ABS & 0x1) {
3715 src.absModifier();
3716 }
3717
3718 if (extData.NEG & 0x1) {
3719 src.negModifier();
3720 }
3721
3722 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3723 if (wf->execMask(lane)) {
3724 vdst[lane] = roundNearestEven(src[lane]);
3725 }
3726 }
3727
3728 vdst.write();
3729 } // execute
3730 // --- Inst_VOP3__V_FLOOR_F32 class methods ---
3731
3733 : Inst_VOP3A(iFmt, "v_floor_f32", false)
3734 {
3735 setFlag(ALU);
3736 setFlag(F32);
3737 } // Inst_VOP3__V_FLOOR_F32
3738
3740 {
3741 } // ~Inst_VOP3__V_FLOOR_F32
3742
3743 // --- description from .arch file ---
3744 // D.f = trunc(S0.f);
3745 // if (S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
3746 void
3748 {
3749 Wavefront *wf = gpuDynInst->wavefront();
3750 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3751 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3752
3753 src.readSrc();
3754
3755 if (instData.ABS & 0x1) {
3756 src.absModifier();
3757 }
3758
3759 if (extData.NEG & 0x1) {
3760 src.negModifier();
3761 }
3762
3763 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3764 if (wf->execMask(lane)) {
3765 vdst[lane] = std::floor(src[lane]);
3766 }
3767 }
3768
3769 vdst.write();
3770 } // execute
3771 // --- Inst_VOP3__V_EXP_F32 class methods ---
3772
3774 : Inst_VOP3A(iFmt, "v_exp_f32", false)
3775 {
3776 setFlag(ALU);
3777 setFlag(F32);
3778 } // Inst_VOP3__V_EXP_F32
3779
3781 {
3782 } // ~Inst_VOP3__V_EXP_F32
3783
3784 // --- description from .arch file ---
3785 // D.f = pow(2.0, S0.f).
3786 void
3788 {
3789 Wavefront *wf = gpuDynInst->wavefront();
3790 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3791 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3792
3793 src.readSrc();
3794
3795 if (instData.ABS & 0x1) {
3796 src.absModifier();
3797 }
3798
3799 if (extData.NEG & 0x1) {
3800 src.negModifier();
3801 }
3802
3803 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3804 if (wf->execMask(lane)) {
3805 vdst[lane] = std::pow(2.0, src[lane]);
3806 }
3807 }
3808
3809 vdst.write();
3810 } // execute
3811 // --- Inst_VOP3__V_LOG_F32 class methods ---
3812
3814 : Inst_VOP3A(iFmt, "v_log_f32", false)
3815 {
3816 setFlag(ALU);
3817 setFlag(F32);
3818 } // Inst_VOP3__V_LOG_F32
3819
3821 {
3822 } // ~Inst_VOP3__V_LOG_F32
3823
3824 // --- description from .arch file ---
3825 // D.f = log2(S0.f). Base 2 logarithm.
3826 void
3828 {
3829 Wavefront *wf = gpuDynInst->wavefront();
3830 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3831 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3832
3833 src.readSrc();
3834
3835 if (instData.ABS & 0x1) {
3836 src.absModifier();
3837 }
3838
3839 if (extData.NEG & 0x1) {
3840 src.negModifier();
3841 }
3842
3846 assert(!(instData.ABS & 0x2));
3847 assert(!(instData.ABS & 0x4));
3848 assert(!(extData.NEG & 0x2));
3849 assert(!(extData.NEG & 0x4));
3850
3851 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3852 if (wf->execMask(lane)) {
3853 vdst[lane] = std::log2(src[lane]);
3854 }
3855 }
3856
3857 vdst.write();
3858 } // execute
3859 // --- Inst_VOP3__V_RCP_F32 class methods ---
3860
3862 : Inst_VOP3A(iFmt, "v_rcp_f32", false)
3863 {
3864 setFlag(ALU);
3865 setFlag(F32);
3866 } // Inst_VOP3__V_RCP_F32
3867
3869 {
3870 } // ~Inst_VOP3__V_RCP_F32
3871
3872 // --- description from .arch file ---
3873 // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error.
3874 void
3876 {
3877 Wavefront *wf = gpuDynInst->wavefront();
3878 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3879 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3880
3881 src.readSrc();
3882
3883 if (instData.ABS & 0x1) {
3884 src.absModifier();
3885 }
3886
3887 if (extData.NEG & 0x1) {
3888 src.negModifier();
3889 }
3890
3891 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3892 if (wf->execMask(lane)) {
3893 vdst[lane] = 1.0 / src[lane];
3894 }
3895 }
3896
3897 vdst.write();
3898 } // execute
3899 // --- Inst_VOP3__V_RCP_IFLAG_F32 class methods ---
3900
3902 : Inst_VOP3A(iFmt, "v_rcp_iflag_f32", false)
3903 {
3904 setFlag(ALU);
3905 setFlag(F32);
3906 } // Inst_VOP3__V_RCP_IFLAG_F32
3907
3909 {
3910 } // ~Inst_VOP3__V_RCP_IFLAG_F32
3911
3912 // --- description from .arch file ---
3913 // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
3914 // --- integer DIV_BY_ZERO exception but cannot raise floating-point
3915 // --- exceptions.
3916 void
3918 {
3919 Wavefront *wf = gpuDynInst->wavefront();
3920 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3921 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3922
3923 src.readSrc();
3924
3925 if (instData.ABS & 0x1) {
3926 src.absModifier();
3927 }
3928
3929 if (extData.NEG & 0x1) {
3930 src.negModifier();
3931 }
3932
3933 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3934 if (wf->execMask(lane)) {
3935 vdst[lane] = 1.0 / src[lane];
3936 }
3937 }
3938
3939 vdst.write();
3940 } // execute
3941 // --- Inst_VOP3__V_RSQ_F32 class methods ---
3942
3944 : Inst_VOP3A(iFmt, "v_rsq_f32", false)
3945 {
3946 setFlag(ALU);
3947 setFlag(F32);
3948 } // Inst_VOP3__V_RSQ_F32
3949
3951 {
3952 } // ~Inst_VOP3__V_RSQ_F32
3953
3954 // --- description from .arch file ---
3955 // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules.
3956 void
3958 {
3959 Wavefront *wf = gpuDynInst->wavefront();
3960 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3961 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3962
3963 src.readSrc();
3964
3965 if (instData.ABS & 0x1) {
3966 src.absModifier();
3967 }
3968
3969 if (extData.NEG & 0x1) {
3970 src.negModifier();
3971 }
3972
3973 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3974 if (wf->execMask(lane)) {
3975 vdst[lane] = 1.0 / std::sqrt(src[lane]);
3976 }
3977 }
3978
3979 vdst.write();
3980 } // execute
3981 // --- Inst_VOP3__V_RCP_F64 class methods ---
3982
3984 : Inst_VOP3A(iFmt, "v_rcp_f64", false)
3985 {
3986 setFlag(ALU);
3987 setFlag(F64);
3988 } // Inst_VOP3__V_RCP_F64
3989
3991 {
3992 } // ~Inst_VOP3__V_RCP_F64
3993
3994 // --- description from .arch file ---
3995 // D.d = 1.0 / S0.d.
3996 void
3998 {
3999 Wavefront *wf = gpuDynInst->wavefront();
4000 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4001 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4002
4003 src.readSrc();
4004
4005 if (instData.ABS & 0x1) {
4006 src.absModifier();
4007 }
4008
4009 if (extData.NEG & 0x1) {
4010 src.negModifier();
4011 }
4012
4013 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4014 if (wf->execMask(lane)) {
4015 if (std::fpclassify(src[lane]) == FP_ZERO) {
4016 vdst[lane] = +INFINITY;
4017 } else if (std::isnan(src[lane])) {
4018 vdst[lane] = NAN;
4019 } else if (std::isinf(src[lane])) {
4020 if (std::signbit(src[lane])) {
4021 vdst[lane] = -0.0;
4022 } else {
4023 vdst[lane] = 0.0;
4024 }
4025 } else {
4026 vdst[lane] = 1.0 / src[lane];
4027 }
4028 }
4029 }
4030
4031 vdst.write();
4032 } // execute
4033 // --- Inst_VOP3__V_RSQ_F64 class methods ---
4034
4036 : Inst_VOP3A(iFmt, "v_rsq_f64", false)
4037 {
4038 setFlag(ALU);
4039 setFlag(F64);
4040 } // Inst_VOP3__V_RSQ_F64
4041
4043 {
4044 } // ~Inst_VOP3__V_RSQ_F64
4045
4046 // --- description from .arch file ---
4047 // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32.
4048 void
4050 {
4051 Wavefront *wf = gpuDynInst->wavefront();
4052 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4053 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4054
4055 src.readSrc();
4056
4057 if (instData.ABS & 0x1) {
4058 src.absModifier();
4059 }
4060
4061 if (extData.NEG & 0x1) {
4062 src.negModifier();
4063 }
4064
4065 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4066 if (wf->execMask(lane)) {
4067 if (std::fpclassify(src[lane]) == FP_ZERO) {
4068 vdst[lane] = +INFINITY;
4069 } else if (std::isnan(src[lane])) {
4070 vdst[lane] = NAN;
4071 } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) {
4072 vdst[lane] = 0.0;
4073 } else if (std::signbit(src[lane])) {
4074 vdst[lane] = NAN;
4075 } else {
4076 vdst[lane] = 1.0 / std::sqrt(src[lane]);
4077 }
4078 }
4079 }
4080
4081 vdst.write();
4082 } // execute
4083 // --- Inst_VOP3__V_SQRT_F32 class methods ---
4084
4086 : Inst_VOP3A(iFmt, "v_sqrt_f32", false)
4087 {
4088 setFlag(ALU);
4089 setFlag(F32);
4090 } // Inst_VOP3__V_SQRT_F32
4091
4093 {
4094 } // ~Inst_VOP3__V_SQRT_F32
4095
4096 // --- description from .arch file ---
4097 // D.f = sqrt(S0.f).
4098 void
4100 {
4101 Wavefront *wf = gpuDynInst->wavefront();
4102 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4103 VecOperandF32 vdst(gpuDynInst, instData.VDST);
4104
4105 src.readSrc();
4106
4107 if (instData.ABS & 0x1) {
4108 src.absModifier();
4109 }
4110
4111 if (extData.NEG & 0x1) {
4112 src.negModifier();
4113 }
4114
4115 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4116 if (wf->execMask(lane)) {
4117 vdst[lane] = std::sqrt(src[lane]);
4118 }
4119 }
4120
4121 vdst.write();
4122 } // execute
4123 // --- Inst_VOP3__V_SQRT_F64 class methods ---
4124
4126 : Inst_VOP3A(iFmt, "v_sqrt_f64", false)
4127 {
4128 setFlag(ALU);
4129 setFlag(F64);
4130 } // Inst_VOP3__V_SQRT_F64
4131
4133 {
4134 } // ~Inst_VOP3__V_SQRT_F64
4135
4136 // --- description from .arch file ---
4137 // D.d = sqrt(S0.d).
4138 void
4140 {
4141 Wavefront *wf = gpuDynInst->wavefront();
4142 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4143 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4144
4145 src.readSrc();
4146
4147 if (instData.ABS & 0x1) {
4148 src.absModifier();
4149 }
4150
4151 if (extData.NEG & 0x1) {
4152 src.negModifier();
4153 }
4154
4155 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4156 if (wf->execMask(lane)) {
4157 vdst[lane] = std::sqrt(src[lane]);
4158 }
4159 }
4160
4161 vdst.write();
4162 } // execute
4163 // --- Inst_VOP3__V_SIN_F32 class methods ---
4164
4166 : Inst_VOP3A(iFmt, "v_sin_f32", false)
4167 {
4168 setFlag(ALU);
4169 setFlag(F32);
4170 } // Inst_VOP3__V_SIN_F32
4171
4173 {
4174 } // ~Inst_VOP3__V_SIN_F32
4175
4176 // --- description from .arch file ---
4177 // D.f = sin(S0.f * 2 * PI).
4178 // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
4179 // float 0.0.
4180 void
4182 {
4183 Wavefront *wf = gpuDynInst->wavefront();
4184 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4185 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
4186 VecOperandF32 vdst(gpuDynInst, instData.VDST);
4187
4188 src.readSrc();
4189 pi.read();
4190
4191 if (instData.ABS & 0x1) {
4192 src.absModifier();
4193 }
4194
4195 if (extData.NEG & 0x1) {
4196 src.negModifier();
4197 }
4198
4199 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4200 if (wf->execMask(lane)) {
4201 vdst[lane] = std::sin(src[lane] * 2 * pi.rawData());
4202 }
4203 }
4204
4205 vdst.write();
4206 } // execute
4207 // --- Inst_VOP3__V_COS_F32 class methods ---
4208
4210 : Inst_VOP3A(iFmt, "v_cos_f32", false)
4211 {
4212 setFlag(ALU);
4213 setFlag(F32);
4214 } // Inst_VOP3__V_COS_F32
4215
4217 {
4218 } // ~Inst_VOP3__V_COS_F32
4219
4220 // --- description from .arch file ---
4221 // D.f = cos(S0.f * 2 * PI).
4222 // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
4223 // float 1.0.
4224 void
4226 {
4227 Wavefront *wf = gpuDynInst->wavefront();
4228 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4229 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
4230 VecOperandF32 vdst(gpuDynInst, instData.VDST);
4231
4232 src.readSrc();
4233 pi.read();
4234
4235 if (instData.ABS & 0x1) {
4236 src.absModifier();
4237 }
4238
4239 if (extData.NEG & 0x1) {
4240 src.negModifier();
4241 }
4242
4243 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4244 if (wf->execMask(lane)) {
4245 vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
4246 }
4247 }
4248
4249 vdst.write();
4250 } // execute
4251 // --- Inst_VOP3__V_NOT_B32 class methods ---
4252
4254 : Inst_VOP3A(iFmt, "v_not_b32", false)
4255 {
4256 setFlag(ALU);
4257 } // Inst_VOP3__V_NOT_B32
4258
4260 {
4261 } // ~Inst_VOP3__V_NOT_B32
4262
4263 // --- description from .arch file ---
4264 // D.u = ~S0.u.
4265 // Input and output modifiers not supported.
4266 void
4268 {
4269 Wavefront *wf = gpuDynInst->wavefront();
4270 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
4271 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4272
4273 src.readSrc();
4274
4275 if (instData.ABS & 0x1) {
4276 src.absModifier();
4277 }
4278
4279 if (extData.NEG & 0x1) {
4280 src.negModifier();
4281 }
4282
4283 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4284 if (wf->execMask(lane)) {
4285 vdst[lane] = ~src[lane];
4286 }
4287 }
4288
4289 vdst.write();
4290 } // execute
4291 // --- Inst_VOP3__V_BFREV_B32 class methods ---
4292
4294 : Inst_VOP3A(iFmt, "v_bfrev_b32", false)
4295 {
4296 setFlag(ALU);
4297 } // Inst_VOP3__V_BFREV_B32
4298
4300 {
4301 } // ~Inst_VOP3__V_BFREV_B32
4302
4303 // --- description from .arch file ---
4304 // D.u[31:0] = S0.u[0:31], bitfield reverse.
4305 // Input and output modifiers not supported.
4306 void
4308 {
4309 Wavefront *wf = gpuDynInst->wavefront();
4310 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
4311 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4312
4313 src.readSrc();
4314
4315 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4316 if (wf->execMask(lane)) {
4317 vdst[lane] = reverseBits(src[lane]);
4318 }
4319 }
4320
4321 vdst.write();
4322 } // execute
4323 // --- Inst_VOP3__V_FFBH_U32 class methods ---
4324
4326 : Inst_VOP3A(iFmt, "v_ffbh_u32", false)
4327 {
4328 setFlag(ALU);
4329 } // Inst_VOP3__V_FFBH_U32
4330
4332 {
4333 } // ~Inst_VOP3__V_FFBH_U32
4334
4335 // --- description from .arch file ---
4336 // D.u = position of first 1 in S0.u from MSB;
4337 // D.u = 0xffffffff if S0.u == 0.
4338 void
4340 {
4341 Wavefront *wf = gpuDynInst->wavefront();
4342 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
4343 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4344
4345 src.readSrc();
4346
4347 if (instData.ABS & 0x1) {
4348 src.absModifier();
4349 }
4350
4351 if (extData.NEG & 0x1) {
4352 src.negModifier();
4353 }
4354
4355 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4356 if (wf->execMask(lane)) {
4357 vdst[lane] = findFirstOneMsb(src[lane]);
4358 }
4359 }
4360
4361 vdst.write();
4362 } // execute
4363 // --- Inst_VOP3__V_FFBL_B32 class methods ---
4364
4366 : Inst_VOP3A(iFmt, "v_ffbl_b32", false)
4367 {
4368 setFlag(ALU);
4369 } // Inst_VOP3__V_FFBL_B32
4370
4372 {
4373 } // ~Inst_VOP3__V_FFBL_B32
4374
4375 // --- description from .arch file ---
4376 // D.u = position of first 1 in S0.u from LSB;
4377 // D.u = 0xffffffff if S0.u == 0.
4378 void
4380 {
4381 Wavefront *wf = gpuDynInst->wavefront();
4382 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
4383 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4384
4385 src.readSrc();
4386
4387 if (instData.ABS & 0x1) {
4388 src.absModifier();
4389 }
4390
4391 if (extData.NEG & 0x1) {
4392 src.negModifier();
4393 }
4394
4395 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4396 if (wf->execMask(lane)) {
4397 vdst[lane] = findFirstOne(src[lane]);
4398 }
4399 }
4400
4401 vdst.write();
4402 } // execute
4403 // --- Inst_VOP3__V_FFBH_I32 class methods ---
4404
4406 : Inst_VOP3A(iFmt, "v_ffbh_i32", false)
4407 {
4408 setFlag(ALU);
4409 } // Inst_VOP3__V_FFBH_I32
4410
4412 {
4413 } // ~Inst_VOP3__V_FFBH_I32
4414
4415 // --- description from .arch file ---
4416 // D.u = position of first bit different from sign bit in S0.i from MSB;
4417 // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
4418 void
4420 {
4421 Wavefront *wf = gpuDynInst->wavefront();
4422 ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
4423 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4424
4425 src.readSrc();
4426
4427 if (instData.ABS & 0x1) {
4428 src.absModifier();
4429 }
4430
4431 if (extData.NEG & 0x1) {
4432 src.negModifier();
4433 }
4434
4435 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4436 if (wf->execMask(lane)) {
4437 vdst[lane] = firstOppositeSignBit(src[lane]);
4438 }
4439 }
4440
4441 vdst.write();
4442 } // execute
4443 // --- Inst_VOP3__V_FREXP_EXP_I32_F64 class methods ---
4444
4446 InFmt_VOP3A *iFmt)
4447 : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f64", false)
4448 {
4449 setFlag(ALU);
4450 setFlag(F64);
4451 } // Inst_VOP3__V_FREXP_EXP_I32_F64
4452
4454 {
4455 } // ~Inst_VOP3__V_FREXP_EXP_I32_F64
4456
4457 // --- description from .arch file ---
4458 // See V_FREXP_EXP_I32_F32.
4459 void
4461 {
4462 Wavefront *wf = gpuDynInst->wavefront();
4463 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4464 VecOperandI32 vdst(gpuDynInst, instData.VDST);
4465
4466 src.readSrc();
4467
4468 if (instData.ABS & 0x1) {
4469 src.absModifier();
4470 }
4471
4472 if (extData.NEG & 0x1) {
4473 src.negModifier();
4474 }
4475
4476 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4477 if (wf->execMask(lane)) {
4478 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
4479 vdst[lane] = 0;
4480 } else {
4481 VecElemI32 exp(0);
4482 std::frexp(src[lane], &exp);
4483 vdst[lane] = exp;
4484 }
4485 }
4486 }
4487
4488 vdst.write();
4489 } // execute
4490 // --- Inst_VOP3__V_FREXP_MANT_F64 class methods ---
4491
4493 : Inst_VOP3A(iFmt, "v_frexp_mant_f64", false)
4494 {
4495 setFlag(ALU);
4496 setFlag(F64);
4497 } // Inst_VOP3__V_FREXP_MANT_F64
4498
4500 {
4501 } // ~Inst_VOP3__V_FREXP_MANT_F64
4502
4503 // --- description from .arch file ---
4504 // See V_FREXP_MANT_F32.
4505 void
4507 {
4508 Wavefront *wf = gpuDynInst->wavefront();
4509 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4510 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4511
4512 src.readSrc();
4513
4514 if (instData.ABS & 0x1) {
4515 src.absModifier();
4516 }
4517
4518 if (extData.NEG & 0x1) {
4519 src.negModifier();
4520 }
4521
4522 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4523 if (wf->execMask(lane)) {
4524 VecElemI32 exp(0);
4525 vdst[lane] = std::frexp(src[lane], &exp);
4526 }
4527 }
4528
4529 vdst.write();
4530 } // execute
4531 // --- Inst_VOP3__V_FRACT_F64 class methods ---
4532
4534 : Inst_VOP3A(iFmt, "v_fract_f64", false)
4535 {
4536 setFlag(ALU);
4537 setFlag(F64);
4538 } // Inst_VOP3__V_FRACT_F64
4539
4541 {
4542 } // ~Inst_VOP3__V_FRACT_F64
4543
4544 // --- description from .arch file ---
4545 // See V_FRACT_F32.
4546 void
4548 {
4549 Wavefront *wf = gpuDynInst->wavefront();
4550 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4551 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4552
4553 src.readSrc();
4554
4555 if (instData.ABS & 0x1) {
4556 src.absModifier();
4557 }
4558
4559 if (extData.NEG & 0x1) {
4560 src.negModifier();
4561 }
4562
4563 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4564 if (wf->execMask(lane)) {
4565 VecElemF32 int_part(0.0);
4566 vdst[lane] = std::modf(src[lane], &int_part);
4567 }
4568 }
4569
4570 vdst.write();
4571 } // execute
4572 // --- Inst_VOP3__V_FREXP_EXP_I32_F32 class methods ---
4573
4575 InFmt_VOP3A *iFmt)
4576 : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f32", false)
4577 {
4578 setFlag(ALU);
4579 setFlag(F32);
4580 } // Inst_VOP3__V_FREXP_EXP_I32_F32
4581
4583 {
4584 } // ~Inst_VOP3__V_FREXP_EXP_I32_F32
4585
4586 // --- description from .arch file ---
4587 // if (S0.f == INF || S0.f == NAN) then D.i = 0;
4588 // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).
4589 // Returns exponent of single precision float input, such that S0.f =
4590 // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns
4591 // the significand.
4592 void
4594 {
4595 Wavefront *wf = gpuDynInst->wavefront();
4596 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4597 VecOperandI32 vdst(gpuDynInst, instData.VDST);
4598
4599 src.readSrc();
4600
4601 if (instData.ABS & 0x1) {
4602 src.absModifier();
4603 }
4604
4605 if (extData.NEG & 0x1) {
4606 src.negModifier();
4607 }
4608
4609 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4610 if (wf->execMask(lane)) {
4611 if (std::isinf(src[lane])|| std::isnan(src[lane])) {
4612 vdst[lane] = 0;
4613 } else {
4614 VecElemI32 exp(0);
4615 std::frexp(src[lane], &exp);
4616 vdst[lane] = exp;
4617 }
4618 }
4619 }
4620
4621 vdst.write();
4622 } // execute
4623 // --- Inst_VOP3__V_FREXP_MANT_F32 class methods ---
4624
4626 : Inst_VOP3A(iFmt, "v_frexp_mant_f32", false)
4627 {
4628 setFlag(ALU);
4629 setFlag(F32);
4630 } // Inst_VOP3__V_FREXP_MANT_F32
4631
4633 {
4634 } // ~Inst_VOP3__V_FREXP_MANT_F32
4635
4636 // --- description from .arch file ---
4637 // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
4638 // else D.f = Mantissa(S0.f).
4639 // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary
4640 // --- significand of single precision float input, such that S0.f =
4641 // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which
4642 // --- returns integer exponent.
4643 void
4645 {
4646 Wavefront *wf = gpuDynInst->wavefront();
4647 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4648 VecOperandF32 vdst(gpuDynInst, instData.VDST);
4649
4650 src.readSrc();
4651
4652 if (instData.ABS & 0x1) {
4653 src.absModifier();
4654 }
4655
4656 if (extData.NEG & 0x1) {
4657 src.negModifier();
4658 }
4659
4660 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4661 if (wf->execMask(lane)) {
4662 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
4663 vdst[lane] = src[lane];
4664 } else {
4665 VecElemI32 exp(0);
4666 vdst[lane] = std::frexp(src[lane], &exp);
4667 }
4668 }
4669 }
4670
4671 vdst.write();
4672 } // execute
4673 // --- Inst_VOP3__V_CLREXCP class methods ---
4674
4676 : Inst_VOP3A(iFmt, "v_clrexcp", false)
4677 {
4678 } // Inst_VOP3__V_CLREXCP
4679
4681 {
4682 } // ~Inst_VOP3__V_CLREXCP
4683
4684 // --- description from .arch file ---
4685 // Clear wave's exception state in SIMD (SP).
4686 void
4688 {
4690 } // execute
4691 // --- Inst_VOP3__V_CVT_F16_U16 class methods ---
4692
4694 : Inst_VOP3A(iFmt, "v_cvt_f16_u16", false)
4695 {
4696 setFlag(ALU);
4697 setFlag(F16);
4698 } // Inst_VOP3__V_CVT_F16_U16
4699
4701 {
4702 } // ~Inst_VOP3__V_CVT_F16_U16
4703
4704 // --- description from .arch file ---
4705 // D.f16 = uint16_to_flt16(S.u16).
4706 // Supports denormals, rounding, exception flags and saturation.
4707 void
4709 {
4711 } // execute
4712 // --- Inst_VOP3__V_CVT_F16_I16 class methods ---
4713
4715 : Inst_VOP3A(iFmt, "v_cvt_f16_i16", false)
4716 {
4717 setFlag(ALU);
4718 setFlag(F16);
4719 } // Inst_VOP3__V_CVT_F16_I16
4720
4722 {
4723 } // ~Inst_VOP3__V_CVT_F16_I16
4724
4725 // --- description from .arch file ---
4726 // D.f16 = int16_to_flt16(S.i16).
4727 // Supports denormals, rounding, exception flags and saturation.
4728 void
4730 {
4732 } // execute
4733 // --- Inst_VOP3__V_CVT_U16_F16 class methods ---
4734
4736 : Inst_VOP3A(iFmt, "v_cvt_u16_f16", false)
4737 {
4738 setFlag(ALU);
4739 setFlag(F16);
4740 } // Inst_VOP3__V_CVT_U16_F16
4741
4743 {
4744 } // ~Inst_VOP3__V_CVT_U16_F16
4745
4746 // --- description from .arch file ---
4747 // D.u16 = flt16_to_uint16(S.f16).
4748 // Supports rounding, exception flags and saturation.
4749 void
4751 {
4753 } // execute
4754 // --- Inst_VOP3__V_CVT_I16_F16 class methods ---
4755
4757 : Inst_VOP3A(iFmt, "v_cvt_i16_f16", false)
4758 {
4759 setFlag(ALU);
4760 setFlag(F16);
4761 } // Inst_VOP3__V_CVT_I16_F16
4762
4764 {
4765 } // ~Inst_VOP3__V_CVT_I16_F16
4766
4767 // --- description from .arch file ---
4768 // D.i16 = flt16_to_int16(S.f16).
4769 // Supports rounding, exception flags and saturation.
4770 void
4772 {
4774 } // execute
4775 // --- Inst_VOP3__V_RCP_F16 class methods ---
4776
4778 : Inst_VOP3A(iFmt, "v_rcp_f16", false)
4779 {
4780 setFlag(ALU);
4781 setFlag(F16);
4782 } // Inst_VOP3__V_RCP_F16
4783
4785 {
4786 } // ~Inst_VOP3__V_RCP_F16
4787
4788 // --- description from .arch file ---
4789 // if (S0.f16 == 1.0f)
4790 // D.f16 = 1.0f;
4791 // else
4792 // D.f16 = ApproximateRecip(S0.f16).
4793 void
4795 {
4797 } // execute
4798 // --- Inst_VOP3__V_SQRT_F16 class methods ---
4799
4801 : Inst_VOP3A(iFmt, "v_sqrt_f16", false)
4802 {
4803 setFlag(ALU);
4804 setFlag(F16);
4805 } // Inst_VOP3__V_SQRT_F16
4806
4808 {
4809 } // ~Inst_VOP3__V_SQRT_F16
4810
4811 // --- description from .arch file ---
4812 // if (S0.f16 == 1.0f)
4813 // D.f16 = 1.0f;
4814 // else
4815 // D.f16 = ApproximateSqrt(S0.f16).
4816 void
4818 {
4820 } // execute
4821 // --- Inst_VOP3__V_RSQ_F16 class methods ---
4822
4824 : Inst_VOP3A(iFmt, "v_rsq_f16", false)
4825 {
4826 setFlag(ALU);
4827 setFlag(F16);
4828 } // Inst_VOP3__V_RSQ_F16
4829
4831 {
4832 } // ~Inst_VOP3__V_RSQ_F16
4833
4834 // --- description from .arch file ---
4835 // if (S0.f16 == 1.0f)
4836 // D.f16 = 1.0f;
4837 // else
4838 // D.f16 = ApproximateRecipSqrt(S0.f16).
4839 void
4841 {
4843 } // execute
4844 // --- Inst_VOP3__V_LOG_F16 class methods ---
4845
4847 : Inst_VOP3A(iFmt, "v_log_f16", false)
4848 {
4849 setFlag(ALU);
4850 setFlag(F16);
4851 } // Inst_VOP3__V_LOG_F16
4852
4854 {
4855 } // ~Inst_VOP3__V_LOG_F16
4856
4857 // --- description from .arch file ---
4858 // if (S0.f16 == 1.0f)
4859 // D.f16 = 0.0f;
4860 // else
4861 // D.f16 = ApproximateLog2(S0.f16).
4862 void
4864 {
4866 } // execute
4867 // --- Inst_VOP3__V_EXP_F16 class methods ---
4868
4870 : Inst_VOP3A(iFmt, "v_exp_f16", false)
4871 {
4872 setFlag(ALU);
4873 setFlag(F16);
4874 } // Inst_VOP3__V_EXP_F16
4875
4877 {
4878 } // ~Inst_VOP3__V_EXP_F16
4879
4880 // --- description from .arch file ---
4881 // if (S0.f16 == 0.0f)
4882 // D.f16 = 1.0f;
4883 // else
4884 // D.f16 = Approximate2ToX(S0.f16).
4885 void
4887 {
4889 } // execute
4890 // --- Inst_VOP3__V_FREXP_MANT_F16 class methods ---
4891
4893 : Inst_VOP3A(iFmt, "v_frexp_mant_f16", false)
4894 {
4895 setFlag(ALU);
4896 setFlag(F16);
4897 } // Inst_VOP3__V_FREXP_MANT_F16
4898
4900 {
4901 } // ~Inst_VOP3__V_FREXP_MANT_F16
4902
4903 // --- description from .arch file ---
4904 // if (S0.f16 == +-INF || S0.f16 == NAN)
4905 // D.f16 = S0.f16;
4906 // else
4907 // D.f16 = mantissa(S0.f16).
4908 // Result range is (-1.0,-0.5][0.5,1.0).
4909 // C math library frexp function.
4910 // Returns binary significand of half precision float input, such that the
4911 // original single float = significand * (2 ** exponent).
4912 void
4914 {
4916 } // execute
4917 // --- Inst_VOP3__V_FREXP_EXP_I16_F16 class methods ---
4918
4920 InFmt_VOP3A *iFmt)
4921 : Inst_VOP3A(iFmt, "v_frexp_exp_i16_f16", false)
4922 {
4923 setFlag(ALU);
4924 setFlag(F16);
4925 } // Inst_VOP3__V_FREXP_EXP_I16_F16
4926
4928 {
4929 } // ~Inst_VOP3__V_FREXP_EXP_I16_F16
4930
4931 // --- description from .arch file ---
4932 // if (S0.f16 == +-INF || S0.f16 == NAN)
4933 // D.i16 = 0;
4934 // else
4935 // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1).
4936 // C math library frexp function.
4937 // Returns exponent of half precision float input, such that the
4938 // original single float = significand * (2 ** exponent).
4939 void
4944 // --- Inst_VOP3__V_FLOOR_F16 class methods ---
4945
4947 : Inst_VOP3A(iFmt, "v_floor_f16", false)
4948 {
4949 setFlag(ALU);
4950 setFlag(F16);
4951 } // Inst_VOP3__V_FLOOR_F16
4952
4954 {
4955 } // ~Inst_VOP3__V_FLOOR_F16
4956
4957 // --- description from .arch file ---
4958 // D.f16 = trunc(S0.f16);