gem5 v24.0.0.0 — Doxygen source listing of vop3.cc (VEGA ISA VOP3 instruction implementations).
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
 31
 32#include "arch/amdgpu/vega/insts/instructions.hh"
 33
 34// NOTE(review): the original include block (listing lines 32-34) was lost in
 35// extraction; instructions.hh is certainly required — confirm any additional
 36// includes (e.g. inst_util.hh / debug headers) against the gem5 tree.
36namespace gem5
37{
38
39namespace VegaISA
40{
41 // --- Inst_VOP3__V_CNDMASK_B32 class methods ---
42
44 : Inst_VOP3A(iFmt, "v_cndmask_b32", false)
45 {
46 setFlag(ALU);
47 setFlag(ReadsVCC);
48 } // Inst_VOP3__V_CNDMASK_B32
49
51 {
52 } // ~Inst_VOP3__V_CNDMASK_B32
53
54 // --- description from .arch file ---
55 // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
56 // as a scalar GPR in S2.
57 void
59 {
60 Wavefront *wf = gpuDynInst->wavefront();
61 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
62 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
63 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
64 VecOperandU32 vdst(gpuDynInst, instData.VDST);
65
66 src0.readSrc();
67 src1.readSrc();
68 vcc.read();
69
70 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
71 if (wf->execMask(lane)) {
72 vdst[lane] = bits(vcc.rawData(), lane)
73 ? src1[lane] : src0[lane];
74 }
75 }
76
77 vdst.write();
78 } // execute
79 // --- Inst_VOP3__V_ADD_F32 class methods ---
80
82 : Inst_VOP3A(iFmt, "v_add_f32", false)
83 {
84 setFlag(ALU);
85 setFlag(F32);
86 } // Inst_VOP3__V_ADD_F32
87
89 {
90 } // ~Inst_VOP3__V_ADD_F32
91
92 // --- description from .arch file ---
93 // D.f = S0.f + S1.f.
94 void
96 {
97 Wavefront *wf = gpuDynInst->wavefront();
98 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
99 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
100 VecOperandF32 vdst(gpuDynInst, instData.VDST);
101
102 src0.readSrc();
103 src1.readSrc();
104
105 if (instData.ABS & 0x1) {
106 src0.absModifier();
107 }
108
109 if (instData.ABS & 0x2) {
110 src1.absModifier();
111 }
112
113 if (extData.NEG & 0x1) {
114 src0.negModifier();
115 }
116
117 if (extData.NEG & 0x2) {
118 src1.negModifier();
119 }
120
124 assert(!(instData.ABS & 0x4));
125 assert(!(extData.NEG & 0x4));
126
127 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
128 if (wf->execMask(lane)) {
129 vdst[lane] = src0[lane] + src1[lane];
130 }
131 }
132
133 vdst.write();
134 } // execute
135 // --- Inst_VOP3__V_SUB_F32 class methods ---
136
138 : Inst_VOP3A(iFmt, "v_sub_f32", false)
139 {
140 setFlag(ALU);
141 setFlag(F32);
142 } // Inst_VOP3__V_SUB_F32
143
145 {
146 } // ~Inst_VOP3__V_SUB_F32
147
148 // --- description from .arch file ---
149 // D.f = S0.f - S1.f.
150 // SQ translates to V_ADD_F32.
151 void
153 {
154 Wavefront *wf = gpuDynInst->wavefront();
155 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
156 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
157 VecOperandF32 vdst(gpuDynInst, instData.VDST);
158
159 src0.readSrc();
160 src1.readSrc();
161
162 if (instData.ABS & 0x1) {
163 src0.absModifier();
164 }
165
166 if (instData.ABS & 0x2) {
167 src1.absModifier();
168 }
169
170 if (extData.NEG & 0x1) {
171 src0.negModifier();
172 }
173
174 if (extData.NEG & 0x2) {
175 src1.negModifier();
176 }
177
181 assert(!(instData.ABS & 0x4));
182 assert(!(extData.NEG & 0x4));
183
184 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
185 if (wf->execMask(lane)) {
186 vdst[lane] = src0[lane] - src1[lane];
187 }
188 }
189
190 vdst.write();
191 } // execute
192 // --- Inst_VOP3__V_SUBREV_F32 class methods ---
193
195 : Inst_VOP3A(iFmt, "v_subrev_f32", false)
196 {
197 setFlag(ALU);
198 setFlag(F32);
199 } // Inst_VOP3__V_SUBREV_F32
200
202 {
203 } // ~Inst_VOP3__V_SUBREV_F32
204
205 // --- description from .arch file ---
206 // D.f = S1.f - S0.f.
207 // SQ translates to V_ADD_F32.
208 void
210 {
211 Wavefront *wf = gpuDynInst->wavefront();
212 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
213 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
214 VecOperandF32 vdst(gpuDynInst, instData.VDST);
215
216 src0.readSrc();
217 src1.readSrc();
218
219 if (instData.ABS & 0x1) {
220 src0.absModifier();
221 }
222
223 if (instData.ABS & 0x2) {
224 src1.absModifier();
225 }
226
227 if (extData.NEG & 0x1) {
228 src0.negModifier();
229 }
230
231 if (extData.NEG & 0x2) {
232 src1.negModifier();
233 }
234
238 assert(!(instData.ABS & 0x4));
239 assert(!(extData.NEG & 0x4));
240
241 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
242 if (wf->execMask(lane)) {
243 vdst[lane] = src1[lane] - src0[lane];
244 }
245 }
246
247 vdst.write();
248 } // execute
249 // --- Inst_VOP3__V_MUL_LEGACY_F32 class methods ---
250
252 : Inst_VOP3A(iFmt, "v_mul_legacy_f32", false)
253 {
254 setFlag(ALU);
255 setFlag(F32);
256 } // Inst_VOP3__V_MUL_LEGACY_F32
257
259 {
260 } // ~Inst_VOP3__V_MUL_LEGACY_F32
261
262 // --- description from .arch file ---
263 // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0).
264 void
266 {
267 Wavefront *wf = gpuDynInst->wavefront();
268 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
269 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
270 VecOperandF32 vdst(gpuDynInst, instData.VDST);
271
272 src0.readSrc();
273 src1.readSrc();
274
275 if (instData.ABS & 0x1) {
276 src0.absModifier();
277 }
278
279 if (instData.ABS & 0x2) {
280 src1.absModifier();
281 }
282
283 if (extData.NEG & 0x1) {
284 src0.negModifier();
285 }
286
287 if (extData.NEG & 0x2) {
288 src1.negModifier();
289 }
290
294 assert(!(instData.ABS & 0x4));
295 assert(!(extData.NEG & 0x4));
296
297 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
298 if (wf->execMask(lane)) {
299 if (std::isnan(src0[lane]) ||
300 std::isnan(src1[lane])) {
301 vdst[lane] = NAN;
302 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
303 std::fpclassify(src0[lane]) == FP_ZERO) &&
304 !std::signbit(src0[lane])) {
305 if (std::isinf(src1[lane])) {
306 vdst[lane] = NAN;
307 } else if (!std::signbit(src1[lane])) {
308 vdst[lane] = +0.0;
309 } else {
310 vdst[lane] = -0.0;
311 }
312 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
313 std::fpclassify(src0[lane]) == FP_ZERO) &&
314 std::signbit(src0[lane])) {
315 if (std::isinf(src1[lane])) {
316 vdst[lane] = NAN;
317 } else if (std::signbit(src1[lane])) {
318 vdst[lane] = +0.0;
319 } else {
320 vdst[lane] = -0.0;
321 }
322 } else if (std::isinf(src0[lane]) &&
323 !std::signbit(src0[lane])) {
324 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
325 std::fpclassify(src1[lane]) == FP_ZERO) {
326 vdst[lane] = NAN;
327 } else if (!std::signbit(src1[lane])) {
328 vdst[lane] = +INFINITY;
329 } else {
330 vdst[lane] = -INFINITY;
331 }
332 } else if (std::isinf(src0[lane]) &&
333 std::signbit(src0[lane])) {
334 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
335 std::fpclassify(src1[lane]) == FP_ZERO) {
336 vdst[lane] = NAN;
337 } else if (std::signbit(src1[lane])) {
338 vdst[lane] = +INFINITY;
339 } else {
340 vdst[lane] = -INFINITY;
341 }
342 } else {
343 vdst[lane] = src0[lane] * src1[lane];
344 }
345 }
346 }
347
348 vdst.write();
349 } // execute
350 // --- Inst_VOP3__V_MUL_F32 class methods ---
351
353 : Inst_VOP3A(iFmt, "v_mul_f32", false)
354 {
355 setFlag(ALU);
356 setFlag(F32);
357 } // Inst_VOP3__V_MUL_F32
358
360 {
361 } // ~Inst_VOP3__V_MUL_F32
362
363 // --- description from .arch file ---
364 // D.f = S0.f * S1.f.
365 void
367 {
368 Wavefront *wf = gpuDynInst->wavefront();
369 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
370 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
371 VecOperandF32 vdst(gpuDynInst, instData.VDST);
372
373 src0.readSrc();
374 src1.readSrc();
375
376 if (instData.ABS & 0x1) {
377 src0.absModifier();
378 }
379
380 if (instData.ABS & 0x2) {
381 src1.absModifier();
382 }
383
384 if (extData.NEG & 0x1) {
385 src0.negModifier();
386 }
387
388 if (extData.NEG & 0x2) {
389 src1.negModifier();
390 }
391
395 assert(!(instData.ABS & 0x4));
396 assert(!(extData.NEG & 0x4));
397
398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
399 if (wf->execMask(lane)) {
400 if (std::isnan(src0[lane]) ||
401 std::isnan(src1[lane])) {
402 vdst[lane] = NAN;
403 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
404 std::fpclassify(src0[lane]) == FP_ZERO) &&
405 !std::signbit(src0[lane])) {
406 if (std::isinf(src1[lane])) {
407 vdst[lane] = NAN;
408 } else if (!std::signbit(src1[lane])) {
409 vdst[lane] = +0.0;
410 } else {
411 vdst[lane] = -0.0;
412 }
413 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
414 std::fpclassify(src0[lane]) == FP_ZERO) &&
415 std::signbit(src0[lane])) {
416 if (std::isinf(src1[lane])) {
417 vdst[lane] = NAN;
418 } else if (std::signbit(src1[lane])) {
419 vdst[lane] = +0.0;
420 } else {
421 vdst[lane] = -0.0;
422 }
423 } else if (std::isinf(src0[lane]) &&
424 !std::signbit(src0[lane])) {
425 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
426 std::fpclassify(src1[lane]) == FP_ZERO) {
427 vdst[lane] = NAN;
428 } else if (!std::signbit(src1[lane])) {
429 vdst[lane] = +INFINITY;
430 } else {
431 vdst[lane] = -INFINITY;
432 }
433 } else if (std::isinf(src0[lane]) &&
434 std::signbit(src0[lane])) {
435 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
436 std::fpclassify(src1[lane]) == FP_ZERO) {
437 vdst[lane] = NAN;
438 } else if (std::signbit(src1[lane])) {
439 vdst[lane] = +INFINITY;
440 } else {
441 vdst[lane] = -INFINITY;
442 }
443 } else {
444 vdst[lane] = src0[lane] * src1[lane];
445 }
446 }
447 }
448
449 vdst.write();
450 } // execute
451 // --- Inst_VOP3__V_MUL_I32_I24 class methods ---
452
454 : Inst_VOP3A(iFmt, "v_mul_i32_i24", false)
455 {
456 setFlag(ALU);
457 } // Inst_VOP3__V_MUL_I32_I24
458
460 {
461 } // ~Inst_VOP3__V_MUL_I32_I24
462
463 // --- description from .arch file ---
464 // D.i = S0.i[23:0] * S1.i[23:0].
465 void
467 {
468 Wavefront *wf = gpuDynInst->wavefront();
469 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
470 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
471 VecOperandI32 vdst(gpuDynInst, instData.VDST);
472
473 src0.readSrc();
474 src1.read();
475
479 assert(!(instData.ABS & 0x1));
480 assert(!(instData.ABS & 0x2));
481 assert(!(instData.ABS & 0x4));
482 assert(!(extData.NEG & 0x1));
483 assert(!(extData.NEG & 0x2));
484 assert(!(extData.NEG & 0x4));
485
486 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
487 if (wf->execMask(lane)) {
488 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
489 * sext<24>(bits(src1[lane], 23, 0));
490 }
491 }
492
493 vdst.write();
494 } // execute
495 // --- Inst_VOP3__V_MUL_HI_I32_I24 class methods ---
496
498 : Inst_VOP3A(iFmt, "v_mul_hi_i32_i24", false)
499 {
500 setFlag(ALU);
501 } // Inst_VOP3__V_MUL_HI_I32_I24
502
504 {
505 } // ~Inst_VOP3__V_MUL_HI_I32_I24
506
507 // --- description from .arch file ---
508 // D.i = (S0.i[23:0] * S1.i[23:0])>>32.
509 void
511 {
512 Wavefront *wf = gpuDynInst->wavefront();
513 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
514 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
515 VecOperandI32 vdst(gpuDynInst, instData.VDST);
516
517 src0.readSrc();
518 src1.readSrc();
519
523 assert(!(instData.ABS & 0x1));
524 assert(!(instData.ABS & 0x2));
525 assert(!(instData.ABS & 0x4));
526 assert(!(extData.NEG & 0x1));
527 assert(!(extData.NEG & 0x2));
528 assert(!(extData.NEG & 0x4));
529
530 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
531 if (wf->execMask(lane)) {
532 VecElemI64 tmp_src0
533 = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
534 VecElemI64 tmp_src1
535 = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
536
537 vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
538 }
539 }
540
541 vdst.write();
542 } // execute
543 // --- Inst_VOP3__V_MUL_U32_U24 class methods ---
544
546 : Inst_VOP3A(iFmt, "v_mul_u32_u24", false)
547 {
548 setFlag(ALU);
549 } // Inst_VOP3__V_MUL_U32_U24
550
552 {
553 } // ~Inst_VOP3__V_MUL_U32_U24
554
555 // --- description from .arch file ---
556 // D.u = S0.u[23:0] * S1.u[23:0].
557 void
559 {
560 Wavefront *wf = gpuDynInst->wavefront();
561 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
562 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
563 VecOperandU32 vdst(gpuDynInst, instData.VDST);
564
565 src0.readSrc();
566 src1.readSrc();
567
571 assert(!(instData.ABS & 0x1));
572 assert(!(instData.ABS & 0x2));
573 assert(!(instData.ABS & 0x4));
574 assert(!(extData.NEG & 0x1));
575 assert(!(extData.NEG & 0x2));
576 assert(!(extData.NEG & 0x4));
577
578 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
579 if (wf->execMask(lane)) {
580 vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0);
581 }
582 }
583
584 vdst.write();
585 } // execute
586 // --- Inst_VOP3__V_MUL_HI_U32_U24 class methods ---
587
589 : Inst_VOP3A(iFmt, "v_mul_hi_u32_u24", false)
590 {
591 setFlag(ALU);
592 } // Inst_VOP3__V_MUL_HI_U32_U24
593
595 {
596 } // ~Inst_VOP3__V_MUL_HI_U32_U24
597
598 // --- description from .arch file ---
599 // D.i = (S0.u[23:0] * S1.u[23:0])>>32.
600 void
602 {
603 Wavefront *wf = gpuDynInst->wavefront();
604 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
605 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
606 VecOperandU32 vdst(gpuDynInst, instData.VDST);
607
608 src0.readSrc();
609 src1.readSrc();
610
614 assert(!(instData.ABS & 0x1));
615 assert(!(instData.ABS & 0x2));
616 assert(!(instData.ABS & 0x4));
617 assert(!(extData.NEG & 0x1));
618 assert(!(extData.NEG & 0x2));
619 assert(!(extData.NEG & 0x4));
620
621 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
622 if (wf->execMask(lane)) {
623 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
624 VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
625 vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
626 }
627 }
628
629 vdst.write();
630 } // execute
631 // --- Inst_VOP3__V_MIN_F32 class methods ---
632
634 : Inst_VOP3A(iFmt, "v_min_f32", false)
635 {
636 setFlag(ALU);
637 setFlag(F32);
638 } // Inst_VOP3__V_MIN_F32
639
641 {
642 } // ~Inst_VOP3__V_MIN_F32
643
644 // --- description from .arch file ---
645 // D.f = (S0.f < S1.f ? S0.f : S1.f).
646 void
648 {
649 Wavefront *wf = gpuDynInst->wavefront();
650 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
651 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
652 VecOperandF32 vdst(gpuDynInst, instData.VDST);
653
654 src0.readSrc();
655 src1.readSrc();
656
657 if (instData.ABS & 0x1) {
658 src0.absModifier();
659 }
660
661 if (instData.ABS & 0x2) {
662 src1.absModifier();
663 }
664
665 if (extData.NEG & 0x1) {
666 src0.negModifier();
667 }
668
669 if (extData.NEG & 0x2) {
670 src1.negModifier();
671 }
672
676 assert(!(instData.ABS & 0x4));
677 assert(!(extData.NEG & 0x4));
678
679 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
680 if (wf->execMask(lane)) {
681 vdst[lane] = std::fmin(src0[lane], src1[lane]);
682 }
683 }
684
685 vdst.write();
686 } // execute
687 // --- Inst_VOP3__V_MAX_F32 class methods ---
688
690 : Inst_VOP3A(iFmt, "v_max_f32", false)
691 {
692 setFlag(ALU);
693 setFlag(F32);
694 } // Inst_VOP3__V_MAX_F32
695
697 {
698 } // ~Inst_VOP3__V_MAX_F32
699
700 // --- description from .arch file ---
701 // D.f = (S0.f >= S1.f ? S0.f : S1.f).
702 void
704 {
705 Wavefront *wf = gpuDynInst->wavefront();
706 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
707 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
708 VecOperandF32 vdst(gpuDynInst, instData.VDST);
709
710 src0.readSrc();
711 src1.readSrc();
712
713 if (instData.ABS & 0x1) {
714 src0.absModifier();
715 }
716
717 if (instData.ABS & 0x2) {
718 src1.absModifier();
719 }
720
721 if (extData.NEG & 0x1) {
722 src0.negModifier();
723 }
724
725 if (extData.NEG & 0x2) {
726 src1.negModifier();
727 }
728
732 assert(!(instData.ABS & 0x4));
733 assert(!(extData.NEG & 0x4));
734
735 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
736 if (wf->execMask(lane)) {
737 vdst[lane] = std::fmax(src0[lane], src1[lane]);
738 }
739 }
740
741 vdst.write();
742 } // execute
743 // --- Inst_VOP3__V_MIN_I32 class methods ---
744
746 : Inst_VOP3A(iFmt, "v_min_i32", false)
747 {
748 setFlag(ALU);
749 } // Inst_VOP3__V_MIN_I32
750
752 {
753 } // ~Inst_VOP3__V_MIN_I32
754
755 // --- description from .arch file ---
756 // D.i = min(S0.i, S1.i).
757 void
759 {
760 Wavefront *wf = gpuDynInst->wavefront();
761 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
762 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
763 VecOperandI32 vdst(gpuDynInst, instData.VDST);
764
765 src0.readSrc();
766 src1.readSrc();
767
771 assert(!(instData.ABS & 0x1));
772 assert(!(instData.ABS & 0x2));
773 assert(!(instData.ABS & 0x4));
774 assert(!(extData.NEG & 0x1));
775 assert(!(extData.NEG & 0x2));
776 assert(!(extData.NEG & 0x4));
777
778 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
779 if (wf->execMask(lane)) {
780 vdst[lane] = std::min(src0[lane], src1[lane]);
781 }
782 }
783
784 vdst.write();
785 } // execute
786 // --- Inst_VOP3__V_MAX_I32 class methods ---
787
789 : Inst_VOP3A(iFmt, "v_max_i32", false)
790 {
791 setFlag(ALU);
792 } // Inst_VOP3__V_MAX_I32
793
795 {
796 } // ~Inst_VOP3__V_MAX_I32
797
798 // --- description from .arch file ---
799 // D.i = max(S0.i, S1.i).
800 void
802 {
803 Wavefront *wf = gpuDynInst->wavefront();
804 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
805 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
806 VecOperandI32 vdst(gpuDynInst, instData.VDST);
807
808 src0.readSrc();
809 src1.readSrc();
810
814 assert(!(instData.ABS & 0x1));
815 assert(!(instData.ABS & 0x2));
816 assert(!(instData.ABS & 0x4));
817 assert(!(extData.NEG & 0x1));
818 assert(!(extData.NEG & 0x2));
819 assert(!(extData.NEG & 0x4));
820
821 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
822 if (wf->execMask(lane)) {
823 vdst[lane] = std::max(src0[lane], src1[lane]);
824 }
825 }
826
827 vdst.write();
828 } // execute
829 // --- Inst_VOP3__V_MIN_U32 class methods ---
830
832 : Inst_VOP3A(iFmt, "v_min_u32", false)
833 {
834 setFlag(ALU);
835 } // Inst_VOP3__V_MIN_U32
836
838 {
839 } // ~Inst_VOP3__V_MIN_U32
840
841 // --- description from .arch file ---
842 // D.u = min(S0.u, S1.u).
843 void
845 {
846 Wavefront *wf = gpuDynInst->wavefront();
847 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
848 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
849 VecOperandU32 vdst(gpuDynInst, instData.VDST);
850
851 src0.readSrc();
852 src1.readSrc();
853
857 assert(!(instData.ABS & 0x1));
858 assert(!(instData.ABS & 0x2));
859 assert(!(instData.ABS & 0x4));
860 assert(!(extData.NEG & 0x1));
861 assert(!(extData.NEG & 0x2));
862 assert(!(extData.NEG & 0x4));
863
864 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
865 if (wf->execMask(lane)) {
866 vdst[lane] = std::min(src0[lane], src1[lane]);
867 }
868 }
869
870 vdst.write();
871 } // execute
872 // --- Inst_VOP3__V_MAX_U32 class methods ---
873
875 : Inst_VOP3A(iFmt, "v_max_u32", false)
876 {
877 setFlag(ALU);
878 } // Inst_VOP3__V_MAX_U32
879
881 {
882 } // ~Inst_VOP3__V_MAX_U32
883
884 // --- description from .arch file ---
885 // D.u = max(S0.u, S1.u).
886 void
888 {
889 Wavefront *wf = gpuDynInst->wavefront();
890 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
891 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
892 VecOperandU32 vdst(gpuDynInst, instData.VDST);
893
894 src0.readSrc();
895 src1.readSrc();
896
900 assert(!(instData.ABS & 0x1));
901 assert(!(instData.ABS & 0x2));
902 assert(!(instData.ABS & 0x4));
903 assert(!(extData.NEG & 0x1));
904 assert(!(extData.NEG & 0x2));
905 assert(!(extData.NEG & 0x4));
906
907 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
908 if (wf->execMask(lane)) {
909 vdst[lane] = std::max(src0[lane], src1[lane]);
910 }
911 }
912
913 vdst.write();
914 } // execute
915 // --- Inst_VOP3__V_LSHRREV_B32 class methods ---
916
918 : Inst_VOP3A(iFmt, "v_lshrrev_b32", false)
919 {
920 setFlag(ALU);
921 } // Inst_VOP3__V_LSHRREV_B32
922
924 {
925 } // ~Inst_VOP3__V_LSHRREV_B32
926
927 // --- description from .arch file ---
928 // D.u = S1.u >> S0.u[4:0].
929 // The vacated bits are set to zero.
930 // SQ translates this to an internal SP opcode.
931 void
933 {
934 Wavefront *wf = gpuDynInst->wavefront();
935 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
936 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
937 VecOperandU32 vdst(gpuDynInst, instData.VDST);
938
939 src0.readSrc();
940 src1.readSrc();
941
945 assert(!(instData.ABS & 0x1));
946 assert(!(instData.ABS & 0x2));
947 assert(!(instData.ABS & 0x4));
948 assert(!(extData.NEG & 0x1));
949 assert(!(extData.NEG & 0x2));
950 assert(!(extData.NEG & 0x4));
951
952 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
953 if (wf->execMask(lane)) {
954 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
955 }
956 }
957
958 vdst.write();
959 } // execute
960 // --- Inst_VOP3__V_ASHRREV_I32 class methods ---
961
963 : Inst_VOP3A(iFmt, "v_ashrrev_i32", false)
964 {
965 setFlag(ALU);
966 } // Inst_VOP3__V_ASHRREV_I32
967
969 {
970 } // ~Inst_VOP3__V_ASHRREV_I32
971
972 // --- description from .arch file ---
973 // D.i = signext(S1.i) >> S0.i[4:0].
974 // The vacated bits are set to the sign bit of the input value.
975 // SQ translates this to an internal SP opcode.
976 void
978 {
979 Wavefront *wf = gpuDynInst->wavefront();
980 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
981 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
982 VecOperandI32 vdst(gpuDynInst, instData.VDST);
983
984 src0.readSrc();
985 src1.readSrc();
986
990 assert(!(instData.ABS & 0x1));
991 assert(!(instData.ABS & 0x2));
992 assert(!(instData.ABS & 0x4));
993 assert(!(extData.NEG & 0x1));
994 assert(!(extData.NEG & 0x2));
995 assert(!(extData.NEG & 0x4));
996
997 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
998 if (wf->execMask(lane)) {
999 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
1000 }
1001 }
1002
1003 vdst.write();
1004 } // execute
1005 // --- Inst_VOP3__V_LSHLREV_B32 class methods ---
1006
1008 : Inst_VOP3A(iFmt, "v_lshlrev_b32", false)
1009 {
1010 setFlag(ALU);
1011 } // Inst_VOP3__V_LSHLREV_B32
1012
1014 {
1015 } // ~Inst_VOP3__V_LSHLREV_B32
1016
1017 // --- description from .arch file ---
1018 // D.u = S1.u << S0.u[4:0].
1019 // SQ translates this to an internal SP opcode.
1020 void
1022 {
1023 Wavefront *wf = gpuDynInst->wavefront();
1024 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1025 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1026 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1027
1028 src0.readSrc();
1029 src1.readSrc();
1030
1034 assert(!(instData.ABS & 0x1));
1035 assert(!(instData.ABS & 0x2));
1036 assert(!(instData.ABS & 0x4));
1037 assert(!(extData.NEG & 0x1));
1038 assert(!(extData.NEG & 0x2));
1039 assert(!(extData.NEG & 0x4));
1040
1041 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1042 if (wf->execMask(lane)) {
1043 vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
1044 }
1045 }
1046
1047 vdst.write();
1048 } // execute
1049 // --- Inst_VOP3__V_AND_B32 class methods ---
1050
1052 : Inst_VOP3A(iFmt, "v_and_b32", false)
1053 {
1054 setFlag(ALU);
1055 } // Inst_VOP3__V_AND_B32
1056
1058 {
1059 } // ~Inst_VOP3__V_AND_B32
1060
1061 // --- description from .arch file ---
1062 // D.u = S0.u & S1.u.
1063 // Input and output modifiers not supported.
1064 void
1066 {
1067 Wavefront *wf = gpuDynInst->wavefront();
1068 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1069 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1070 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1071
1072 src0.readSrc();
1073 src1.readSrc();
1074
1078 assert(!(instData.ABS & 0x1));
1079 assert(!(instData.ABS & 0x2));
1080 assert(!(instData.ABS & 0x4));
1081 assert(!(extData.NEG & 0x1));
1082 assert(!(extData.NEG & 0x2));
1083 assert(!(extData.NEG & 0x4));
1084
1085 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1086 if (wf->execMask(lane)) {
1087 vdst[lane] = src0[lane] & src1[lane];
1088 }
1089 }
1090
1091 vdst.write();
1092 } // execute
1093 // --- Inst_VOP3__V_OR_B32 class methods ---
1094
1096 : Inst_VOP3A(iFmt, "v_or_b32", false)
1097 {
1098 setFlag(ALU);
1099 } // Inst_VOP3__V_OR_B32
1100
1102 {
1103 } // ~Inst_VOP3__V_OR_B32
1104
1105 // --- description from .arch file ---
1106 // D.u = S0.u | S1.u.
1107 // Input and output modifiers not supported.
1108 void
1110 {
1111 Wavefront *wf = gpuDynInst->wavefront();
1112 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1113 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1114 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1115
1116 src0.readSrc();
1117 src1.readSrc();
1118
1122 assert(!(instData.ABS & 0x1));
1123 assert(!(instData.ABS & 0x2));
1124 assert(!(instData.ABS & 0x4));
1125 assert(!(extData.NEG & 0x1));
1126 assert(!(extData.NEG & 0x2));
1127 assert(!(extData.NEG & 0x4));
1128
1129 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1130 if (wf->execMask(lane)) {
1131 vdst[lane] = src0[lane] | src1[lane];
1132 }
1133 }
1134
1135 vdst.write();
1136 } // execute
1137 // --- Inst_VOP3__V_OR3_B32 class methods ---
1138
1140 : Inst_VOP3A(iFmt, "v_or3_b32", false)
1141 {
1142 setFlag(ALU);
1143 } // Inst_VOP3__V_OR3_B32
1144
1146 {
1147 } // ~Inst_VOP3__V_OR3_B32
1148
1149 // --- description from .arch file ---
1150 // D.u = S0.u | S1.u | S2.u.
1151 // Input and output modifiers not supported.
1152 void
1154 {
1155 Wavefront *wf = gpuDynInst->wavefront();
1156 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1157 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1158 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
1159 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1160
1161 src0.readSrc();
1162 src1.readSrc();
1163 src2.readSrc();
1164
1168 assert(!(instData.ABS & 0x1));
1169 assert(!(instData.ABS & 0x2));
1170 assert(!(instData.ABS & 0x4));
1171 assert(!(extData.NEG & 0x1));
1172 assert(!(extData.NEG & 0x2));
1173 assert(!(extData.NEG & 0x4));
1174
1175 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1176 if (wf->execMask(lane)) {
1177 vdst[lane] = src0[lane] | src1[lane] | src2[lane];
1178 }
1179 }
1180
1181 vdst.write();
1182 } // execute
1183 // --- Inst_VOP3__V_XOR_B32 class methods ---
1184
1186 : Inst_VOP3A(iFmt, "v_xor_b32", false)
1187 {
1188 setFlag(ALU);
1189 } // Inst_VOP3__V_XOR_B32
1190
1192 {
1193 } // ~Inst_VOP3__V_XOR_B32
1194
1195 // --- description from .arch file ---
1196 // D.u = S0.u ^ S1.u.
1197 // Input and output modifiers not supported.
1198 void
1200 {
1201 Wavefront *wf = gpuDynInst->wavefront();
1202 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1203 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1204 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1205
1206 src0.readSrc();
1207 src1.readSrc();
1208
1212 assert(!(instData.ABS & 0x1));
1213 assert(!(instData.ABS & 0x2));
1214 assert(!(instData.ABS & 0x4));
1215 assert(!(extData.NEG & 0x1));
1216 assert(!(extData.NEG & 0x2));
1217 assert(!(extData.NEG & 0x4));
1218
1219 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1220 if (wf->execMask(lane)) {
1221 vdst[lane] = src0[lane] ^ src1[lane];
1222 }
1223 }
1224
1225 vdst.write();
1226 } // execute
1227 // --- Inst_VOP3__V_MAC_F32 class methods ---
1228
1230 : Inst_VOP3A(iFmt, "v_mac_f32", false)
1231 {
1232 setFlag(ALU);
1233 setFlag(F32);
1234 setFlag(MAC);
1235 } // Inst_VOP3__V_MAC_F32
1236
1238 {
1239 } // ~Inst_VOP3__V_MAC_F32
1240
1241 // --- description from .arch file ---
1242 // D.f = S0.f * S1.f + D.f.
1243 // SQ translates to V_MAD_F32.
1244 void
1246 {
1247 Wavefront *wf = gpuDynInst->wavefront();
1248 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1249 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1250 VecOperandF32 vdst(gpuDynInst, instData.VDST);
1251
1252 src0.readSrc();
1253 src1.readSrc();
1254 vdst.read();
1255
1256 if (instData.ABS & 0x1) {
1257 src0.absModifier();
1258 }
1259
1260 if (instData.ABS & 0x2) {
1261 src1.absModifier();
1262 }
1263
1264 if (extData.NEG & 0x1) {
1265 src0.negModifier();
1266 }
1267
1268 if (extData.NEG & 0x2) {
1269 src1.negModifier();
1270 }
1271
1275 assert(!(instData.ABS & 0x4));
1276 assert(!(extData.NEG & 0x4));
1277
1278 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1279 if (wf->execMask(lane)) {
1280 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
1281 }
1282 }
1283
1284 vdst.write();
1285 } // execute
1286 // --- Inst_VOP3__V_ADD_CO_U32 class methods ---
1287
1289 : Inst_VOP3B(iFmt, "v_add_co_u32")
1290 {
1291 setFlag(ALU);
1292 setFlag(WritesVCC);
1293 } // Inst_VOP3__V_ADD_CO_U32
1294
1296 {
1297 } // ~Inst_VOP3__V_ADD_CO_U32
1298
1299 // --- description from .arch file ---
1300 // D.u = S0.u + S1.u;
1301 // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED
1302 // --- overflow or carry-out for V_ADDC_U32.
1303 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
1304 void
1306 {
1307 Wavefront *wf = gpuDynInst->wavefront();
1308 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1309 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1310 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1311 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
1312
1313 src0.readSrc();
1314 src1.readSrc();
1315
1319 assert(!(extData.NEG & 0x1));
1320 assert(!(extData.NEG & 0x2));
1321 assert(!(extData.NEG & 0x4));
1322
1323 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1324 if (wf->execMask(lane)) {
1325 vdst[lane] = src0[lane] + src1[lane];
1326 vcc.setBit(lane, ((VecElemU64)src0[lane]
1327 + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
1328 }
1329 }
1330
1331 vdst.write();
1332 vcc.write();
1333 } // execute
1334 // --- Inst_VOP3__V_SUB_CO_U32 class methods ---
1335
1337 : Inst_VOP3B(iFmt, "v_sub_co_u32")
1338 {
1339 setFlag(ALU);
1340 setFlag(WritesVCC);
1341 } // Inst_VOP3__V_SUB_CO_U32
1342
1344 {
1345 } // ~Inst_VOP3__V_SUB_CO_U32
1346
1347 // --- description from .arch file ---
1348 // D.u = S0.u - S1.u;
1349 // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
1350 // carry-out for V_SUBB_U32.
1351 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
1352 void
1354 {
1355 Wavefront *wf = gpuDynInst->wavefront();
1356 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1357 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1358 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1359 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
1360
1361 src0.readSrc();
1362 src1.readSrc();
1363
1367 assert(!(extData.NEG & 0x1));
1368 assert(!(extData.NEG & 0x2));
1369 assert(!(extData.NEG & 0x4));
1370
1371 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1372 if (wf->execMask(lane)) {
1373 vdst[lane] = src0[lane] - src1[lane];
1374 vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
1375 }
1376 }
1377
1378 vdst.write();
1379 vcc.write();
1380 } // execute
1381 // --- Inst_VOP3__V_SUBREV_CO_U32 class methods ---
1382
1384 InFmt_VOP3B *iFmt)
1385 : Inst_VOP3B(iFmt, "v_subrev_co_u32")
1386 {
1387 setFlag(ALU);
1388 setFlag(WritesVCC);
1389 } // Inst_VOP3__V_SUBREV_CO_U32
1390
1392 {
1393 } // ~Inst_VOP3__V_SUBREV_CO_U32
1394
1395 // --- description from .arch file ---
1396 // D.u = S1.u - S0.u;
1397 // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
1398 // carry-out for V_SUBB_U32.
1399 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
1400 // SQ translates this to V_SUB_U32 with reversed operands.
1401 void
1403 {
1404 Wavefront *wf = gpuDynInst->wavefront();
1405 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1406 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1407 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1408 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
1409
1410 src0.readSrc();
1411 src1.readSrc();
1412
1416 assert(!(extData.NEG & 0x1));
1417 assert(!(extData.NEG & 0x2));
1418 assert(!(extData.NEG & 0x4));
1419
1420 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1421 if (wf->execMask(lane)) {
1422 vdst[lane] = src1[lane] - src0[lane];
1423 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
1424 }
1425 }
1426
1427 vdst.write();
1428 vcc.write();
1429 } // execute
1430 // --- Inst_VOP3__V_ADDC_CO_U32 class methods ---
1431
1433 : Inst_VOP3B(iFmt, "v_addc_co_u32")
1434 {
1435 setFlag(ALU);
1436 setFlag(WritesVCC);
1437 setFlag(ReadsVCC);
1438 } // Inst_VOP3__V_ADDC_CO_U32
1439
1441 {
1442 } // ~Inst_VOP3__V_ADDC_CO_U32
1443
1444 // --- description from .arch file ---
1445 // D.u = S0.u + S1.u + VCC[threadId];
1446 // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0)
1447 // is an UNSIGNED overflow.
1448 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1449 // source comes from the SGPR-pair at S2.u.
1450 void
1452 {
1453 Wavefront *wf = gpuDynInst->wavefront();
1454 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1455 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1456 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
1457 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1458 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1459
1460 src0.readSrc();
1461 src1.readSrc();
1462 vcc.read();
1463
1467 assert(!(extData.NEG & 0x1));
1468 assert(!(extData.NEG & 0x2));
1469 assert(!(extData.NEG & 0x4));
1470
1471 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1472 if (wf->execMask(lane)) {
1473 vdst[lane] = src0[lane] + src1[lane]
1474 + bits(vcc.rawData(), lane);
1475 sdst.setBit(lane, ((VecElemU64)src0[lane]
1476 + (VecElemU64)src1[lane]
1477 + (VecElemU64)bits(vcc.rawData(), lane))
1478 >= 0x100000000 ? 1 : 0);
1479 }
1480 }
1481
1482 vdst.write();
1483 sdst.write();
1484 } // execute
1485 // --- Inst_VOP3__V_SUBB_CO_U32 class methods ---
1486
1488 : Inst_VOP3B(iFmt, "v_subb_co_u32")
1489 {
1490 setFlag(ALU);
1491 setFlag(WritesVCC);
1492 setFlag(ReadsVCC);
1493 } // Inst_VOP3__V_SUBB_CO_U32
1494
1496 {
1497 } // ~Inst_VOP3__V_SUBB_CO_U32
1498
1499 // --- description from .arch file ---
1500 // D.u = S0.u - S1.u - VCC[threadId];
1501 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
1502 // --- overflow.
1503 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1504 // --- source comes from the SGPR-pair at S2.u.
1505 void
1507 {
1508 Wavefront *wf = gpuDynInst->wavefront();
1509 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1510 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1511 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
1512 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1513 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1514
1515 src0.readSrc();
1516 src1.readSrc();
1517 vcc.read();
1518
1522 assert(!(extData.NEG & 0x1));
1523 assert(!(extData.NEG & 0x2));
1524 assert(!(extData.NEG & 0x4));
1525
1526 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1527 if (wf->execMask(lane)) {
1528 vdst[lane] = src0[lane] - src1[lane]
1529 - bits(vcc.rawData(), lane);
1530 sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
1531 > src0[lane] ? 1 : 0);
1532 }
1533 }
1534
1535 vdst.write();
1536 sdst.write();
1537 } // execute
1538 // --- Inst_VOP3__V_SUBBREV_CO_U32 class methods ---
1539
1541 InFmt_VOP3B *iFmt)
1542 : Inst_VOP3B(iFmt, "v_subbrev_co_u32")
1543 {
1544 setFlag(ALU);
1545 setFlag(WritesVCC);
1546 setFlag(ReadsVCC);
1547 } // Inst_VOP3__V_SUBBREV_CO_U32
1548
1550 {
1551 } // ~Inst_VOP3__V_SUBBREV_CO_U32
1552
1553 // --- description from .arch file ---
1554 // D.u = S1.u - S0.u - VCC[threadId];
1555 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
1556 // overflow.
1557 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
1558 // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
1559 void
1561 {
1562 Wavefront *wf = gpuDynInst->wavefront();
1563 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
1564 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
1565 ConstScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1566 ScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
1567 VecOperandU32 vdst(gpuDynInst, instData.VDST);
1568
1569 src0.readSrc();
1570 src1.readSrc();
1571 vcc.read();
1572
1576 assert(!(extData.NEG & 0x1));
1577 assert(!(extData.NEG & 0x2));
1578 assert(!(extData.NEG & 0x4));
1579
1580 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1581 if (wf->execMask(lane)) {
1582 vdst[lane] = src1[lane] - src0[lane]
1583 - bits(vcc.rawData(), lane);
1584 sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
1585 > src0[lane] ? 1 : 0);
1586 }
1587 }
1588
1589 vdst.write();
1590 sdst.write();
1591 } // execute
1592 // --- Inst_VOP3__V_ADD_F16 class methods ---
1593
1595 : Inst_VOP3A(iFmt, "v_add_f16", false)
1596 {
1597 setFlag(ALU);
1598 setFlag(F16);
1599 } // Inst_VOP3__V_ADD_F16
1600
1602 {
1603 } // ~Inst_VOP3__V_ADD_F16
1604
1605 // --- description from .arch file ---
1606 // D.f16 = S0.f16 + S1.f16.
1607 // Supports denormals, round mode, exception flags, saturation.
1608 void
1610 {
1612 } // execute
1613 // --- Inst_VOP3__V_SUB_F16 class methods ---
1614
1616 : Inst_VOP3A(iFmt, "v_sub_f16", false)
1617 {
1618 setFlag(ALU);
1619 setFlag(F16);
1620 } // Inst_VOP3__V_SUB_F16
1621
1623 {
1624 } // ~Inst_VOP3__V_SUB_F16
1625
1626 // --- description from .arch file ---
1627 // D.f16 = S0.f16 - S1.f16.
1628 // Supports denormals, round mode, exception flags, saturation.
1629 // SQ translates to V_ADD_F16.
1630 void
1632 {
1634 } // execute
1635 // --- Inst_VOP3__V_SUBREV_F16 class methods ---
1636
1638 : Inst_VOP3A(iFmt, "v_subrev_f16", false)
1639 {
1640 setFlag(ALU);
1641 setFlag(F16);
1642 } // Inst_VOP3__V_SUBREV_F16
1643
1645 {
1646 } // ~Inst_VOP3__V_SUBREV_F16
1647
1648 // --- description from .arch file ---
1649 // D.f16 = S1.f16 - S0.f16.
1650 // Supports denormals, round mode, exception flags, saturation.
1651 // SQ translates to V_ADD_F16.
1652 void
1654 {
1656 } // execute
1657 // --- Inst_VOP3__V_MUL_F16 class methods ---
1658
1660 : Inst_VOP3A(iFmt, "v_mul_f16", false)
1661 {
1662 setFlag(ALU);
1663 setFlag(F16);
1664 } // Inst_VOP3__V_MUL_F16
1665
1667 {
1668 } // ~Inst_VOP3__V_MUL_F16
1669
1670 // --- description from .arch file ---
1671 // D.f16 = S0.f16 * S1.f16.
1672 // Supports denormals, round mode, exception flags, saturation.
1673 void
1675 {
1677 } // execute
1678 // --- Inst_VOP3__V_MAC_F16 class methods ---
1679
1681 : Inst_VOP3A(iFmt, "v_mac_f16", false)
1682 {
1683 setFlag(ALU);
1684 setFlag(F16);
1685 setFlag(MAC);
1686 } // Inst_VOP3__V_MAC_F16
1687
1689 {
1690 } // ~Inst_VOP3__V_MAC_F16
1691
1692 // --- description from .arch file ---
1693 // D.f16 = S0.f16 * S1.f16 + D.f16.
1694 // Supports round mode, exception flags, saturation.
1695 // SQ translates this to V_MAD_F16.
1696 void
1698 {
1700 } // execute
1701 // --- Inst_VOP3__V_ADD_U16 class methods ---
1702
1704 : Inst_VOP3A(iFmt, "v_add_u16", false)
1705 {
1706 setFlag(ALU);
1707 } // Inst_VOP3__V_ADD_U16
1708
1710 {
1711 } // ~Inst_VOP3__V_ADD_U16
1712
1713 // --- description from .arch file ---
1714 // D.u16 = S0.u16 + S1.u16.
1715 // Supports saturation (unsigned 16-bit integer domain).
1716 void
1718 {
1719 Wavefront *wf = gpuDynInst->wavefront();
1720 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1721 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1722 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1723
1724 src0.readSrc();
1725 src1.readSrc();
1726
1730 assert(!(instData.ABS & 0x1));
1731 assert(!(instData.ABS & 0x2));
1732 assert(!(instData.ABS & 0x4));
1733 assert(!(extData.NEG & 0x1));
1734 assert(!(extData.NEG & 0x2));
1735 assert(!(extData.NEG & 0x4));
1736
1737 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1738 if (wf->execMask(lane)) {
1739 vdst[lane] = src0[lane] + src1[lane];
1740 }
1741 }
1742
1743 vdst.write();
1744 } // execute
1745 // --- Inst_VOP3__V_SUB_U16 class methods ---
1746
1748 : Inst_VOP3A(iFmt, "v_sub_u16", false)
1749 {
1750 setFlag(ALU);
1751 } // Inst_VOP3__V_SUB_U16
1752
1754 {
1755 } // ~Inst_VOP3__V_SUB_U16
1756
1757 // --- description from .arch file ---
1758 // D.u16 = S0.u16 - S1.u16.
1759 // Supports saturation (unsigned 16-bit integer domain).
1760 void
1762 {
1763 Wavefront *wf = gpuDynInst->wavefront();
1764 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1765 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1766 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1767
1768 src0.readSrc();
1769 src1.readSrc();
1770
1774 assert(!(instData.ABS & 0x1));
1775 assert(!(instData.ABS & 0x2));
1776 assert(!(instData.ABS & 0x4));
1777 assert(!(extData.NEG & 0x1));
1778 assert(!(extData.NEG & 0x2));
1779 assert(!(extData.NEG & 0x4));
1780
1781 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1782 if (wf->execMask(lane)) {
1783 vdst[lane] = src0[lane] - src1[lane];
1784 }
1785 }
1786
1787 vdst.write();
1788 } // execute
1789 // --- Inst_VOP3__V_SUBREV_U16 class methods ---
1790
1792 : Inst_VOP3A(iFmt, "v_subrev_u16", false)
1793 {
1794 setFlag(ALU);
1795 } // Inst_VOP3__V_SUBREV_U16
1796
1798 {
1799 } // ~Inst_VOP3__V_SUBREV_U16
1800
1801 // --- description from .arch file ---
1802 // D.u16 = S1.u16 - S0.u16.
1803 // Supports saturation (unsigned 16-bit integer domain).
1804 // SQ translates this to V_SUB_U16 with reversed operands.
1805 void
1807 {
1808 Wavefront *wf = gpuDynInst->wavefront();
1809 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1810 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1811 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1812
1813 src0.readSrc();
1814 src1.readSrc();
1815
1819 assert(!(instData.ABS & 0x1));
1820 assert(!(instData.ABS & 0x2));
1821 assert(!(instData.ABS & 0x4));
1822 assert(!(extData.NEG & 0x1));
1823 assert(!(extData.NEG & 0x2));
1824 assert(!(extData.NEG & 0x4));
1825
1826 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1827 if (wf->execMask(lane)) {
1828 vdst[lane] = src1[lane] - src0[lane];
1829 }
1830 }
1831
1832 vdst.write();
1833 } // execute
1834 // --- Inst_VOP3__V_MUL_LO_U16 class methods ---
1835
1837 : Inst_VOP3A(iFmt, "v_mul_lo_u16", false)
1838 {
1839 setFlag(ALU);
1840 } // Inst_VOP3__V_MUL_LO_U16
1841
1843 {
1844 } // ~Inst_VOP3__V_MUL_LO_U16
1845
1846 // --- description from .arch file ---
1847 // D.u16 = S0.u16 * S1.u16.
1848 // Supports saturation (unsigned 16-bit integer domain).
1849 void
1851 {
1852 Wavefront *wf = gpuDynInst->wavefront();
1853 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1854 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1855 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1856
1857 src0.readSrc();
1858 src1.readSrc();
1859
1863 assert(!(instData.ABS & 0x1));
1864 assert(!(instData.ABS & 0x2));
1865 assert(!(instData.ABS & 0x4));
1866 assert(!(extData.NEG & 0x1));
1867 assert(!(extData.NEG & 0x2));
1868 assert(!(extData.NEG & 0x4));
1869
1870 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1871 if (wf->execMask(lane)) {
1872 vdst[lane] = src0[lane] * src1[lane];
1873 }
1874 }
1875
1876 vdst.write();
1877 } // execute
1878 // --- Inst_VOP3__V_LSHLREV_B16 class methods ---
1879
1881 : Inst_VOP3A(iFmt, "v_lshlrev_b16", false)
1882 {
1883 setFlag(ALU);
1884 } // Inst_VOP3__V_LSHLREV_B16
1885
1887 {
1888 } // ~Inst_VOP3__V_LSHLREV_B16
1889
1890 // --- description from .arch file ---
1891 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
1892 // SQ translates this to an internal SP opcode.
1893 void
1895 {
1896 Wavefront *wf = gpuDynInst->wavefront();
1897 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1898 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1899 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1900
1901 src0.readSrc();
1902 src1.readSrc();
1903
1907 assert(!(instData.ABS & 0x1));
1908 assert(!(instData.ABS & 0x2));
1909 assert(!(instData.ABS & 0x4));
1910 assert(!(extData.NEG & 0x1));
1911 assert(!(extData.NEG & 0x2));
1912 assert(!(extData.NEG & 0x4));
1913
1914 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1915 if (wf->execMask(lane)) {
1916 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
1917 }
1918 }
1919
1920 vdst.write();
1921 } // execute
1922 // --- Inst_VOP3__V_LSHRREV_B16 class methods ---
1923
1925 : Inst_VOP3A(iFmt, "v_lshrrev_b16", false)
1926 {
1927 setFlag(ALU);
1928 } // Inst_VOP3__V_LSHRREV_B16
1929
1931 {
1932 } // ~Inst_VOP3__V_LSHRREV_B16
1933
1934 // --- description from .arch file ---
1935 // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
1936 // The vacated bits are set to zero.
1937 // SQ translates this to an internal SP opcode.
1938 void
1940 {
1941 Wavefront *wf = gpuDynInst->wavefront();
1942 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
1943 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1944 VecOperandU16 vdst(gpuDynInst, instData.VDST);
1945
1946 src0.readSrc();
1947 src1.readSrc();
1948
1949 if (instData.ABS & 0x1) {
1950 src0.absModifier();
1951 }
1952
1953 if (instData.ABS & 0x2) {
1954 src1.absModifier();
1955 }
1956
1957 if (extData.NEG & 0x1) {
1958 src0.negModifier();
1959 }
1960
1961 if (extData.NEG & 0x2) {
1962 src1.negModifier();
1963 }
1964
1965 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1966 if (wf->execMask(lane)) {
1967 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
1968 }
1969 }
1970
1971 vdst.write();
1972 } // execute
1973 // --- Inst_VOP3__V_ASHRREV_I16 class methods ---
1974
1976 : Inst_VOP3A(iFmt, "v_ashrrev_i16", false)
1977 {
1978 setFlag(ALU);
1979 } // Inst_VOP3__V_ASHRREV_I16
1980
1982 {
1983 } // ~Inst_VOP3__V_ASHRREV_I16
1984
1985 // --- description from .arch file ---
1986 // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
1987 // The vacated bits are set to the sign bit of the input value.
1988 // SQ translates this to an internal SP opcode.
1989 void
1991 {
1992 Wavefront *wf = gpuDynInst->wavefront();
1993 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
1994 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
1995 VecOperandI16 vdst(gpuDynInst, instData.VDST);
1996
1997 src0.readSrc();
1998 src1.readSrc();
1999
2003 assert(!(instData.ABS & 0x1));
2004 assert(!(instData.ABS & 0x2));
2005 assert(!(instData.ABS & 0x4));
2006 assert(!(extData.NEG & 0x1));
2007 assert(!(extData.NEG & 0x2));
2008 assert(!(extData.NEG & 0x4));
2009
2010 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2011 if (wf->execMask(lane)) {
2012 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
2013 }
2014 }
2015
2016 vdst.write();
2017 } // execute
2018 // --- Inst_VOP3__V_MAX_F16 class methods ---
2019
2021 : Inst_VOP3A(iFmt, "v_max_f16", false)
2022 {
2023 setFlag(ALU);
2024 setFlag(F16);
2025 } // Inst_VOP3__V_MAX_F16
2026
2028 {
2029 } // ~Inst_VOP3__V_MAX_F16
2030
2031 // --- description from .arch file ---
2032 // D.f16 = max(S0.f16, S1.f16).
2033 // IEEE compliant. Supports denormals, round mode, exception flags,
2034 // saturation.
2035 void
2037 {
2039 } // execute
2040 // --- Inst_VOP3__V_MIN_F16 class methods ---
2041
2043 : Inst_VOP3A(iFmt, "v_min_f16", false)
2044 {
2045 setFlag(ALU);
2046 setFlag(F16);
2047 } // Inst_VOP3__V_MIN_F16
2048
2050 {
2051 } // ~Inst_VOP3__V_MIN_F16
2052
2053 // --- description from .arch file ---
2054 // D.f16 = min(S0.f16, S1.f16).
2055 // IEEE compliant. Supports denormals, round mode, exception flags,
2056 // saturation.
2057 void
2059 {
2061 } // execute
2062 // --- Inst_VOP3__V_MAX_U16 class methods ---
2063
2065 : Inst_VOP3A(iFmt, "v_max_u16", false)
2066 {
2067 setFlag(ALU);
2068 } // Inst_VOP3__V_MAX_U16
2069
2071 {
2072 } // ~Inst_VOP3__V_MAX_U16
2073
2074 // --- description from .arch file ---
2075 // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
2076 void
2078 {
2079 Wavefront *wf = gpuDynInst->wavefront();
2080 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
2081 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
2082 VecOperandU16 vdst(gpuDynInst, instData.VDST);
2083
2084 src0.readSrc();
2085 src1.readSrc();
2086
2087 if (instData.ABS & 0x1) {
2088 src0.absModifier();
2089 }
2090
2091 if (instData.ABS & 0x2) {
2092 src1.absModifier();
2093 }
2094
2095 if (extData.NEG & 0x1) {
2096 src0.negModifier();
2097 }
2098
2099 if (extData.NEG & 0x2) {
2100 src1.negModifier();
2101 }
2102
2103 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2104 if (wf->execMask(lane)) {
2105 vdst[lane] = std::max(src0[lane], src1[lane]);
2106 }
2107 }
2108
2109 vdst.write();
2110 } // execute
2111 // --- Inst_VOP3__V_MAX_I16 class methods ---
2112
2114 : Inst_VOP3A(iFmt, "v_max_i16", false)
2115 {
2116 setFlag(ALU);
2117 } // Inst_VOP3__V_MAX_I16
2118
2120 {
2121 } // ~Inst_VOP3__V_MAX_I16
2122
2123 // --- description from .arch file ---
2124 // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
2125 void
2127 {
2128 Wavefront *wf = gpuDynInst->wavefront();
2129 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
2130 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
2131 VecOperandI16 vdst(gpuDynInst, instData.VDST);
2132
2133 src0.readSrc();
2134 src1.readSrc();
2135
2136 if (instData.ABS & 0x1) {
2137 src0.absModifier();
2138 }
2139
2140 if (instData.ABS & 0x2) {
2141 src1.absModifier();
2142 }
2143
2144 if (extData.NEG & 0x1) {
2145 src0.negModifier();
2146 }
2147
2148 if (extData.NEG & 0x2) {
2149 src1.negModifier();
2150 }
2151
2152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2153 if (wf->execMask(lane)) {
2154 vdst[lane] = std::max(src0[lane], src1[lane]);
2155 }
2156 }
2157
2158 vdst.write();
2159 } // execute
2160 // --- Inst_VOP3__V_MIN_U16 class methods ---
2161
2163 : Inst_VOP3A(iFmt, "v_min_u16", false)
2164 {
2165 setFlag(ALU);
2166 } // Inst_VOP3__V_MIN_U16
2167
2169 {
2170 } // ~Inst_VOP3__V_MIN_U16
2171
2172 // --- description from .arch file ---
2173 // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
2174 void
2176 {
2177 Wavefront *wf = gpuDynInst->wavefront();
2178 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
2179 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
2180 VecOperandU16 vdst(gpuDynInst, instData.VDST);
2181
2182 src0.readSrc();
2183 src1.readSrc();
2184
2185 if (instData.ABS & 0x1) {
2186 src0.absModifier();
2187 }
2188
2189 if (instData.ABS & 0x2) {
2190 src1.absModifier();
2191 }
2192
2193 if (extData.NEG & 0x1) {
2194 src0.negModifier();
2195 }
2196
2197 if (extData.NEG & 0x2) {
2198 src1.negModifier();
2199 }
2200
2201 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2202 if (wf->execMask(lane)) {
2203 vdst[lane] = std::min(src0[lane], src1[lane]);
2204 }
2205 }
2206
2207 vdst.write();
2208 } // execute
2209 // --- Inst_VOP3__V_MIN_I16 class methods ---
2210
2212 : Inst_VOP3A(iFmt, "v_min_i16", false)
2213 {
2214 setFlag(ALU);
2215 } // Inst_VOP3__V_MIN_I16
2216
2218 {
2219 } // ~Inst_VOP3__V_MIN_I16
2220
2221 // --- description from .arch file ---
2222 // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
2223 void
2225 {
2226 Wavefront *wf = gpuDynInst->wavefront();
2227 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
2228 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
2229 VecOperandI16 vdst(gpuDynInst, instData.VDST);
2230
2231 src0.readSrc();
2232 src1.readSrc();
2233
2234 if (instData.ABS & 0x1) {
2235 src0.absModifier();
2236 }
2237
2238 if (instData.ABS & 0x2) {
2239 src1.absModifier();
2240 }
2241
2242 if (extData.NEG & 0x1) {
2243 src0.negModifier();
2244 }
2245
2246 if (extData.NEG & 0x2) {
2247 src1.negModifier();
2248 }
2249
2250 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2251 if (wf->execMask(lane)) {
2252 vdst[lane] = std::min(src0[lane], src1[lane]);
2253 }
2254 }
2255
2256 vdst.write();
2257 } // execute
2258 // --- Inst_VOP3__V_LDEXP_F16 class methods ---
2259
2261 : Inst_VOP3A(iFmt, "v_ldexp_f16", false)
2262 {
2263 setFlag(ALU);
2264 setFlag(F16);
2265 } // Inst_VOP3__V_LDEXP_F16
2266
2268 {
2269 } // ~Inst_VOP3__V_LDEXP_F16
2270
2271 // --- description from .arch file ---
2272 // D.f16 = S0.f16 * (2 ** S1.i16).
2273 void
2275 {
2277 } // execute
2278 // --- Inst_VOP3__V_ADD_U32 class methods ---
2279
2281 : Inst_VOP3A(iFmt, "v_add_u32", false)
2282 {
2283 setFlag(ALU);
2284 } // Inst_VOP3__V_ADD_U32
2285
2287 {
2288 } // ~Inst_VOP3__V_ADD_U32
2289
2290 // --- description from .arch file ---
2291 // D.u32 = S0.u32 + S1.u32.
2292 void
2294 {
2295 Wavefront *wf = gpuDynInst->wavefront();
2296 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
2297 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
2298 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2299
2300 src0.readSrc();
2301 src1.readSrc();
2302
2306 assert(!(instData.ABS & 0x1));
2307 assert(!(instData.ABS & 0x2));
2308 assert(!(instData.ABS & 0x4));
2309 assert(!(extData.NEG & 0x1));
2310 assert(!(extData.NEG & 0x2));
2311 assert(!(extData.NEG & 0x4));
2312
2313 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2314 if (wf->execMask(lane)) {
2315 vdst[lane] = src0[lane] + src1[lane];
2316 }
2317 }
2318
2319 vdst.write();
2320 } // execute
2321 // --- Inst_VOP3__V_SUB_U32 class methods ---
2322
2324 : Inst_VOP3A(iFmt, "v_sub_u32", false)
2325 {
2326 setFlag(ALU);
2327 } // Inst_VOP3__V_SUB_U32
2328
2330 {
2331 } // ~Inst_VOP3__V_SUB_U32
2332
2333 // --- description from .arch file ---
2334 // D.u32 = S0.u32 - S1.u32.
2335 void
2337 {
2338 Wavefront *wf = gpuDynInst->wavefront();
2339 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
2340 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
2341 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2342
2343 src0.readSrc();
2344 src1.readSrc();
2345
2349 assert(!(instData.ABS & 0x1));
2350 assert(!(instData.ABS & 0x2));
2351 assert(!(instData.ABS & 0x4));
2352 assert(!(extData.NEG & 0x1));
2353 assert(!(extData.NEG & 0x2));
2354 assert(!(extData.NEG & 0x4));
2355
2356 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2357 if (wf->execMask(lane)) {
2358 vdst[lane] = src0[lane] - src1[lane];
2359 }
2360 }
2361
2362 vdst.write();
2363 } // execute
2364 // --- Inst_VOP3__V_SUBREV_U32 class methods ---
2365
2367 : Inst_VOP3A(iFmt, "v_subrev_u32", false)
2368 {
2369 setFlag(ALU);
2370 } // Inst_VOP3__V_SUBREV_U32
2371
2373 {
2374 } // ~Inst_VOP3__V_SUBREV_U32
2375
2376 // --- description from .arch file ---
2377 // D.u32 = S1.u32 - S0.u32.
2378 void
2380 {
2381 Wavefront *wf = gpuDynInst->wavefront();
2382 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
2383 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
2384 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2385
2386 src0.readSrc();
2387 src1.readSrc();
2388
2392 assert(!(instData.ABS & 0x1));
2393 assert(!(instData.ABS & 0x2));
2394 assert(!(instData.ABS & 0x4));
2395 assert(!(extData.NEG & 0x1));
2396 assert(!(extData.NEG & 0x2));
2397 assert(!(extData.NEG & 0x4));
2398
2399 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2400 if (wf->execMask(lane)) {
2401 vdst[lane] = src1[lane] - src0[lane];
2402 }
2403 }
2404
2405 vdst.write();
2406 } // execute
2407 // --- Inst_VOP3__V_FMAC_F32 class methods ---
2408
2410 : Inst_VOP3A(iFmt, "v_fmac_f32", false)
2411 {
2412 setFlag(ALU);
2413 setFlag(F32);
2414 setFlag(FMA);
2415 } // Inst_VOP3__V_FMAC_F32
2416
2418 {
2419 } // ~Inst_VOP3__V_FMAC_F32
2420
2421 // --- description from .arch file ---
2422 // D.f = S0.f * S1.f + D.f.
2423 void
2425 {
2426 Wavefront *wf = gpuDynInst->wavefront();
2427 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2428 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2429 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2430
2431 src0.readSrc();
2432 src1.readSrc();
2433 vdst.read();
2434
2435 panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
2436 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2437 panic_if(instData.OPSEL, "OPSEL not implemented for %s", _opcode);
2438
2439 if (instData.ABS & 0x1) {
2440 src0.absModifier();
2441 }
2442
2443 if (instData.ABS & 0x2) {
2444 src1.absModifier();
2445 }
2446
2447 if (instData.ABS & 0x4) {
2448 vdst.absModifier();
2449 }
2450
2451 if (extData.NEG & 0x1) {
2452 src0.negModifier();
2453 }
2454
2455 if (extData.NEG & 0x2) {
2456 src1.negModifier();
2457 }
2458
2459 if (extData.NEG & 0x4) {
2460 vdst.negModifier();
2461 }
2462
2463 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2464 if (wf->execMask(lane)) {
2465 float out = std::fma(src0[lane], src1[lane], vdst[lane]);
2466 out = omodModifier(out, extData.OMOD);
2467 out = std::clamp(vdst[lane], 0.0f, 1.0f);
2468 vdst[lane] = out;
2469 }
2470 }
2471
2472 vdst.write();
2473 } // execute
2474 // --- Inst_VOP3__V_NOP class methods ---
2475
2477 : Inst_VOP3A(iFmt, "v_nop", false)
2478 {
2479 setFlag(Nop);
2480 setFlag(ALU);
2481 } // Inst_VOP3__V_NOP
2482
2484 {
2485 } // ~Inst_VOP3__V_NOP
2486
2487 // --- description from .arch file ---
2488 // Do nothing.
2489 void
2491 {
2492 } // execute
2493 // --- Inst_VOP3__V_MOV_B32 class methods ---
2494
2496 : Inst_VOP3A(iFmt, "v_mov_b32", false)
2497 {
2498 setFlag(ALU);
2499 } // Inst_VOP3__V_MOV_B32
2500
2502 {
2503 } // ~Inst_VOP3__V_MOV_B32
2504
2505 // --- description from .arch file ---
2506 // D.u = S0.u.
2507 // Input and output modifiers not supported; this is an untyped operation.
2508 void
2510 {
2511 Wavefront *wf = gpuDynInst->wavefront();
2512 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
2513 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2514
2515 src.readSrc();
2516
2517 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2518 if (wf->execMask(lane)) {
2519 vdst[lane] = src[lane];
2520 }
2521 }
2522
2523 vdst.write();
2524 } // execute
2525 // --- Inst_VOP3__V_CVT_I32_F64 class methods ---
2526
2528 : Inst_VOP3A(iFmt, "v_cvt_i32_f64", false)
2529 {
2530 setFlag(ALU);
2531 setFlag(F64);
2532 } // Inst_VOP3__V_CVT_I32_F64
2533
2535 {
2536 } // ~Inst_VOP3__V_CVT_I32_F64
2537
2538 // --- description from .arch file ---
2539 // D.i = (int)S0.d.
2540 // Out-of-range floating point values (including infinity) saturate. NaN is
2541 // --- converted to 0.
2542 void
2544 {
2545 Wavefront *wf = gpuDynInst->wavefront();
2546 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
2547 VecOperandI32 vdst(gpuDynInst, instData.VDST);
2548
2549 src.readSrc();
2550
2551 if (instData.ABS & 0x1) {
2552 src.absModifier();
2553 }
2554
2555 if (extData.NEG & 0x1) {
2556 src.negModifier();
2557 }
2558
2559 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2560 if (wf->execMask(lane)) {
2561 int exp;
2562 std::frexp(src[lane],&exp);
2563 if (std::isnan(src[lane])) {
2564 vdst[lane] = 0;
2565 } else if (std::isinf(src[lane]) || exp > 30) {
2566 if (std::signbit(src[lane])) {
2567 vdst[lane] = INT_MIN;
2568 } else {
2569 vdst[lane] = INT_MAX;
2570 }
2571 } else {
2572 vdst[lane] = (VecElemI32)src[lane];
2573 }
2574 }
2575 }
2576
2577 vdst.write();
2578 } // execute
2579 // --- Inst_VOP3__V_CVT_F64_I32 class methods ---
2580
2582 : Inst_VOP3A(iFmt, "v_cvt_f64_i32", false)
2583 {
2584 setFlag(ALU);
2585 setFlag(F64);
2586 } // Inst_VOP3__V_CVT_F64_I32
2587
2589 {
2590 } // ~Inst_VOP3__V_CVT_F64_I32
2591
2592 // --- description from .arch file ---
2593 // D.d = (double)S0.i.
2594 void
2596 {
2597 Wavefront *wf = gpuDynInst->wavefront();
2598 ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
2599 VecOperandF64 vdst(gpuDynInst, instData.VDST);
2600
2601 src.readSrc();
2602
2603 if (instData.ABS & 0x1) {
2604 src.absModifier();
2605 }
2606
2607 if (extData.NEG & 0x1) {
2608 src.negModifier();
2609 }
2610
2611 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2612 if (wf->execMask(lane)) {
2613 vdst[lane] = (VecElemF64)src[lane];
2614 }
2615 }
2616
2617 vdst.write();
2618 } // execute
2619 // --- Inst_VOP3__V_CVT_F32_I32 class methods ---
2620
2622 : Inst_VOP3A(iFmt, "v_cvt_f32_i32", false)
2623 {
2624 setFlag(ALU);
2625 setFlag(F32);
2626 } // Inst_VOP3__V_CVT_F32_I32
2627
2629 {
2630 } // ~Inst_VOP3__V_CVT_F32_I32
2631
2632 // --- description from .arch file ---
2633 // D.f = (float)S0.i.
2634 void
2636 {
2637 Wavefront *wf = gpuDynInst->wavefront();
2638 VecOperandI32 src(gpuDynInst, extData.SRC0);
2639 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2640
2641 src.readSrc();
2642
2646 assert(!(instData.ABS & 0x1));
2647 assert(!(instData.ABS & 0x2));
2648 assert(!(instData.ABS & 0x4));
2649 assert(!(extData.NEG & 0x1));
2650 assert(!(extData.NEG & 0x2));
2651 assert(!(extData.NEG & 0x4));
2652
2653 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2654 if (wf->execMask(lane)) {
2655 vdst[lane] = (VecElemF32)src[lane];
2656 }
2657 }
2658
2659 vdst.write();
2660 } // execute
2661 // --- Inst_VOP3__V_CVT_F32_U32 class methods ---
2662
2664 : Inst_VOP3A(iFmt, "v_cvt_f32_u32", false)
2665 {
2666 setFlag(ALU);
2667 setFlag(F32);
2668 } // Inst_VOP3__V_CVT_F32_U32
2669
2671 {
2672 } // ~Inst_VOP3__V_CVT_F32_U32
2673
2674 // --- description from .arch file ---
2675 // D.f = (float)S0.u.
2676 void
2678 {
2679 Wavefront *wf = gpuDynInst->wavefront();
2680 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
2681 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2682
2683 src.readSrc();
2684
2685 if (instData.ABS & 0x1) {
2686 src.absModifier();
2687 }
2688
2689 if (extData.NEG & 0x1) {
2690 src.negModifier();
2691 }
2692
2693 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2694 if (wf->execMask(lane)) {
2695 vdst[lane] = (VecElemF32)src[lane];
2696 }
2697 }
2698
2699 vdst.write();
2700 } // execute
2701 // --- Inst_VOP3__V_CVT_U32_F32 class methods ---
2702
2704 : Inst_VOP3A(iFmt, "v_cvt_u32_f32", false)
2705 {
2706 setFlag(ALU);
2707 setFlag(F32);
2708 } // Inst_VOP3__V_CVT_U32_F32
2709
2711 {
2712 } // ~Inst_VOP3__V_CVT_U32_F32
2713
2714 // --- description from .arch file ---
2715 // D.u = (unsigned)S0.f.
2716 // Out-of-range floating point values (including infinity) saturate. NaN is
2717 // --- converted to 0.
2718 void
2720 {
2721 Wavefront *wf = gpuDynInst->wavefront();
2722 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
2723 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2724
2725 src.readSrc();
2726
2727 if (instData.ABS & 0x1) {
2728 src.absModifier();
2729 }
2730
2731 if (extData.NEG & 0x1) {
2732 src.negModifier();
2733 }
2734
2735 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2736 if (wf->execMask(lane)) {
2737 int exp;
2738 std::frexp(src[lane],&exp);
2739 if (std::isnan(src[lane])) {
2740 vdst[lane] = 0;
2741 } else if (std::isinf(src[lane])) {
2742 if (std::signbit(src[lane])) {
2743 vdst[lane] = 0;
2744 } else {
2745 vdst[lane] = UINT_MAX;
2746 }
2747 } else if (exp > 31) {
2748 vdst[lane] = UINT_MAX;
2749 } else {
2750 vdst[lane] = (VecElemU32)src[lane];
2751 }
2752 }
2753 }
2754
2755 vdst.write();
2756 } // execute
2757 // --- Inst_VOP3__V_CVT_I32_F32 class methods ---
2758
2760 : Inst_VOP3A(iFmt, "v_cvt_i32_f32", false)
2761 {
2762 setFlag(ALU);
2763 setFlag(F32);
2764 } // Inst_VOP3__V_CVT_I32_F32
2765
2767 {
2768 } // ~Inst_VOP3__V_CVT_I32_F32
2769
2770 // --- description from .arch file ---
2771 // D.i = (int)S0.f.
2772 // Out-of-range floating point values (including infinity) saturate. NaN is
2773 // --- converted to 0.
2774 void
2776 {
2777 Wavefront *wf = gpuDynInst->wavefront();
2778 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
2779 VecOperandI32 vdst(gpuDynInst, instData.VDST);
2780
2781 src.readSrc();
2782
2783 if (instData.ABS & 0x1) {
2784 src.absModifier();
2785 }
2786
2787 if (extData.NEG & 0x1) {
2788 src.negModifier();
2789 }
2790
2794 assert(!(instData.ABS & 0x2));
2795 assert(!(instData.ABS & 0x4));
2796 assert(!(extData.NEG & 0x2));
2797 assert(!(extData.NEG & 0x4));
2798
2799 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2800 if (wf->execMask(lane)) {
2801 int exp;
2802 std::frexp(src[lane],&exp);
2803 if (std::isnan(src[lane])) {
2804 vdst[lane] = 0;
2805 } else if (std::isinf(src[lane]) || exp > 30) {
2806 if (std::signbit(src[lane])) {
2807 vdst[lane] = INT_MIN;
2808 } else {
2809 vdst[lane] = INT_MAX;
2810 }
2811 } else {
2812 vdst[lane] = (VecElemI32)src[lane];
2813 }
2814 }
2815 }
2816
2817 vdst.write();
2818 } // execute
2819 // --- Inst_VOP3__V_MOV_FED_B32 class methods ---
2820
2822 : Inst_VOP3A(iFmt, "v_mov_fed_b32", false)
2823 {
2824 setFlag(ALU);
2825 } // Inst_VOP3__V_MOV_FED_B32
2826
2828 {
2829 } // ~Inst_VOP3__V_MOV_FED_B32
2830
2831 // --- description from .arch file ---
2832 // D.u = S0.u;
2833 // Introduce EDC double error upon write to dest vgpr without causing an
2834 // --- exception.
2835 // Input and output modifiers not supported; this is an untyped operation.
2836 void
2838 {
2840 } // execute
2841 // --- Inst_VOP3__V_CVT_F16_F32 class methods ---
2842
2844 : Inst_VOP3A(iFmt, "v_cvt_f16_f32", false)
2845 {
2846 setFlag(ALU);
2847 setFlag(F32);
2848 } // Inst_VOP3__V_CVT_F16_F32
2849
2851 {
2852 } // ~Inst_VOP3__V_CVT_F16_F32
2853
2854 // --- description from .arch file ---
2855 // D.f16 = flt32_to_flt16(S0.f).
2856 // Supports input modifiers and creates FP16 denormals when appropriate.
2857 void
2859 {
2860 Wavefront *wf = gpuDynInst->wavefront();
2861 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2862 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2863
2864 src0.readSrc();
2865 vdst.read();
2866
2867 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2868 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2869
2870 unsigned abs = instData.ABS;
2871 unsigned neg = extData.NEG;
2872 int opsel = instData.OPSEL;
2873
2874 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2875 if (wf->execMask(lane)) {
2876 float tmp = src0[lane];
2877
2878 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2879 if (neg & 1) tmp = -tmp;
2880
2881 tmp = omodModifier(tmp, extData.OMOD);
2882 tmp = std::clamp(tmp, 0.0f, 1.0f);
2883
2884 AMDGPU::mxfloat16 out(tmp);
2885
2886 // If opsel[3] use upper 16-bits of dest, otherwise lower.
2887 if (opsel & 8) {
2888 replaceBits(vdst[lane], 31, 16, (out.data >> 16));
2889 } else {
2890 replaceBits(vdst[lane], 15, 0, (out.data >> 16));
2891 }
2892 }
2893 }
2894
2895 vdst.write();
2896 } // execute
2897 // --- Inst_VOP3__V_CVT_F32_F16 class methods ---
2898
2900 : Inst_VOP3A(iFmt, "v_cvt_f32_f16", false)
2901 {
2902 setFlag(ALU);
2903 setFlag(F32);
2904 } // Inst_VOP3__V_CVT_F32_F16
2905
2907 {
2908 } // ~Inst_VOP3__V_CVT_F32_F16
2909
2910 // --- description from .arch file ---
2911 // D.f = flt16_to_flt32(S0.f16).
2912 // FP16 denormal inputs are always accepted.
2913 void
2915 {
2916 Wavefront *wf = gpuDynInst->wavefront();
2917 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
2918 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2919
2920 src0.readSrc();
2921
2922 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2923 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2924 panic_if(instData.OPSEL, "OPSEL not implemented for %s", _opcode);
2925
2926 unsigned abs = instData.ABS;
2927 unsigned neg = extData.NEG;
2928
2929 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2930 if (wf->execMask(lane)) {
2931 AMDGPU::mxfloat16 tmp(src0[lane]);
2932
2933 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2934 if (neg & 1) tmp = -tmp;
2935
2936 float out = omodModifier(float(tmp), extData.OMOD);
2937 out = std::clamp(out, 0.0f, 1.0f);
2938
2939 vdst[lane] = out;
2940 }
2941 }
2942
2943 vdst.write();
2944 } // execute
2945 // --- Inst_VOP3__V_CVT_RPI_I32_F32 class methods ---
2946
2948 InFmt_VOP3A *iFmt)
2949 : Inst_VOP3A(iFmt, "v_cvt_rpi_i32_f32", false)
2950 {
2951 setFlag(ALU);
2952 setFlag(F32);
2953 } // Inst_VOP3__V_CVT_RPI_I32_F32
2954
2956 {
2957 } // ~Inst_VOP3__V_CVT_RPI_I32_F32
2958
2959 // --- description from .arch file ---
2960 // D.i = (int)floor(S0.f + 0.5).
2961 void
2963 {
2964 Wavefront *wf = gpuDynInst->wavefront();
2965 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
2966 VecOperandI32 vdst(gpuDynInst, instData.VDST);
2967
2968 src.readSrc();
2969
2970 if (instData.ABS & 0x1) {
2971 src.absModifier();
2972 }
2973
2974 if (extData.NEG & 0x1) {
2975 src.negModifier();
2976 }
2977
2978 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2979 if (wf->execMask(lane)) {
2980 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
2981 }
2982 }
2983
2984 vdst.write();
2985 } // execute
2986 // --- Inst_VOP3__V_CVT_FLR_I32_F32 class methods ---
2987
2989 InFmt_VOP3A *iFmt)
2990 : Inst_VOP3A(iFmt, "v_cvt_flr_i32_f32", false)
2991 {
2992 setFlag(ALU);
2993 setFlag(F32);
2994 } // Inst_VOP3__V_CVT_FLR_I32_F32
2995
2997 {
2998 } // ~Inst_VOP3__V_CVT_FLR_I32_F32
2999
3000 // --- description from .arch file ---
3001 // D.i = (int)floor(S0.f).
3002 void
3004 {
3005 Wavefront *wf = gpuDynInst->wavefront();
3006 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3007 VecOperandI32 vdst(gpuDynInst, instData.VDST);
3008
3009 src.readSrc();
3010
3011 if (instData.ABS & 0x1) {
3012 src.absModifier();
3013 }
3014
3015 if (extData.NEG & 0x1) {
3016 src.negModifier();
3017 }
3018
3019 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3020 if (wf->execMask(lane)) {
3021 vdst[lane] = (VecElemI32)std::floor(src[lane]);
3022 }
3023 }
3024
3025 vdst.write();
3026 } // execute
3027 // --- Inst_VOP3__V_CVT_OFF_F32_I4 class methods ---
3028
3030 : Inst_VOP3A(iFmt, "v_cvt_off_f32_i4", false)
3031 {
3032 setFlag(ALU);
3033 setFlag(F32);
3034 } // Inst_VOP3__V_CVT_OFF_F32_I4
3035
3037 {
3038 } // ~Inst_VOP3__V_CVT_OFF_F32_I4
3039
3040 // --- description from .arch file ---
3041 // 4-bit signed int to 32-bit float. Used for interpolation in shader.
3042 void
3044 {
3045 // Could not parse sq_uc.arch desc field
3047 } // execute
3048 // --- Inst_VOP3__V_CVT_F32_F64 class methods ---
3049
3051 : Inst_VOP3A(iFmt, "v_cvt_f32_f64", false)
3052 {
3053 setFlag(ALU);
3054 setFlag(F64);
3055 } // Inst_VOP3__V_CVT_F32_F64
3056
3058 {
3059 } // ~Inst_VOP3__V_CVT_F32_F64
3060
3061 // --- description from .arch file ---
3062 // D.f = (float)S0.d.
3063 void
3065 {
3066 Wavefront *wf = gpuDynInst->wavefront();
3067 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3068 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3069
3070 src.readSrc();
3071
3072 if (instData.ABS & 0x1) {
3073 src.absModifier();
3074 }
3075
3076 if (extData.NEG & 0x1) {
3077 src.negModifier();
3078 }
3079
3083 assert(!(instData.ABS & 0x2));
3084 assert(!(instData.ABS & 0x4));
3085 assert(!(extData.NEG & 0x2));
3086 assert(!(extData.NEG & 0x4));
3087
3088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3089 if (wf->execMask(lane)) {
3090 vdst[lane] = (VecElemF32)src[lane];
3091 }
3092 }
3093
3094 vdst.write();
3095 } // execute
3096 // --- Inst_VOP3__V_CVT_F64_F32 class methods ---
3097
3099 : Inst_VOP3A(iFmt, "v_cvt_f64_f32", false)
3100 {
3101 setFlag(ALU);
3102 setFlag(F64);
3103 } // Inst_VOP3__V_CVT_F64_F32
3104
3106 {
3107 } // ~Inst_VOP3__V_CVT_F64_F32
3108
3109 // --- description from .arch file ---
3110 // D.d = (double)S0.f.
3111 void
3113 {
3114 Wavefront *wf = gpuDynInst->wavefront();
3115 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3116 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3117
3118 src.readSrc();
3119
3120 if (instData.ABS & 0x1) {
3121 src.absModifier();
3122 }
3123
3124 if (extData.NEG & 0x1) {
3125 src.negModifier();
3126 }
3127
3131 assert(!(instData.ABS & 0x2));
3132 assert(!(instData.ABS & 0x4));
3133 assert(!(extData.NEG & 0x2));
3134 assert(!(extData.NEG & 0x4));
3135
3136 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3137 if (wf->execMask(lane)) {
3138 vdst[lane] = (VecElemF64)src[lane];
3139 }
3140 }
3141
3142 vdst.write();
3143 } // execute
3144 // --- Inst_VOP3__V_CVT_F32_UBYTE0 class methods ---
3145
3147 : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte0", false)
3148 {
3149 setFlag(ALU);
3150 setFlag(F32);
3151 } // Inst_VOP3__V_CVT_F32_UBYTE0
3152
3154 {
3155 } // ~Inst_VOP3__V_CVT_F32_UBYTE0
3156
3157 // --- description from .arch file ---
3158 // D.f = (float)(S0.u[7:0]).
3159 void
3161 {
3162 Wavefront *wf = gpuDynInst->wavefront();
3163 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3164 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3165
3166 src.readSrc();
3167
3168 if (instData.ABS & 0x1) {
3169 src.absModifier();
3170 }
3171
3172 if (extData.NEG & 0x1) {
3173 src.negModifier();
3174 }
3175
3176 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3177 if (wf->execMask(lane)) {
3178 vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
3179 }
3180 }
3181
3182 vdst.write();
3183 } // execute
3184 // --- Inst_VOP3__V_CVT_F32_UBYTE1 class methods ---
3185
3187 : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte1", false)
3188 {
3189 setFlag(ALU);
3190 setFlag(F32);
3191 } // Inst_VOP3__V_CVT_F32_UBYTE1
3192
3194 {
3195 } // ~Inst_VOP3__V_CVT_F32_UBYTE1
3196
3197 // --- description from .arch file ---
3198 // D.f = (float)(S0.u[15:8]).
3199 void
3201 {
3202 Wavefront *wf = gpuDynInst->wavefront();
3203 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3204 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3205
3206 src.readSrc();
3207
3208 if (instData.ABS & 0x1) {
3209 src.absModifier();
3210 }
3211
3212 if (extData.NEG & 0x1) {
3213 src.negModifier();
3214 }
3215
3216 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3217 if (wf->execMask(lane)) {
3218 vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
3219 }
3220 }
3221
3222 vdst.write();
3223 } // execute
3224 // --- Inst_VOP3__V_CVT_F32_UBYTE2 class methods ---
3225
3227 : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte2", false)
3228 {
3229 setFlag(ALU);
3230 setFlag(F32);
3231 } // Inst_VOP3__V_CVT_F32_UBYTE2
3232
3234 {
3235 } // ~Inst_VOP3__V_CVT_F32_UBYTE2
3236
3237 // --- description from .arch file ---
3238 // D.f = (float)(S0.u[23:16]).
3239 void
3241 {
3242 Wavefront *wf = gpuDynInst->wavefront();
3243 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3244 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3245
3246 src.readSrc();
3247
3248 if (instData.ABS & 0x1) {
3249 src.absModifier();
3250 }
3251
3252 if (extData.NEG & 0x1) {
3253 src.negModifier();
3254 }
3255
3256 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3257 if (wf->execMask(lane)) {
3258 vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
3259 }
3260 }
3261
3262 vdst.write();
3263 } // execute
3264 // --- Inst_VOP3__V_CVT_F32_UBYTE3 class methods ---
3265
3267 : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte3", false)
3268 {
3269 setFlag(ALU);
3270 setFlag(F32);
3271 } // Inst_VOP3__V_CVT_F32_UBYTE3
3272
3274 {
3275 } // ~Inst_VOP3__V_CVT_F32_UBYTE3
3276
3277 // --- description from .arch file ---
3278 // D.f = (float)(S0.u[31:24]).
3279 void
3281 {
3282 Wavefront *wf = gpuDynInst->wavefront();
3283 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3284 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3285
3286 src.readSrc();
3287
3288 if (instData.ABS & 0x1) {
3289 src.absModifier();
3290 }
3291
3292 if (extData.NEG & 0x1) {
3293 src.negModifier();
3294 }
3295
3296 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3297 if (wf->execMask(lane)) {
3298 vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
3299 }
3300 }
3301
3302 vdst.write();
3303 } // execute
3304 // --- Inst_VOP3__V_CVT_U32_F64 class methods ---
3305
3307 : Inst_VOP3A(iFmt, "v_cvt_u32_f64", false)
3308 {
3309 setFlag(ALU);
3310 setFlag(F64);
3311 } // Inst_VOP3__V_CVT_U32_F64
3312
3314 {
3315 } // ~Inst_VOP3__V_CVT_U32_F64
3316
3317 // --- description from .arch file ---
3318 // D.u = (unsigned)S0.d.
3319 // Out-of-range floating point values (including infinity) saturate. NaN is
3320 // --- converted to 0.
3321 void
3323 {
3324 Wavefront *wf = gpuDynInst->wavefront();
3325 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3326 VecOperandU32 vdst(gpuDynInst, instData.VDST);
3327
3328 src.readSrc();
3329
3330 if (instData.ABS & 0x1) {
3331 src.absModifier();
3332 }
3333
3334 if (extData.NEG & 0x1) {
3335 src.negModifier();
3336 }
3337
3338 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3339 if (wf->execMask(lane)) {
3340 int exp;
3341 std::frexp(src[lane],&exp);
3342 if (std::isnan(src[lane])) {
3343 vdst[lane] = 0;
3344 } else if (std::isinf(src[lane])) {
3345 if (std::signbit(src[lane])) {
3346 vdst[lane] = 0;
3347 } else {
3348 vdst[lane] = UINT_MAX;
3349 }
3350 } else if (exp > 31) {
3351 vdst[lane] = UINT_MAX;
3352 } else {
3353 vdst[lane] = (VecElemU32)src[lane];
3354 }
3355 }
3356 }
3357
3358 vdst.write();
3359 } // execute
3360 // --- Inst_VOP3__V_CVT_F64_U32 class methods ---
3361
3363 : Inst_VOP3A(iFmt, "v_cvt_f64_u32", false)
3364 {
3365 setFlag(ALU);
3366 setFlag(F64);
3367 } // Inst_VOP3__V_CVT_F64_U32
3368
3370 {
3371 } // ~Inst_VOP3__V_CVT_F64_U32
3372
3373 // --- description from .arch file ---
3374 // D.d = (double)S0.u.
3375 void
3377 {
3378 Wavefront *wf = gpuDynInst->wavefront();
3379 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
3380 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3381
3382 src.readSrc();
3383
3384 if (instData.ABS & 0x1) {
3385 src.absModifier();
3386 }
3387
3388 if (extData.NEG & 0x1) {
3389 src.negModifier();
3390 }
3391
3392 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3393 if (wf->execMask(lane)) {
3394 vdst[lane] = (VecElemF64)src[lane];
3395 }
3396 }
3397
3398 vdst.write();
3399 } // execute
3400 // --- Inst_VOP3__V_TRUNC_F64 class methods ---
3401
3403 : Inst_VOP3A(iFmt, "v_trunc_f64", false)
3404 {
3405 setFlag(ALU);
3406 setFlag(F64);
3407 } // Inst_VOP3__V_TRUNC_F64
3408
3410 {
3411 } // ~Inst_VOP3__V_TRUNC_F64
3412
3413 // --- description from .arch file ---
3414 // D.d = trunc(S0.d), return integer part of S0.d.
3415 void
3417 {
3418 Wavefront *wf = gpuDynInst->wavefront();
3419 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3420 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3421
3422 src.readSrc();
3423
3424 if (instData.ABS & 0x1) {
3425 src.absModifier();
3426 }
3427
3428 if (extData.NEG & 0x1) {
3429 src.negModifier();
3430 }
3431
3432 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3433 if (wf->execMask(lane)) {
3434 vdst[lane] = std::trunc(src[lane]);
3435 }
3436 }
3437
3438 vdst.write();
3439 } // execute
3440 // --- Inst_VOP3__V_CEIL_F64 class methods ---
3441
3443 : Inst_VOP3A(iFmt, "v_ceil_f64", false)
3444 {
3445 setFlag(ALU);
3446 setFlag(F64);
3447 } // Inst_VOP3__V_CEIL_F64
3448
3450 {
3451 } // ~Inst_VOP3__V_CEIL_F64
3452
3453 // --- description from .arch file ---
3454 // D.d = trunc(S0.d);
3455 // if (S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
3456 void
3458 {
3459 Wavefront *wf = gpuDynInst->wavefront();
3460 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3461 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3462
3463 src.readSrc();
3464
3465 if (instData.ABS & 0x1) {
3466 src.absModifier();
3467 }
3468
3469 if (extData.NEG & 0x1) {
3470 src.negModifier();
3471 }
3472
3473 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3474 if (wf->execMask(lane)) {
3475 vdst[lane] = std::ceil(src[lane]);
3476 }
3477 }
3478
3479 vdst.write();
3480 } // execute
3481 // --- Inst_VOP3__V_RNDNE_F64 class methods ---
3482
3484 : Inst_VOP3A(iFmt, "v_rndne_f64", false)
3485 {
3486 setFlag(ALU);
3487 setFlag(F64);
3488 } // Inst_VOP3__V_RNDNE_F64
3489
3491 {
3492 } // ~Inst_VOP3__V_RNDNE_F64
3493
3494 // --- description from .arch file ---
3495 // D.d = round_nearest_even(S0.d).
3496 void
3498 {
3499 Wavefront *wf = gpuDynInst->wavefront();
3500 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3501 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3502
3503 src.readSrc();
3504
3505 if (instData.ABS & 0x1) {
3506 src.absModifier();
3507 }
3508
3509 if (extData.NEG & 0x1) {
3510 src.negModifier();
3511 }
3512
3513 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3514 if (wf->execMask(lane)) {
3515 vdst[lane] = roundNearestEven(src[lane]);
3516 }
3517 }
3518
3519 vdst.write();
3520 } // execute
3521 // --- Inst_VOP3__V_FLOOR_F64 class methods ---
3522
3524 : Inst_VOP3A(iFmt, "v_floor_f64", false)
3525 {
3526 setFlag(ALU);
3527 setFlag(F64);
3528 } // Inst_VOP3__V_FLOOR_F64
3529
3531 {
3532 } // ~Inst_VOP3__V_FLOOR_F64
3533
3534 // --- description from .arch file ---
3535 // D.d = trunc(S0.d);
3536 // if (S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
3537 void
3539 {
3540 Wavefront *wf = gpuDynInst->wavefront();
3541 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3542 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3543
3544 src.readSrc();
3545
3546 if (instData.ABS & 0x1) {
3547 src.absModifier();
3548 }
3549
3550 if (extData.NEG & 0x1) {
3551 src.negModifier();
3552 }
3553
3554 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3555 if (wf->execMask(lane)) {
3556 vdst[lane] = std::floor(src[lane]);
3557 }
3558 }
3559
3560 vdst.write();
3561 } // execute
3562 // --- Inst_VOP3__V_FRACT_F32 class methods ---
3563
3565 : Inst_VOP3A(iFmt, "v_fract_f32", false)
3566 {
3567 setFlag(ALU);
3568 setFlag(F32);
3569 } // Inst_VOP3__V_FRACT_F32
3570
3572 {
3573 } // ~Inst_VOP3__V_FRACT_F32
3574
3575 // --- description from .arch file ---
3576 // D.f = S0.f - floor(S0.f).
3577 void
3579 {
3580 Wavefront *wf = gpuDynInst->wavefront();
3581 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3582 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3583
3584 src.readSrc();
3585
3586 if (instData.ABS & 0x1) {
3587 src.absModifier();
3588 }
3589
3590 if (extData.NEG & 0x1) {
3591 src.negModifier();
3592 }
3593
3594 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3595 if (wf->execMask(lane)) {
3596 VecElemF32 int_part(0.0);
3597 vdst[lane] = std::modf(src[lane], &int_part);
3598 }
3599 }
3600
3601 vdst.write();
3602 } // execute
3603 // --- Inst_VOP3__V_TRUNC_F32 class methods ---
3604
3606 : Inst_VOP3A(iFmt, "v_trunc_f32", false)
3607 {
3608 setFlag(ALU);
3609 setFlag(F32);
3610 } // Inst_VOP3__V_TRUNC_F32
3611
3613 {
3614 } // ~Inst_VOP3__V_TRUNC_F32
3615
3616 // --- description from .arch file ---
3617 // D.f = trunc(S0.f), return integer part of S0.f.
3618 void
3620 {
3621 Wavefront *wf = gpuDynInst->wavefront();
3622 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3623 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3624
3625 src.readSrc();
3626
3627 if (instData.ABS & 0x1) {
3628 src.absModifier();
3629 }
3630
3631 if (extData.NEG & 0x1) {
3632 src.negModifier();
3633 }
3634
3635 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3636 if (wf->execMask(lane)) {
3637 vdst[lane] = std::trunc(src[lane]);
3638 }
3639 }
3640
3641 vdst.write();
3642 } // execute
3643 // --- Inst_VOP3__V_CEIL_F32 class methods ---
3644
3646 : Inst_VOP3A(iFmt, "v_ceil_f32", false)
3647 {
3648 setFlag(ALU);
3649 setFlag(F32);
3650 } // Inst_VOP3__V_CEIL_F32
3651
3653 {
3654 } // ~Inst_VOP3__V_CEIL_F32
3655
3656 // --- description from .arch file ---
3657 // D.f = trunc(S0.f);
3658 // if (S0.f > 0.0 && S0.f != D.f) then D.f += 1.0.
3659 void
3661 {
3662 Wavefront *wf = gpuDynInst->wavefront();
3663 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3664 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3665
3666 src.readSrc();
3667
3668 if (instData.ABS & 0x1) {
3669 src.absModifier();
3670 }
3671
3672 if (extData.NEG & 0x1) {
3673 src.negModifier();
3674 }
3675
3676 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3677 if (wf->execMask(lane)) {
3678 vdst[lane] = std::ceil(src[lane]);
3679 }
3680 }
3681
3682 vdst.write();
3683 } // execute
3684 // --- Inst_VOP3__V_RNDNE_F32 class methods ---
3685
3687 : Inst_VOP3A(iFmt, "v_rndne_f32", false)
3688 {
3689 setFlag(ALU);
3690 setFlag(F32);
3691 } // Inst_VOP3__V_RNDNE_F32
3692
3694 {
3695 } // ~Inst_VOP3__V_RNDNE_F32
3696
3697 // --- description from .arch file ---
3698 // D.f = round_nearest_even(S0.f).
3699 void
3701 {
3702 Wavefront *wf = gpuDynInst->wavefront();
3703 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3704 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3705
3706 src.readSrc();
3707
3708 if (instData.ABS & 0x1) {
3709 src.absModifier();
3710 }
3711
3712 if (extData.NEG & 0x1) {
3713 src.negModifier();
3714 }
3715
3716 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3717 if (wf->execMask(lane)) {
3718 vdst[lane] = roundNearestEven(src[lane]);
3719 }
3720 }
3721
3722 vdst.write();
3723 } // execute
3724 // --- Inst_VOP3__V_FLOOR_F32 class methods ---
3725
3727 : Inst_VOP3A(iFmt, "v_floor_f32", false)
3728 {
3729 setFlag(ALU);
3730 setFlag(F32);
3731 } // Inst_VOP3__V_FLOOR_F32
3732
3734 {
3735 } // ~Inst_VOP3__V_FLOOR_F32
3736
3737 // --- description from .arch file ---
3738 // D.f = trunc(S0.f);
3739 // if (S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
3740 void
3742 {
3743 Wavefront *wf = gpuDynInst->wavefront();
3744 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3745 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3746
3747 src.readSrc();
3748
3749 if (instData.ABS & 0x1) {
3750 src.absModifier();
3751 }
3752
3753 if (extData.NEG & 0x1) {
3754 src.negModifier();
3755 }
3756
3757 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3758 if (wf->execMask(lane)) {
3759 vdst[lane] = std::floor(src[lane]);
3760 }
3761 }
3762
3763 vdst.write();
3764 } // execute
3765 // --- Inst_VOP3__V_EXP_F32 class methods ---
3766
3768 : Inst_VOP3A(iFmt, "v_exp_f32", false)
3769 {
3770 setFlag(ALU);
3771 setFlag(F32);
3772 } // Inst_VOP3__V_EXP_F32
3773
3775 {
3776 } // ~Inst_VOP3__V_EXP_F32
3777
3778 // --- description from .arch file ---
3779 // D.f = pow(2.0, S0.f).
3780 void
3782 {
3783 Wavefront *wf = gpuDynInst->wavefront();
3784 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3785 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3786
3787 src.readSrc();
3788
3789 if (instData.ABS & 0x1) {
3790 src.absModifier();
3791 }
3792
3793 if (extData.NEG & 0x1) {
3794 src.negModifier();
3795 }
3796
3797 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3798 if (wf->execMask(lane)) {
3799 vdst[lane] = std::pow(2.0, src[lane]);
3800 }
3801 }
3802
3803 vdst.write();
3804 } // execute
3805 // --- Inst_VOP3__V_LOG_F32 class methods ---
3806
3808 : Inst_VOP3A(iFmt, "v_log_f32", false)
3809 {
3810 setFlag(ALU);
3811 setFlag(F32);
3812 } // Inst_VOP3__V_LOG_F32
3813
3815 {
3816 } // ~Inst_VOP3__V_LOG_F32
3817
3818 // --- description from .arch file ---
3819 // D.f = log2(S0.f). Base 2 logarithm.
3820 void
3822 {
3823 Wavefront *wf = gpuDynInst->wavefront();
3824 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3825 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3826
3827 src.readSrc();
3828
3829 if (instData.ABS & 0x1) {
3830 src.absModifier();
3831 }
3832
3833 if (extData.NEG & 0x1) {
3834 src.negModifier();
3835 }
3836
3840 assert(!(instData.ABS & 0x2));
3841 assert(!(instData.ABS & 0x4));
3842 assert(!(extData.NEG & 0x2));
3843 assert(!(extData.NEG & 0x4));
3844
3845 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3846 if (wf->execMask(lane)) {
3847 vdst[lane] = std::log2(src[lane]);
3848 }
3849 }
3850
3851 vdst.write();
3852 } // execute
3853 // --- Inst_VOP3__V_RCP_F32 class methods ---
3854
3856 : Inst_VOP3A(iFmt, "v_rcp_f32", false)
3857 {
3858 setFlag(ALU);
3859 setFlag(F32);
3860 } // Inst_VOP3__V_RCP_F32
3861
3863 {
3864 } // ~Inst_VOP3__V_RCP_F32
3865
3866 // --- description from .arch file ---
3867 // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error.
3868 void
3870 {
3871 Wavefront *wf = gpuDynInst->wavefront();
3872 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3873 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3874
3875 src.readSrc();
3876
3877 if (instData.ABS & 0x1) {
3878 src.absModifier();
3879 }
3880
3881 if (extData.NEG & 0x1) {
3882 src.negModifier();
3883 }
3884
3885 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3886 if (wf->execMask(lane)) {
3887 vdst[lane] = 1.0 / src[lane];
3888 }
3889 }
3890
3891 vdst.write();
3892 } // execute
3893 // --- Inst_VOP3__V_RCP_IFLAG_F32 class methods ---
3894
3896 : Inst_VOP3A(iFmt, "v_rcp_iflag_f32", false)
3897 {
3898 setFlag(ALU);
3899 setFlag(F32);
3900 } // Inst_VOP3__V_RCP_IFLAG_F32
3901
3903 {
3904 } // ~Inst_VOP3__V_RCP_IFLAG_F32
3905
3906 // --- description from .arch file ---
3907 // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
3908 // --- integer DIV_BY_ZERO exception but cannot raise floating-point
3909 // --- exceptions.
3910 void
3912 {
3913 Wavefront *wf = gpuDynInst->wavefront();
3914 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3915 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3916
3917 src.readSrc();
3918
3919 if (instData.ABS & 0x1) {
3920 src.absModifier();
3921 }
3922
3923 if (extData.NEG & 0x1) {
3924 src.negModifier();
3925 }
3926
3927 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3928 if (wf->execMask(lane)) {
3929 vdst[lane] = 1.0 / src[lane];
3930 }
3931 }
3932
3933 vdst.write();
3934 } // execute
3935 // --- Inst_VOP3__V_RSQ_F32 class methods ---
3936
3938 : Inst_VOP3A(iFmt, "v_rsq_f32", false)
3939 {
3940 setFlag(ALU);
3941 setFlag(F32);
3942 } // Inst_VOP3__V_RSQ_F32
3943
3945 {
3946 } // ~Inst_VOP3__V_RSQ_F32
3947
3948 // --- description from .arch file ---
3949 // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules.
3950 void
3952 {
3953 Wavefront *wf = gpuDynInst->wavefront();
3954 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
3955 VecOperandF32 vdst(gpuDynInst, instData.VDST);
3956
3957 src.readSrc();
3958
3959 if (instData.ABS & 0x1) {
3960 src.absModifier();
3961 }
3962
3963 if (extData.NEG & 0x1) {
3964 src.negModifier();
3965 }
3966
3967 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3968 if (wf->execMask(lane)) {
3969 vdst[lane] = 1.0 / std::sqrt(src[lane]);
3970 }
3971 }
3972
3973 vdst.write();
3974 } // execute
3975 // --- Inst_VOP3__V_RCP_F64 class methods ---
3976
3978 : Inst_VOP3A(iFmt, "v_rcp_f64", false)
3979 {
3980 setFlag(ALU);
3981 setFlag(F64);
3982 } // Inst_VOP3__V_RCP_F64
3983
3985 {
3986 } // ~Inst_VOP3__V_RCP_F64
3987
3988 // --- description from .arch file ---
3989 // D.d = 1.0 / S0.d.
3990 void
3992 {
3993 Wavefront *wf = gpuDynInst->wavefront();
3994 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
3995 VecOperandF64 vdst(gpuDynInst, instData.VDST);
3996
3997 src.readSrc();
3998
3999 if (instData.ABS & 0x1) {
4000 src.absModifier();
4001 }
4002
4003 if (extData.NEG & 0x1) {
4004 src.negModifier();
4005 }
4006
4007 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4008 if (wf->execMask(lane)) {
4009 if (std::fpclassify(src[lane]) == FP_ZERO) {
4010 vdst[lane] = +INFINITY;
4011 } else if (std::isnan(src[lane])) {
4012 vdst[lane] = NAN;
4013 } else if (std::isinf(src[lane])) {
4014 if (std::signbit(src[lane])) {
4015 vdst[lane] = -0.0;
4016 } else {
4017 vdst[lane] = 0.0;
4018 }
4019 } else {
4020 vdst[lane] = 1.0 / src[lane];
4021 }
4022 }
4023 }
4024
4025 vdst.write();
4026 } // execute
4027 // --- Inst_VOP3__V_RSQ_F64 class methods ---
4028
4030 : Inst_VOP3A(iFmt, "v_rsq_f64", false)
4031 {
4032 setFlag(ALU);
4033 setFlag(F64);
4034 } // Inst_VOP3__V_RSQ_F64
4035
4037 {
4038 } // ~Inst_VOP3__V_RSQ_F64
4039
4040 // --- description from .arch file ---
4041 // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32.
4042 void
4044 {
4045 Wavefront *wf = gpuDynInst->wavefront();
4046 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4047 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4048
4049 src.readSrc();
4050
4051 if (instData.ABS & 0x1) {
4052 src.absModifier();
4053 }
4054
4055 if (extData.NEG & 0x1) {
4056 src.negModifier();
4057 }
4058
4059 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4060 if (wf->execMask(lane)) {
4061 if (std::fpclassify(src[lane]) == FP_ZERO) {
4062 vdst[lane] = +INFINITY;
4063 } else if (std::isnan(src[lane])) {
4064 vdst[lane] = NAN;
4065 } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) {
4066 vdst[lane] = 0.0;
4067 } else if (std::signbit(src[lane])) {
4068 vdst[lane] = NAN;
4069 } else {
4070 vdst[lane] = 1.0 / std::sqrt(src[lane]);
4071 }
4072 }
4073 }
4074
4075 vdst.write();
4076 } // execute
4077 // --- Inst_VOP3__V_SQRT_F32 class methods ---
4078
4080 : Inst_VOP3A(iFmt, "v_sqrt_f32", false)
4081 {
4082 setFlag(ALU);
4083 setFlag(F32);
4084 } // Inst_VOP3__V_SQRT_F32
4085
4087 {
4088 } // ~Inst_VOP3__V_SQRT_F32
4089
4090 // --- description from .arch file ---
4091 // D.f = sqrt(S0.f).
4092 void
4094 {
4095 Wavefront *wf = gpuDynInst->wavefront();
4096 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4097 VecOperandF32 vdst(gpuDynInst, instData.VDST);
4098
4099 src.readSrc();
4100
4101 if (instData.ABS & 0x1) {
4102 src.absModifier();
4103 }
4104
4105 if (extData.NEG & 0x1) {
4106 src.negModifier();
4107 }
4108
4109 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4110 if (wf->execMask(lane)) {
4111 vdst[lane] = std::sqrt(src[lane]);
4112 }
4113 }
4114
4115 vdst.write();
4116 } // execute
4117 // --- Inst_VOP3__V_SQRT_F64 class methods ---
4118
4120 : Inst_VOP3A(iFmt, "v_sqrt_f64", false)
4121 {
4122 setFlag(ALU);
4123 setFlag(F64);
4124 } // Inst_VOP3__V_SQRT_F64
4125
4127 {
4128 } // ~Inst_VOP3__V_SQRT_F64
4129
4130 // --- description from .arch file ---
4131 // D.d = sqrt(S0.d).
4132 void
4134 {
4135 Wavefront *wf = gpuDynInst->wavefront();
4136 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4137 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4138
4139 src.readSrc();
4140
4141 if (instData.ABS & 0x1) {
4142 src.absModifier();
4143 }
4144
4145 if (extData.NEG & 0x1) {
4146 src.negModifier();
4147 }
4148
4149 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4150 if (wf->execMask(lane)) {
4151 vdst[lane] = std::sqrt(src[lane]);
4152 }
4153 }
4154
4155 vdst.write();
4156 } // execute
4157 // --- Inst_VOP3__V_SIN_F32 class methods ---
4158
4160 : Inst_VOP3A(iFmt, "v_sin_f32", false)
4161 {
4162 setFlag(ALU);
4163 setFlag(F32);
4164 } // Inst_VOP3__V_SIN_F32
4165
4167 {
4168 } // ~Inst_VOP3__V_SIN_F32
4169
4170 // --- description from .arch file ---
4171 // D.f = sin(S0.f * 2 * PI).
4172 // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
4173 // float 0.0.
4174 void
4176 {
4177 Wavefront *wf = gpuDynInst->wavefront();
4178 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4179 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
4180 VecOperandF32 vdst(gpuDynInst, instData.VDST);
4181
4182 src.readSrc();
4183 pi.read();
4184
4185 if (instData.ABS & 0x1) {
4186 src.absModifier();
4187 }
4188
4189 if (extData.NEG & 0x1) {
4190 src.negModifier();
4191 }
4192
4193 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4194 if (wf->execMask(lane)) {
4195 vdst[lane] = std::sin(src[lane] * 2 * pi.rawData());
4196 }
4197 }
4198
4199 vdst.write();
4200 } // execute
4201 // --- Inst_VOP3__V_COS_F32 class methods ---
4202
4204 : Inst_VOP3A(iFmt, "v_cos_f32", false)
4205 {
4206 setFlag(ALU);
4207 setFlag(F32);
4208 } // Inst_VOP3__V_COS_F32
4209
4211 {
4212 } // ~Inst_VOP3__V_COS_F32
4213
4214 // --- description from .arch file ---
4215 // D.f = cos(S0.f * 2 * PI).
4216 // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
4217 // float 1.0.
4218 void
4220 {
4221 Wavefront *wf = gpuDynInst->wavefront();
4222 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4223 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
4224 VecOperandF32 vdst(gpuDynInst, instData.VDST);
4225
4226 src.readSrc();
4227 pi.read();
4228
4229 if (instData.ABS & 0x1) {
4230 src.absModifier();
4231 }
4232
4233 if (extData.NEG & 0x1) {
4234 src.negModifier();
4235 }
4236
4237 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4238 if (wf->execMask(lane)) {
4239 vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
4240 }
4241 }
4242
4243 vdst.write();
4244 } // execute
4245 // --- Inst_VOP3__V_NOT_B32 class methods ---
4246
4248 : Inst_VOP3A(iFmt, "v_not_b32", false)
4249 {
4250 setFlag(ALU);
4251 } // Inst_VOP3__V_NOT_B32
4252
4254 {
4255 } // ~Inst_VOP3__V_NOT_B32
4256
4257 // --- description from .arch file ---
4258 // D.u = ~S0.u.
4259 // Input and output modifiers not supported.
4260 void
4262 {
4263 Wavefront *wf = gpuDynInst->wavefront();
4264 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
4265 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4266
4267 src.readSrc();
4268
4269 if (instData.ABS & 0x1) {
4270 src.absModifier();
4271 }
4272
4273 if (extData.NEG & 0x1) {
4274 src.negModifier();
4275 }
4276
4277 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4278 if (wf->execMask(lane)) {
4279 vdst[lane] = ~src[lane];
4280 }
4281 }
4282
4283 vdst.write();
4284 } // execute
4285 // --- Inst_VOP3__V_BFREV_B32 class methods ---
4286
4288 : Inst_VOP3A(iFmt, "v_bfrev_b32", false)
4289 {
4290 setFlag(ALU);
4291 } // Inst_VOP3__V_BFREV_B32
4292
4294 {
4295 } // ~Inst_VOP3__V_BFREV_B32
4296
4297 // --- description from .arch file ---
4298 // D.u[31:0] = S0.u[0:31], bitfield reverse.
4299 // Input and output modifiers not supported.
4300 void
4302 {
4303 Wavefront *wf = gpuDynInst->wavefront();
4304 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
4305 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4306
4307 src.readSrc();
4308
4309 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4310 if (wf->execMask(lane)) {
4311 vdst[lane] = reverseBits(src[lane]);
4312 }
4313 }
4314
4315 vdst.write();
4316 } // execute
4317 // --- Inst_VOP3__V_FFBH_U32 class methods ---
4318
4320 : Inst_VOP3A(iFmt, "v_ffbh_u32", false)
4321 {
4322 setFlag(ALU);
4323 } // Inst_VOP3__V_FFBH_U32
4324
4326 {
4327 } // ~Inst_VOP3__V_FFBH_U32
4328
4329 // --- description from .arch file ---
4330 // D.u = position of first 1 in S0.u from MSB;
4331 // D.u = 0xffffffff if S0.u == 0.
4332 void
4334 {
4335 Wavefront *wf = gpuDynInst->wavefront();
4336 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
4337 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4338
4339 src.readSrc();
4340
4341 if (instData.ABS & 0x1) {
4342 src.absModifier();
4343 }
4344
4345 if (extData.NEG & 0x1) {
4346 src.negModifier();
4347 }
4348
4349 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4350 if (wf->execMask(lane)) {
4351 vdst[lane] = findFirstOneMsb(src[lane]);
4352 }
4353 }
4354
4355 vdst.write();
4356 } // execute
4357 // --- Inst_VOP3__V_FFBL_B32 class methods ---
4358
4360 : Inst_VOP3A(iFmt, "v_ffbl_b32", false)
4361 {
4362 setFlag(ALU);
4363 } // Inst_VOP3__V_FFBL_B32
4364
4366 {
4367 } // ~Inst_VOP3__V_FFBL_B32
4368
4369 // --- description from .arch file ---
4370 // D.u = position of first 1 in S0.u from LSB;
4371 // D.u = 0xffffffff if S0.u == 0.
4372 void
4374 {
4375 Wavefront *wf = gpuDynInst->wavefront();
4376 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
4377 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4378
4379 src.readSrc();
4380
4381 if (instData.ABS & 0x1) {
4382 src.absModifier();
4383 }
4384
4385 if (extData.NEG & 0x1) {
4386 src.negModifier();
4387 }
4388
4389 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4390 if (wf->execMask(lane)) {
4391 vdst[lane] = findFirstOne(src[lane]);
4392 }
4393 }
4394
4395 vdst.write();
4396 } // execute
4397 // --- Inst_VOP3__V_FFBH_I32 class methods ---
4398
4400 : Inst_VOP3A(iFmt, "v_ffbh_i32", false)
4401 {
4402 setFlag(ALU);
4403 } // Inst_VOP3__V_FFBH_I32
4404
4406 {
4407 } // ~Inst_VOP3__V_FFBH_I32
4408
4409 // --- description from .arch file ---
4410 // D.u = position of first bit different from sign bit in S0.i from MSB;
4411 // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
4412 void
4414 {
4415 Wavefront *wf = gpuDynInst->wavefront();
4416 ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
4417 VecOperandU32 vdst(gpuDynInst, instData.VDST);
4418
4419 src.readSrc();
4420
4421 if (instData.ABS & 0x1) {
4422 src.absModifier();
4423 }
4424
4425 if (extData.NEG & 0x1) {
4426 src.negModifier();
4427 }
4428
4429 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4430 if (wf->execMask(lane)) {
4431 vdst[lane] = firstOppositeSignBit(src[lane]);
4432 }
4433 }
4434
4435 vdst.write();
4436 } // execute
4437 // --- Inst_VOP3__V_FREXP_EXP_I32_F64 class methods ---
4438
4440 InFmt_VOP3A *iFmt)
4441 : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f64", false)
4442 {
4443 setFlag(ALU);
4444 setFlag(F64);
4445 } // Inst_VOP3__V_FREXP_EXP_I32_F64
4446
4448 {
4449 } // ~Inst_VOP3__V_FREXP_EXP_I32_F64
4450
4451 // --- description from .arch file ---
4452 // See V_FREXP_EXP_I32_F32.
4453 void
4455 {
4456 Wavefront *wf = gpuDynInst->wavefront();
4457 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4458 VecOperandI32 vdst(gpuDynInst, instData.VDST);
4459
4460 src.readSrc();
4461
4462 if (instData.ABS & 0x1) {
4463 src.absModifier();
4464 }
4465
4466 if (extData.NEG & 0x1) {
4467 src.negModifier();
4468 }
4469
4470 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4471 if (wf->execMask(lane)) {
4472 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
4473 vdst[lane] = 0;
4474 } else {
4475 VecElemI32 exp(0);
4476 std::frexp(src[lane], &exp);
4477 vdst[lane] = exp;
4478 }
4479 }
4480 }
4481
4482 vdst.write();
4483 } // execute
4484 // --- Inst_VOP3__V_FREXP_MANT_F64 class methods ---
4485
4487 : Inst_VOP3A(iFmt, "v_frexp_mant_f64", false)
4488 {
4489 setFlag(ALU);
4490 setFlag(F64);
4491 } // Inst_VOP3__V_FREXP_MANT_F64
4492
4494 {
4495 } // ~Inst_VOP3__V_FREXP_MANT_F64
4496
4497 // --- description from .arch file ---
4498 // See V_FREXP_MANT_F32.
4499 void
4501 {
4502 Wavefront *wf = gpuDynInst->wavefront();
4503 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4504 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4505
4506 src.readSrc();
4507
4508 if (instData.ABS & 0x1) {
4509 src.absModifier();
4510 }
4511
4512 if (extData.NEG & 0x1) {
4513 src.negModifier();
4514 }
4515
4516 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4517 if (wf->execMask(lane)) {
4518 VecElemI32 exp(0);
4519 vdst[lane] = std::frexp(src[lane], &exp);
4520 }
4521 }
4522
4523 vdst.write();
4524 } // execute
4525 // --- Inst_VOP3__V_FRACT_F64 class methods ---
4526
4528 : Inst_VOP3A(iFmt, "v_fract_f64", false)
4529 {
4530 setFlag(ALU);
4531 setFlag(F64);
4532 } // Inst_VOP3__V_FRACT_F64
4533
4535 {
4536 } // ~Inst_VOP3__V_FRACT_F64
4537
4538 // --- description from .arch file ---
4539 // See V_FRACT_F32.
4540 void
4542 {
4543 Wavefront *wf = gpuDynInst->wavefront();
4544 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
4545 VecOperandF64 vdst(gpuDynInst, instData.VDST);
4546
4547 src.readSrc();
4548
4549 if (instData.ABS & 0x1) {
4550 src.absModifier();
4551 }
4552
4553 if (extData.NEG & 0x1) {
4554 src.negModifier();
4555 }
4556
4557 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4558 if (wf->execMask(lane)) {
4559 VecElemF32 int_part(0.0);
4560 vdst[lane] = std::modf(src[lane], &int_part);
4561 }
4562 }
4563
4564 vdst.write();
4565 } // execute
4566 // --- Inst_VOP3__V_FREXP_EXP_I32_F32 class methods ---
4567
4569 InFmt_VOP3A *iFmt)
4570 : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f32", false)
4571 {
4572 setFlag(ALU);
4573 setFlag(F32);
4574 } // Inst_VOP3__V_FREXP_EXP_I32_F32
4575
4577 {
4578 } // ~Inst_VOP3__V_FREXP_EXP_I32_F32
4579
4580 // --- description from .arch file ---
4581 // if (S0.f == INF || S0.f == NAN) then D.i = 0;
4582 // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).
4583 // Returns exponent of single precision float input, such that S0.f =
4584 // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns
4585 // the significand.
4586 void
4588 {
4589 Wavefront *wf = gpuDynInst->wavefront();
4590 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4591 VecOperandI32 vdst(gpuDynInst, instData.VDST);
4592
4593 src.readSrc();
4594
4595 if (instData.ABS & 0x1) {
4596 src.absModifier();
4597 }
4598
4599 if (extData.NEG & 0x1) {
4600 src.negModifier();
4601 }
4602
4603 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4604 if (wf->execMask(lane)) {
4605 if (std::isinf(src[lane])|| std::isnan(src[lane])) {
4606 vdst[lane] = 0;
4607 } else {
4608 VecElemI32 exp(0);
4609 std::frexp(src[lane], &exp);
4610 vdst[lane] = exp;
4611 }
4612 }
4613 }
4614
4615 vdst.write();
4616 } // execute
4617 // --- Inst_VOP3__V_FREXP_MANT_F32 class methods ---
4618
4620 : Inst_VOP3A(iFmt, "v_frexp_mant_f32", false)
4621 {
4622 setFlag(ALU);
4623 setFlag(F32);
4624 } // Inst_VOP3__V_FREXP_MANT_F32
4625
4627 {
4628 } // ~Inst_VOP3__V_FREXP_MANT_F32
4629
4630 // --- description from .arch file ---
4631 // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
4632 // else D.f = Mantissa(S0.f).
4633 // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary
4634 // --- significand of single precision float input, such that S0.f =
4635 // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which
4636 // --- returns integer exponent.
4637 void
4639 {
4640 Wavefront *wf = gpuDynInst->wavefront();
4641 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
4642 VecOperandF32 vdst(gpuDynInst, instData.VDST);
4643
4644 src.readSrc();
4645
4646 if (instData.ABS & 0x1) {
4647 src.absModifier();
4648 }
4649
4650 if (extData.NEG & 0x1) {
4651 src.negModifier();
4652 }
4653
4654 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4655 if (wf->execMask(lane)) {
4656 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
4657 vdst[lane] = src[lane];
4658 } else {
4659 VecElemI32 exp(0);
4660 vdst[lane] = std::frexp(src[lane], &exp);
4661 }
4662 }
4663 }
4664
4665 vdst.write();
4666 } // execute
4667 // --- Inst_VOP3__V_CLREXCP class methods ---
4668
4670 : Inst_VOP3A(iFmt, "v_clrexcp", false)
4671 {
4672 } // Inst_VOP3__V_CLREXCP
4673
4675 {
4676 } // ~Inst_VOP3__V_CLREXCP
4677
4678 // --- description from .arch file ---
4679 // Clear wave's exception state in SIMD (SP).
4680 void
4682 {
4684 } // execute
4685 // --- Inst_VOP3__V_CVT_F16_U16 class methods ---
4686
4688 : Inst_VOP3A(iFmt, "v_cvt_f16_u16", false)
4689 {
4690 setFlag(ALU);
4691 setFlag(F16);
4692 } // Inst_VOP3__V_CVT_F16_U16
4693
4695 {
4696 } // ~Inst_VOP3__V_CVT_F16_U16
4697
4698 // --- description from .arch file ---
4699 // D.f16 = uint16_to_flt16(S.u16).
4700 // Supports denormals, rounding, exception flags and saturation.
4701 void
4703 {
4705 } // execute
4706 // --- Inst_VOP3__V_CVT_F16_I16 class methods ---
4707
4709 : Inst_VOP3A(iFmt, "v_cvt_f16_i16", false)
4710 {
4711 setFlag(ALU);
4712 setFlag(F16);
4713 } // Inst_VOP3__V_CVT_F16_I16
4714
4716 {
4717 } // ~Inst_VOP3__V_CVT_F16_I16
4718
4719 // --- description from .arch file ---
4720 // D.f16 = int16_to_flt16(S.i16).
4721 // Supports denormals, rounding, exception flags and saturation.
4722 void
4724 {
4726 } // execute
4727 // --- Inst_VOP3__V_CVT_U16_F16 class methods ---
4728
4730 : Inst_VOP3A(iFmt, "v_cvt_u16_f16", false)
4731 {
4732 setFlag(ALU);
4733 setFlag(F16);
4734 } // Inst_VOP3__V_CVT_U16_F16
4735
4737 {
4738 } // ~Inst_VOP3__V_CVT_U16_F16
4739
4740 // --- description from .arch file ---
4741 // D.u16 = flt16_to_uint16(S.f16).
4742 // Supports rounding, exception flags and saturation.
4743 void
4745 {
4747 } // execute
4748 // --- Inst_VOP3__V_CVT_I16_F16 class methods ---
4749
4751 : Inst_VOP3A(iFmt, "v_cvt_i16_f16", false)
4752 {
4753 setFlag(ALU);
4754 setFlag(F16);
4755 } // Inst_VOP3__V_CVT_I16_F16
4756
4758 {
4759 } // ~Inst_VOP3__V_CVT_I16_F16
4760
4761 // --- description from .arch file ---
4762 // D.i16 = flt16_to_int16(S.f16).
4763 // Supports rounding, exception flags and saturation.
4764 void
4766 {
4768 } // execute
4769 // --- Inst_VOP3__V_RCP_F16 class methods ---
4770
4772 : Inst_VOP3A(iFmt, "v_rcp_f16", false)
4773 {
4774 setFlag(ALU);
4775 setFlag(F16);
4776 } // Inst_VOP3__V_RCP_F16
4777
4779 {
4780 } // ~Inst_VOP3__V_RCP_F16
4781
4782 // --- description from .arch file ---
4783 // if (S0.f16 == 1.0f)
4784 // D.f16 = 1.0f;
4785 // else
4786 // D.f16 = ApproximateRecip(S0.f16).
4787 void
4789 {
4791 } // execute
4792 // --- Inst_VOP3__V_SQRT_F16 class methods ---
4793
4795 : Inst_VOP3A(iFmt, "v_sqrt_f16", false)
4796 {
4797 setFlag(ALU);
4798 setFlag(F16);
4799 } // Inst_VOP3__V_SQRT_F16
4800
4802 {
4803 } // ~Inst_VOP3__V_SQRT_F16
4804
4805 // --- description from .arch file ---
4806 // if (S0.f16 == 1.0f)
4807 // D.f16 = 1.0f;
4808 // else
4809 // D.f16 = ApproximateSqrt(S0.f16).
4810 void
4812 {
4814 } // execute
4815 // --- Inst_VOP3__V_RSQ_F16 class methods ---
4816
4818 : Inst_VOP3A(iFmt, "v_rsq_f16", false)
4819 {
4820 setFlag(ALU);
4821 setFlag(F16);
4822 } // Inst_VOP3__V_RSQ_F16
4823
4825 {
4826 } // ~Inst_VOP3__V_RSQ_F16
4827
4828 // --- description from .arch file ---
4829 // if (S0.f16 == 1.0f)
4830 // D.f16 = 1.0f;
4831 // else
4832 // D.f16 = ApproximateRecipSqrt(S0.f16).
4833 void
4835 {
4837 } // execute
4838 // --- Inst_VOP3__V_LOG_F16 class methods ---
4839
4841 : Inst_VOP3A(iFmt, "v_log_f16", false)
4842 {
4843 setFlag(ALU);
4844 setFlag(F16);
4845 } // Inst_VOP3__V_LOG_F16
4846
4848 {
4849 } // ~Inst_VOP3__V_LOG_F16
4850
4851 // --- description from .arch file ---
4852 // if (S0.f16 == 1.0f)
4853 // D.f16 = 0.0f;
4854 // else
4855 // D.f16 = ApproximateLog2(S0.f16).
4856 void
4858 {
4860 } // execute
4861 // --- Inst_VOP3__V_EXP_F16 class methods ---
4862
4864 : Inst_VOP3A(iFmt, "v_exp_f16", false)
4865 {
4866 setFlag(ALU);
4867 setFlag(F16);
4868 } // Inst_VOP3__V_EXP_F16
4869
4871 {
4872 } // ~Inst_VOP3__V_EXP_F16
4873
4874 // --- description from .arch file ---
4875 // if (S0.f16 == 0.0f)
4876 // D.f16 = 1.0f;
4877 // else
4878 // D.f16 = Approximate2ToX(S0.f16).
4879 void
4881 {
4883 } // execute
4884 // --- Inst_VOP3__V_FREXP_MANT_F16 class methods ---
4885
4887 : Inst_VOP3A(iFmt, "v_frexp_mant_f16", false)
4888 {
4889 setFlag(ALU);
4890 setFlag(F16);
4891 } // Inst_VOP3__V_FREXP_MANT_F16
4892
4894 {
4895 } // ~Inst_VOP3__V_FREXP_MANT_F16
4896
4897 // --- description from .arch file ---
4898 // if (S0.f16 == +-INF || S0.f16 == NAN)
4899 // D.f16 = S0.f16;
4900 // else
4901 // D.f16 = mantissa(S0.f16).
4902 // Result range is (-1.0,-0.5][0.5,1.0).
4903 // C math library frexp function.
4904 // Returns binary significand of half precision float input, such that the
4905 // original single float = significand * (2 ** exponent).
4906 void
4908 {
4910 } // execute
4911 // --- Inst_VOP3__V_FREXP_EXP_I16_F16 class methods ---
4912
4914 InFmt_VOP3A *iFmt)
4915 : Inst_VOP3A(iFmt, "v_frexp_exp_i16_f16", false)
4916 {
4917 setFlag(ALU);
4918 setFlag(F16);
4919 } // Inst_VOP3__V_FREXP_EXP_I16_F16
4920
4922 {
4923 } // ~Inst_VOP3__V_FREXP_EXP_I16_F16
4924
4925 // --- description from .arch file ---
4926 // if (S0.f16 == +-INF || S0.f16 == NAN)
4927 // D.i16 = 0;
4928 // else
4929 // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1).
4930 // C math library frexp function.
4931 // Returns exponent of half precision float input, such that the
4932 // original single float = significand * (2 ** exponent).
4933 void
4938 // --- Inst_VOP3__V_FLOOR_F16 class methods ---
4939
4941 : Inst_VOP3A(iFmt, "v_floor_f16", false)
4942 {
4943 setFlag(ALU);
4944 setFlag(F16);
4945 } // Inst_VOP3__V_FLOOR_F16
4946
4948 {
4949 } // ~Inst_VOP3__V_FLOOR_F16
4950
4951 // --- description from .arch file ---
4952 // D.f16 = trunc(S0.f16);
4953 // if (S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f.
4954 void