gem5 v24.1.0.1
Loading...
Searching...
No Matches
vop3_cmp.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
34
35namespace gem5
36{
37
38namespace VegaISA
39{
40 // --- Inst_VOP3__V_CMP_CLASS_F32 class methods ---
41
43 InFmt_VOP3A *iFmt)
44 : Inst_VOP3A(iFmt, "v_cmp_class_f32", true)
45 {
46 setFlag(ALU);
47 setFlag(F32);
48 } // Inst_VOP3__V_CMP_CLASS_F32
49
51 {
52 } // ~Inst_VOP3__V_CMP_CLASS_F32
53
54 // --- description from .arch file ---
55 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
56 // The function reports true if the floating point value is *any* of the
57 // numeric types selected in S1.u according to the following list:
58 // S1.u[0] -- value is a signaling NaN.
59 // S1.u[1] -- value is a quiet NaN.
60 // S1.u[2] -- value is negative infinity.
61 // S1.u[3] -- value is a negative normal value.
62 // S1.u[4] -- value is a negative denormal value.
63 // S1.u[5] -- value is negative zero.
64 // S1.u[6] -- value is positive zero.
65 // S1.u[7] -- value is a positive denormal value.
66 // S1.u[8] -- value is a positive normal value.
67 // S1.u[9] -- value is positive infinity.
// Per-lane floating-point class test on 32-bit sources.
// For every lane enabled in the exec mask, the lane's result bit is set
// when src0[lane] falls into at least one class whose selector bit is set
// in src1[lane]. The result mask is then written to the scalar
// destination operand.
68 void
70 {
71 Wavefront *wf = gpuDynInst->wavefront();
72 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
73 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
// One result bit per lane is collected in this 64-bit scalar operand.
74 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
75
76 src0.readSrc();
77 src1.readSrc();
78
79 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
// Lanes that are disabled in the exec mask are not evaluated at all.
80 if (wf->execMask(lane)) {
// NOTE(review): selector bit 0 (signaling NaN) and bit 1 (quiet NaN)
// are handled together; std::isnan accepts either kind, so the two
// NaN flavours are not told apart here.
81 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
82 // is NaN
83 if (std::isnan(src0[lane])) {
84 sdst.setBit(lane, 1);
// First matching class decides the lane; skip the remaining checks.
85 continue;
86 }
87 }
88 if (bits(src1[lane], 2)) {
89 // is -infinity
90 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
91 sdst.setBit(lane, 1);
92 continue;
93 }
94 }
95 if (bits(src1[lane], 3)) {
96 // is -normal
97 if (std::isnormal(src0[lane])
98 && std::signbit(src0[lane])) {
99 sdst.setBit(lane, 1);
100 continue;
101 }
102 }
103 if (bits(src1[lane], 4)) {
104 // is -denormal
105 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
106 && std::signbit(src0[lane])) {
107 sdst.setBit(lane, 1);
108 continue;
109 }
110 }
111 if (bits(src1[lane], 5)) {
112 // is -zero
113 if (std::fpclassify(src0[lane]) == FP_ZERO
114 && std::signbit(src0[lane])) {
115 sdst.setBit(lane, 1);
116 continue;
117 }
118 }
119 if (bits(src1[lane], 6)) {
120 // is +zero
121 if (std::fpclassify(src0[lane]) == FP_ZERO
122 && !std::signbit(src0[lane])) {
123 sdst.setBit(lane, 1);
124 continue;
125 }
126 }
127 if (bits(src1[lane], 7)) {
128 // is +denormal
129 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
130 && !std::signbit(src0[lane])) {
131 sdst.setBit(lane, 1);
132 continue;
133 }
134 }
135 if (bits(src1[lane], 8)) {
136 // is +normal
137 if (std::isnormal(src0[lane])
138 && !std::signbit(src0[lane])) {
139 sdst.setBit(lane, 1);
140 continue;
141 }
142 }
143 if (bits(src1[lane], 9)) {
144 // is +infinity
145 if (std::isinf(src0[lane])
146 && !std::signbit(src0[lane])) {
147 sdst.setBit(lane, 1);
148 continue;
149 }
150 }
// A lane that matches no selected class gets no setBit call; this
// presumably relies on sdst starting out zeroed -- TODO confirm in the
// scalar operand class.
151 }
152 }
153
// Write the collected per-lane mask to the destination operand.
154 sdst.write();
155 } // execute
156 // --- Inst_VOP3__V_CMPX_CLASS_F32 class methods ---
157
159 InFmt_VOP3A *iFmt)
160 : Inst_VOP3A(iFmt, "v_cmpx_class_f32", true)
161 {
162 setFlag(ALU);
163 setFlag(F32);
164 setFlag(WritesEXEC);
165 } // Inst_VOP3__V_CMPX_CLASS_F32
166
168 {
169 } // ~Inst_VOP3__V_CMPX_CLASS_F32
170
171 // --- description from .arch file ---
172 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
173 // S0.f
174 // The function reports true if the floating point value is *any* of the
175 // numeric types selected in S1.u according to the following list:
176 // S1.u[0] -- value is a signaling NaN.
177 // S1.u[1] -- value is a quiet NaN.
178 // S1.u[2] -- value is negative infinity.
179 // S1.u[3] -- value is a negative normal value.
180 // S1.u[4] -- value is a negative denormal value.
181 // S1.u[5] -- value is negative zero.
182 // S1.u[6] -- value is positive zero.
183 // S1.u[7] -- value is a positive denormal value.
184 // S1.u[8] -- value is a positive normal value.
185 // S1.u[9] -- value is positive infinity.
// Per-lane floating-point class test on 32-bit sources that also updates
// the exec mask. For every lane enabled in the exec mask, the lane's
// result bit is set when src0[lane] falls into at least one class whose
// selector bit is set in src1[lane]. The resulting mask replaces the
// wavefront's exec mask and is written to the scalar destination operand.
186 void
188 {
189 Wavefront *wf = gpuDynInst->wavefront();
190 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
191 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
// One result bit per lane is collected in this 64-bit scalar operand.
192 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
193
194 src0.readSrc();
195 src1.readSrc();
196
197 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
// Lanes that are disabled in the exec mask are not evaluated at all.
198 if (wf->execMask(lane)) {
// NOTE(review): selector bit 0 (signaling NaN) and bit 1 (quiet NaN)
// are handled together; std::isnan accepts either kind, so the two
// NaN flavours are not told apart here.
199 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
200 // is NaN
201 if (std::isnan(src0[lane])) {
202 sdst.setBit(lane, 1);
// First matching class decides the lane; skip the remaining checks.
203 continue;
204 }
205 }
206 if (bits(src1[lane], 2)) {
207 // is -infinity
208 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
209 sdst.setBit(lane, 1);
210 continue;
211 }
212 }
213 if (bits(src1[lane], 3)) {
214 // is -normal
215 if (std::isnormal(src0[lane])
216 && std::signbit(src0[lane])) {
217 sdst.setBit(lane, 1);
218 continue;
219 }
220 }
221 if (bits(src1[lane], 4)) {
222 // is -denormal
223 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
224 && std::signbit(src0[lane])) {
225 sdst.setBit(lane, 1);
226 continue;
227 }
228 }
229 if (bits(src1[lane], 5)) {
230 // is -zero
231 if (std::fpclassify(src0[lane]) == FP_ZERO
232 && std::signbit(src0[lane])) {
233 sdst.setBit(lane, 1);
234 continue;
235 }
236 }
237 if (bits(src1[lane], 6)) {
238 // is +zero
239 if (std::fpclassify(src0[lane]) == FP_ZERO
240 && !std::signbit(src0[lane])) {
241 sdst.setBit(lane, 1);
242 continue;
243 }
244 }
245 if (bits(src1[lane], 7)) {
246 // is +denormal
247 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
248 && !std::signbit(src0[lane])) {
249 sdst.setBit(lane, 1);
250 continue;
251 }
252 }
253 if (bits(src1[lane], 8)) {
254 // is +normal
255 if (std::isnormal(src0[lane])
256 && !std::signbit(src0[lane])) {
257 sdst.setBit(lane, 1);
258 continue;
259 }
260 }
261 if (bits(src1[lane], 9)) {
262 // is +infinity
263 if (std::isinf(src0[lane])
264 && !std::signbit(src0[lane])) {
265 sdst.setBit(lane, 1);
266 continue;
267 }
268 }
// A lane that matches no selected class gets no setBit call; this
// presumably relies on sdst starting out zeroed -- TODO confirm in the
// scalar operand class.
269 }
270 }
271
// The raw result bits become the wavefront's new exec mask before the
// result operand itself is written.
272 wf->execMask() = sdst.rawData();
273 sdst.write();
274 } // execute
275 // --- Inst_VOP3__V_CMP_CLASS_F64 class methods ---
276
278 InFmt_VOP3A *iFmt)
279 : Inst_VOP3A(iFmt, "v_cmp_class_f64", true)
280 {
281 setFlag(ALU);
282 setFlag(F64);
283 } // Inst_VOP3__V_CMP_CLASS_F64
284
286 {
287 } // ~Inst_VOP3__V_CMP_CLASS_F64
288
289 // --- description from .arch file ---
290 // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
291 // The function reports true if the floating point value is *any* of the
292 // numeric types selected in S1.u according to the following list:
293 // S1.u[0] -- value is a signaling NaN.
294 // S1.u[1] -- value is a quiet NaN.
295 // S1.u[2] -- value is negative infinity.
296 // S1.u[3] -- value is a negative normal value.
297 // S1.u[4] -- value is a negative denormal value.
298 // S1.u[5] -- value is negative zero.
299 // S1.u[6] -- value is positive zero.
300 // S1.u[7] -- value is a positive denormal value.
301 // S1.u[8] -- value is a positive normal value.
302 // S1.u[9] -- value is positive infinity.
// Per-lane floating-point class test on 64-bit sources.
// For every lane enabled in the exec mask, the lane's result bit is set
// when src0[lane] falls into at least one class whose selector bit is set
// in src1[lane]. The result mask is then written to the scalar
// destination operand.
303 void
305 {
306 Wavefront *wf = gpuDynInst->wavefront();
// The tested value is read as F64; the class selector stays a U32 mask.
307 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
308 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
// One result bit per lane is collected in this 64-bit scalar operand.
309 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
310
311 src0.readSrc();
312 src1.readSrc();
313
314 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
// Lanes that are disabled in the exec mask are not evaluated at all.
315 if (wf->execMask(lane)) {
// NOTE(review): selector bit 0 (signaling NaN) and bit 1 (quiet NaN)
// are handled together; std::isnan accepts either kind, so the two
// NaN flavours are not told apart here.
316 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
317 // is NaN
318 if (std::isnan(src0[lane])) {
319 sdst.setBit(lane, 1);
// First matching class decides the lane; skip the remaining checks.
320 continue;
321 }
322 }
323 if (bits(src1[lane], 2)) {
324 // is -infinity
325 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
326 sdst.setBit(lane, 1);
327 continue;
328 }
329 }
330 if (bits(src1[lane], 3)) {
331 // is -normal
332 if (std::isnormal(src0[lane])
333 && std::signbit(src0[lane])) {
334 sdst.setBit(lane, 1);
335 continue;
336 }
337 }
338 if (bits(src1[lane], 4)) {
339 // is -denormal
340 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
341 && std::signbit(src0[lane])) {
342 sdst.setBit(lane, 1);
343 continue;
344 }
345 }
346 if (bits(src1[lane], 5)) {
347 // is -zero
348 if (std::fpclassify(src0[lane]) == FP_ZERO
349 && std::signbit(src0[lane])) {
350 sdst.setBit(lane, 1);
351 continue;
352 }
353 }
354 if (bits(src1[lane], 6)) {
355 // is +zero
356 if (std::fpclassify(src0[lane]) == FP_ZERO
357 && !std::signbit(src0[lane])) {
358 sdst.setBit(lane, 1);
359 continue;
360 }
361 }
362 if (bits(src1[lane], 7)) {
363 // is +denormal
364 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
365 && !std::signbit(src0[lane])) {
366 sdst.setBit(lane, 1);
367 continue;
368 }
369 }
370 if (bits(src1[lane], 8)) {
371 // is +normal
372 if (std::isnormal(src0[lane])
373 && !std::signbit(src0[lane])) {
374 sdst.setBit(lane, 1);
375 continue;
376 }
377 }
378 if (bits(src1[lane], 9)) {
379 // is +infinity
380 if (std::isinf(src0[lane])
381 && !std::signbit(src0[lane])) {
382 sdst.setBit(lane, 1);
383 continue;
384 }
385 }
// A lane that matches no selected class gets no setBit call; this
// presumably relies on sdst starting out zeroed -- TODO confirm in the
// scalar operand class.
386 }
387 }
388
// Write the collected per-lane mask to the destination operand.
389 sdst.write();
390 } // execute
391 // --- Inst_VOP3__V_CMPX_CLASS_F64 class methods ---
392
394 InFmt_VOP3A *iFmt)
395 : Inst_VOP3A(iFmt, "v_cmpx_class_f64", true)
396 {
397 setFlag(ALU);
398 setFlag(F64);
399 setFlag(WritesEXEC);
400 } // Inst_VOP3__V_CMPX_CLASS_F64
401
403 {
404 } // ~Inst_VOP3__V_CMPX_CLASS_F64
405
406 // --- description from .arch file ---
407 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
408 // S0.d
409 // The function reports true if the floating point value is *any* of the
410 // numeric types selected in S1.u according to the following list:
411 // S1.u[0] -- value is a signaling NaN.
412 // S1.u[1] -- value is a quiet NaN.
413 // S1.u[2] -- value is negative infinity.
414 // S1.u[3] -- value is a negative normal value.
415 // S1.u[4] -- value is a negative denormal value.
416 // S1.u[5] -- value is negative zero.
417 // S1.u[6] -- value is positive zero.
418 // S1.u[7] -- value is a positive denormal value.
419 // S1.u[8] -- value is a positive normal value.
420 // S1.u[9] -- value is positive infinity.
// Per-lane floating-point class test on 64-bit sources that also updates
// the exec mask. For every lane enabled in the exec mask, the lane's
// result bit is set when src0[lane] falls into at least one class whose
// selector bit is set in src1[lane]. The resulting mask replaces the
// wavefront's exec mask and is written to the scalar destination operand.
421 void
423 {
424 Wavefront *wf = gpuDynInst->wavefront();
// The tested value is read as F64; the class selector stays a U32 mask.
425 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
426 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
// One result bit per lane is collected in this 64-bit scalar operand.
427 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
428
429 src0.readSrc();
430 src1.readSrc();
431
432 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
// Lanes that are disabled in the exec mask are not evaluated at all.
433 if (wf->execMask(lane)) {
// NOTE(review): selector bit 0 (signaling NaN) and bit 1 (quiet NaN)
// are handled together; std::isnan accepts either kind, so the two
// NaN flavours are not told apart here.
434 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
435 // is NaN
436 if (std::isnan(src0[lane])) {
437 sdst.setBit(lane, 1);
// First matching class decides the lane; skip the remaining checks.
438 continue;
439 }
440 }
441 if (bits(src1[lane], 2)) {
442 // is -infinity
443 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
444 sdst.setBit(lane, 1);
445 continue;
446 }
447 }
448 if (bits(src1[lane], 3)) {
449 // is -normal
450 if (std::isnormal(src0[lane])
451 && std::signbit(src0[lane])) {
452 sdst.setBit(lane, 1);
453 continue;
454 }
455 }
456 if (bits(src1[lane], 4)) {
457 // is -denormal
458 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
459 && std::signbit(src0[lane])) {
460 sdst.setBit(lane, 1);
461 continue;
462 }
463 }
464 if (bits(src1[lane], 5)) {
465 // is -zero
466 if (std::fpclassify(src0[lane]) == FP_ZERO
467 && std::signbit(src0[lane])) {
468 sdst.setBit(lane, 1);
469 continue;
470 }
471 }
472 if (bits(src1[lane], 6)) {
473 // is +zero
474 if (std::fpclassify(src0[lane]) == FP_ZERO
475 && !std::signbit(src0[lane])) {
476 sdst.setBit(lane, 1);
477 continue;
478 }
479 }
480 if (bits(src1[lane], 7)) {
481 // is +denormal
482 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
483 && !std::signbit(src0[lane])) {
484 sdst.setBit(lane, 1);
485 continue;
486 }
487 }
488 if (bits(src1[lane], 8)) {
489 // is +normal
490 if (std::isnormal(src0[lane])
491 && !std::signbit(src0[lane])) {
492 sdst.setBit(lane, 1);
493 continue;
494 }
495 }
496 if (bits(src1[lane], 9)) {
497 // is +infinity
498 if (std::isinf(src0[lane])
499 && !std::signbit(src0[lane])) {
500 sdst.setBit(lane, 1);
501 continue;
502 }
503 }
// A lane that matches no selected class gets no setBit call; this
// presumably relies on sdst starting out zeroed -- TODO confirm in the
// scalar operand class.
504 }
505 }
506
// The raw result bits become the wavefront's new exec mask before the
// result operand itself is written.
507 wf->execMask() = sdst.rawData();
508 sdst.write();
509 } // execute
510 // --- Inst_VOP3__V_CMP_CLASS_F16 class methods ---
511
513 InFmt_VOP3A *iFmt)
514 : Inst_VOP3A(iFmt, "v_cmp_class_f16", true)
515 {
516 setFlag(ALU);
517 setFlag(F16);
518 } // Inst_VOP3__V_CMP_CLASS_F16
519
521 {
522 } // ~Inst_VOP3__V_CMP_CLASS_F16
523
524 // --- description from .arch file ---
525 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
526 // The function reports true if the floating point value is *any* of the
527 // numeric types selected in S1.u according to the following list:
528 // S1.u[0] -- value is a signaling NaN.
529 // S1.u[1] -- value is a quiet NaN.
530 // S1.u[2] -- value is negative infinity.
531 // S1.u[3] -- value is a negative normal value.
532 // S1.u[4] -- value is a negative denormal value.
533 // S1.u[5] -- value is negative zero.
534 // S1.u[6] -- value is positive zero.
535 // S1.u[7] -- value is a positive denormal value.
536 // S1.u[8] -- value is a positive normal value.
537 // S1.u[9] -- value is positive infinity.
538 void
543 // --- Inst_VOP3__V_CMPX_CLASS_F16 class methods ---
544
546 InFmt_VOP3A *iFmt)
547 : Inst_VOP3A(iFmt, "v_cmpx_class_f16", true)
548 {
549 setFlag(ALU);
550 setFlag(F16);
551 setFlag(WritesEXEC);
552 } // Inst_VOP3__V_CMPX_CLASS_F16
553
555 {
556 } // ~Inst_VOP3__V_CMPX_CLASS_F16
557
558 // --- description from .arch file ---
559 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
560 // S0.f16
561 // The function reports true if the floating point value is *any* of the
562 // numeric types selected in S1.u according to the following list:
563 // S1.u[0] -- value is a signaling NaN.
564 // S1.u[1] -- value is a quiet NaN.
565 // S1.u[2] -- value is negative infinity.
566 // S1.u[3] -- value is a negative normal value.
567 // S1.u[4] -- value is a negative denormal value.
568 // S1.u[5] -- value is negative zero.
569 // S1.u[6] -- value is positive zero.
570 // S1.u[7] -- value is a positive denormal value.
571 // S1.u[8] -- value is a positive normal value.
572 // S1.u[9] -- value is positive infinity.
573 void
578 // --- Inst_VOP3__V_CMP_F_F16 class methods ---
579
581 : Inst_VOP3A(iFmt, "v_cmp_f_f16", true)
582 {
583 setFlag(ALU);
584 setFlag(F16);
585 } // Inst_VOP3__V_CMP_F_F16
586
588 {
589 } // ~Inst_VOP3__V_CMP_F_F16
590
591 // --- description from .arch file ---
592 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
593 void
595 {
597 } // execute
598 // --- Inst_VOP3__V_CMP_LT_F16 class methods ---
599
601 InFmt_VOP3A *iFmt)
602 : Inst_VOP3A(iFmt, "v_cmp_lt_f16", true)
603 {
604 setFlag(ALU);
605 setFlag(F16);
606 } // Inst_VOP3__V_CMP_LT_F16
607
609 {
610 } // ~Inst_VOP3__V_CMP_LT_F16
611
612 // --- description from .arch file ---
613 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
614 void
616 {
618 } // execute
619 // --- Inst_VOP3__V_CMP_EQ_F16 class methods ---
620
622 InFmt_VOP3A *iFmt)
623 : Inst_VOP3A(iFmt, "v_cmp_eq_f16", true)
624 {
625 setFlag(ALU);
626 setFlag(F16);
627 } // Inst_VOP3__V_CMP_EQ_F16
628
630 {
631 } // ~Inst_VOP3__V_CMP_EQ_F16
632
633 // --- description from .arch file ---
634 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
635 void
637 {
639 } // execute
640 // --- Inst_VOP3__V_CMP_LE_F16 class methods ---
641
643 InFmt_VOP3A *iFmt)
644 : Inst_VOP3A(iFmt, "v_cmp_le_f16", true)
645 {
646 setFlag(ALU);
647 setFlag(F16);
648 } // Inst_VOP3__V_CMP_LE_F16
649
651 {
652 } // ~Inst_VOP3__V_CMP_LE_F16
653
654 // --- description from .arch file ---
655 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
656 void
658 {
660 } // execute
661 // --- Inst_VOP3__V_CMP_GT_F16 class methods ---
662
664 InFmt_VOP3A *iFmt)
665 : Inst_VOP3A(iFmt, "v_cmp_gt_f16", true)
666 {
667 setFlag(ALU);
668 setFlag(F16);
669 } // Inst_VOP3__V_CMP_GT_F16
670
672 {
673 } // ~Inst_VOP3__V_CMP_GT_F16
674
675 // --- description from .arch file ---
676 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
677 void
679 {
681 } // execute
682 // --- Inst_VOP3__V_CMP_LG_F16 class methods ---
683
685 InFmt_VOP3A *iFmt)
686 : Inst_VOP3A(iFmt, "v_cmp_lg_f16", true)
687 {
688 setFlag(ALU);
689 setFlag(F16);
690 } // Inst_VOP3__V_CMP_LG_F16
691
693 {
694 } // ~Inst_VOP3__V_CMP_LG_F16
695
696 // --- description from .arch file ---
697 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
698 void
700 {
702 } // execute
703 // --- Inst_VOP3__V_CMP_GE_F16 class methods ---
704
706 InFmt_VOP3A *iFmt)
707 : Inst_VOP3A(iFmt, "v_cmp_ge_f16", true)
708 {
709 setFlag(ALU);
710 setFlag(F16);
711 } // Inst_VOP3__V_CMP_GE_F16
712
714 {
715 } // ~Inst_VOP3__V_CMP_GE_F16
716
717 // --- description from .arch file ---
718 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
719 void
721 {
723 } // execute
724 // --- Inst_VOP3__V_CMP_O_F16 class methods ---
725
727 : Inst_VOP3A(iFmt, "v_cmp_o_f16", true)
728 {
729 setFlag(ALU);
730 setFlag(F16);
731 } // Inst_VOP3__V_CMP_O_F16
732
734 {
735 } // ~Inst_VOP3__V_CMP_O_F16
736
737 // --- description from .arch file ---
738 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
739 void
741 {
743 } // execute
744 // --- Inst_VOP3__V_CMP_U_F16 class methods ---
745
747 : Inst_VOP3A(iFmt, "v_cmp_u_f16", true)
748 {
749 setFlag(ALU);
750 setFlag(F16);
751 } // Inst_VOP3__V_CMP_U_F16
752
754 {
755 } // ~Inst_VOP3__V_CMP_U_F16
756
757 // --- description from .arch file ---
758 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
759 void
761 {
763 } // execute
764 // --- Inst_VOP3__V_CMP_NGE_F16 class methods ---
765
767 InFmt_VOP3A *iFmt)
768 : Inst_VOP3A(iFmt, "v_cmp_nge_f16", true)
769 {
770 setFlag(ALU);
771 setFlag(F16);
772 } // Inst_VOP3__V_CMP_NGE_F16
773
775 {
776 } // ~Inst_VOP3__V_CMP_NGE_F16
777
778 // --- description from .arch file ---
779 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
780 void
782 {
784 } // execute
785 // --- Inst_VOP3__V_CMP_NLG_F16 class methods ---
786
788 InFmt_VOP3A *iFmt)
789 : Inst_VOP3A(iFmt, "v_cmp_nlg_f16", true)
790 {
791 setFlag(ALU);
792 setFlag(F16);
793 } // Inst_VOP3__V_CMP_NLG_F16
794
796 {
797 } // ~Inst_VOP3__V_CMP_NLG_F16
798
799 // --- description from .arch file ---
800 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
801 void
803 {
805 } // execute
806 // --- Inst_VOP3__V_CMP_NGT_F16 class methods ---
807
809 InFmt_VOP3A *iFmt)
810 : Inst_VOP3A(iFmt, "v_cmp_ngt_f16", true)
811 {
812 setFlag(ALU);
813 setFlag(F16);
814 } // Inst_VOP3__V_CMP_NGT_F16
815
817 {
818 } // ~Inst_VOP3__V_CMP_NGT_F16
819
820 // --- description from .arch file ---
821 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
822 void
824 {
826 } // execute
827 // --- Inst_VOP3__V_CMP_NLE_F16 class methods ---
828
830 InFmt_VOP3A *iFmt)
831 : Inst_VOP3A(iFmt, "v_cmp_nle_f16", true)
832 {
833 setFlag(ALU);
834 setFlag(F16);
835 } // Inst_VOP3__V_CMP_NLE_F16
836
838 {
839 } // ~Inst_VOP3__V_CMP_NLE_F16
840
841 // --- description from .arch file ---
842 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
843 void
845 {
847 } // execute
848 // --- Inst_VOP3__V_CMP_NEQ_F16 class methods ---
849
851 InFmt_VOP3A *iFmt)
852 : Inst_VOP3A(iFmt, "v_cmp_neq_f16", true)
853 {
854 setFlag(ALU);
855 setFlag(F16);
856 } // Inst_VOP3__V_CMP_NEQ_F16
857
859 {
860 } // ~Inst_VOP3__V_CMP_NEQ_F16
861
862 // --- description from .arch file ---
863 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
864 void
866 {
868 } // execute
869 // --- Inst_VOP3__V_CMP_NLT_F16 class methods ---
870
872 InFmt_VOP3A *iFmt)
873 : Inst_VOP3A(iFmt, "v_cmp_nlt_f16", true)
874 {
875 setFlag(ALU);
876 setFlag(F16);
877 } // Inst_VOP3__V_CMP_NLT_F16
878
880 {
881 } // ~Inst_VOP3__V_CMP_NLT_F16
882
883 // --- description from .arch file ---
884 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
885 void
887 {
889 } // execute
890 // --- Inst_VOP3__V_CMP_TRU_F16 class methods ---
891
893 InFmt_VOP3A *iFmt)
894 : Inst_VOP3A(iFmt, "v_cmp_tru_f16", true)
895 {
896 setFlag(ALU);
897 setFlag(F16);
898 } // Inst_VOP3__V_CMP_TRU_F16
899
901 {
902 } // ~Inst_VOP3__V_CMP_TRU_F16
903
904 // --- description from .arch file ---
905 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
// Always-true compare: sets the result bit of every lane that is enabled
// in the exec mask, then writes the mask to the scalar destination
// operand. No source operands are read.
906 void
908 {
909 Wavefront *wf = gpuDynInst->wavefront();
910 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
911
912 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
// Disabled lanes get no setBit call and keep their initial sdst value.
913 if (wf->execMask(lane)) {
914 sdst.setBit(lane, 1);
915 }
916 }
917
918 sdst.write();
919 } // execute
920 // --- Inst_VOP3__V_CMPX_F_F16 class methods ---
921
923 InFmt_VOP3A *iFmt)
924 : Inst_VOP3A(iFmt, "v_cmpx_f_f16", true)
925 {
926 setFlag(ALU);
// Tag the instruction as an F16 operation, matching v_cmp_f_f16,
// v_cmpx_tru_f16 and the other *_f16 compare constructors in this file.
setFlag(F16);
// The instruction overwrites the exec mask in execute().
927 setFlag(WritesEXEC);
928 } // Inst_VOP3__V_CMPX_F_F16
929
931 {
932 } // ~Inst_VOP3__V_CMPX_F_F16
933
934 // --- description from .arch file ---
935 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
// Always-false compare that also updates the exec mask: clears the result
// bit of every lane enabled in the exec mask, assigns the raw result bits
// to the wavefront's exec mask, and writes the scalar destination operand.
// No source operands are read.
936 void
938 {
939 Wavefront *wf = gpuDynInst->wavefront();
940 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
941
942 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
943 if (wf->execMask(lane)) {
944 sdst.setBit(lane, 0);
945 }
946 }
947
// Disabled lanes were never touched above, so their bits in the new exec
// mask are whatever sdst was initialised with -- presumably zero, TODO
// confirm in the scalar operand class.
948 wf->execMask() = sdst.rawData();
949 sdst.write();
950 } // execute
951 // --- Inst_VOP3__V_CMPX_LT_F16 class methods ---
952
954 InFmt_VOP3A *iFmt)
955 : Inst_VOP3A(iFmt, "v_cmpx_lt_f16", true)
956 {
957 setFlag(ALU);
958 setFlag(F16);
959 setFlag(WritesEXEC);
960 } // Inst_VOP3__V_CMPX_LT_F16
961
963 {
964 } // ~Inst_VOP3__V_CMPX_LT_F16
965
966 // --- description from .arch file ---
967 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
968 void
970 {
972 } // execute
973 // --- Inst_VOP3__V_CMPX_EQ_F16 class methods ---
974
976 InFmt_VOP3A *iFmt)
977 : Inst_VOP3A(iFmt, "v_cmpx_eq_f16", true)
978 {
979 setFlag(ALU);
980 setFlag(F16);
981 setFlag(WritesEXEC);
982 } // Inst_VOP3__V_CMPX_EQ_F16
983
985 {
986 } // ~Inst_VOP3__V_CMPX_EQ_F16
987
988 // --- description from .arch file ---
989 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
990 void
992 {
994 } // execute
995 // --- Inst_VOP3__V_CMPX_LE_F16 class methods ---
996
998 InFmt_VOP3A *iFmt)
999 : Inst_VOP3A(iFmt, "v_cmpx_le_f16", true)
1000 {
1001 setFlag(ALU);
1002 setFlag(F16);
1003 setFlag(WritesEXEC);
1004 } // Inst_VOP3__V_CMPX_LE_F16
1005
1007 {
1008 } // ~Inst_VOP3__V_CMPX_LE_F16
1009
1010 // --- description from .arch file ---
1011 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
1012 void
1014 {
1016 } // execute
1017 // --- Inst_VOP3__V_CMPX_GT_F16 class methods ---
1018
1020 InFmt_VOP3A *iFmt)
1021 : Inst_VOP3A(iFmt, "v_cmpx_gt_f16", true)
1022 {
1023 setFlag(ALU);
1024 setFlag(F16);
1025 setFlag(WritesEXEC);
1026 } // Inst_VOP3__V_CMPX_GT_F16
1027
1029 {
1030 } // ~Inst_VOP3__V_CMPX_GT_F16
1031
1032 // --- description from .arch file ---
1033 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
1034 void
1036 {
1038 } // execute
1039 // --- Inst_VOP3__V_CMPX_LG_F16 class methods ---
1040
1042 InFmt_VOP3A *iFmt)
1043 : Inst_VOP3A(iFmt, "v_cmpx_lg_f16", true)
1044 {
1045 setFlag(ALU);
1046 setFlag(F16);
1047 setFlag(WritesEXEC);
1048 } // Inst_VOP3__V_CMPX_LG_F16
1049
1051 {
1052 } // ~Inst_VOP3__V_CMPX_LG_F16
1053
1054 // --- description from .arch file ---
1055 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
1056 void
1058 {
1060 } // execute
1061 // --- Inst_VOP3__V_CMPX_GE_F16 class methods ---
1062
1064 InFmt_VOP3A *iFmt)
1065 : Inst_VOP3A(iFmt, "v_cmpx_ge_f16", true)
1066 {
1067 setFlag(ALU);
1068 setFlag(F16);
1069 setFlag(WritesEXEC);
1070 } // Inst_VOP3__V_CMPX_GE_F16
1071
1073 {
1074 } // ~Inst_VOP3__V_CMPX_GE_F16
1075
1076 // --- description from .arch file ---
1077 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
1078 void
1080 {
1082 } // execute
1083 // --- Inst_VOP3__V_CMPX_O_F16 class methods ---
1084
1086 InFmt_VOP3A *iFmt)
1087 : Inst_VOP3A(iFmt, "v_cmpx_o_f16", true)
1088 {
1089 setFlag(ALU);
1090 setFlag(F16);
1091 setFlag(WritesEXEC);
1092 } // Inst_VOP3__V_CMPX_O_F16
1093
1095 {
1096 } // ~Inst_VOP3__V_CMPX_O_F16
1097
1098 // --- description from .arch file ---
1099 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
1100 // encoding.
1101 void
1103 {
1105 } // execute
1106 // --- Inst_VOP3__V_CMPX_U_F16 class methods ---
1107
1109 InFmt_VOP3A *iFmt)
1110 : Inst_VOP3A(iFmt, "v_cmpx_u_f16", true)
1111 {
1112 setFlag(ALU);
1113 setFlag(F16);
1114 setFlag(WritesEXEC);
1115 } // Inst_VOP3__V_CMPX_U_F16
1116
1118 {
1119 } // ~Inst_VOP3__V_CMPX_U_F16
1120
1121 // --- description from .arch file ---
1122 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
1123 // encoding.
1124 void
1126 {
1128 } // execute
1129 // --- Inst_VOP3__V_CMPX_NGE_F16 class methods ---
1130
1132 InFmt_VOP3A *iFmt)
1133 : Inst_VOP3A(iFmt, "v_cmpx_nge_f16", true)
1134 {
1135 setFlag(ALU);
1136 setFlag(F16);
1137 setFlag(WritesEXEC);
1138 } // Inst_VOP3__V_CMPX_NGE_F16
1139
1141 {
1142 } // ~Inst_VOP3__V_CMPX_NGE_F16
1143
1144 // --- description from .arch file ---
1145 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
1146 void
1148 {
1150 } // execute
1151 // --- Inst_VOP3__V_CMPX_NLG_F16 class methods ---
1152
1154 InFmt_VOP3A *iFmt)
1155 : Inst_VOP3A(iFmt, "v_cmpx_nlg_f16", true)
1156 {
1157 setFlag(ALU);
1158 setFlag(F16);
1159 setFlag(WritesEXEC);
1160 } // Inst_VOP3__V_CMPX_NLG_F16
1161
1163 {
1164 } // ~Inst_VOP3__V_CMPX_NLG_F16
1165
1166 // --- description from .arch file ---
1167 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
1168 void
1170 {
1172 } // execute
1173 // --- Inst_VOP3__V_CMPX_NGT_F16 class methods ---
1174
1176 InFmt_VOP3A *iFmt)
1177 : Inst_VOP3A(iFmt, "v_cmpx_ngt_f16", true)
1178 {
1179 setFlag(ALU);
1180 setFlag(F16);
1181 setFlag(WritesEXEC);
1182 } // Inst_VOP3__V_CMPX_NGT_F16
1183
1185 {
1186 } // ~Inst_VOP3__V_CMPX_NGT_F16
1187
1188 // --- description from .arch file ---
1189 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
1190 void
1192 {
1194 } // execute
1195 // --- Inst_VOP3__V_CMPX_NLE_F16 class methods ---
1196
1198 InFmt_VOP3A *iFmt)
1199 : Inst_VOP3A(iFmt, "v_cmpx_nle_f16", true)
1200 {
1201 setFlag(ALU);
1202 setFlag(F16);
1203 setFlag(WritesEXEC);
1204 } // Inst_VOP3__V_CMPX_NLE_F16
1205
1207 {
1208 } // ~Inst_VOP3__V_CMPX_NLE_F16
1209
1210 // --- description from .arch file ---
1211 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
1212 void
1214 {
1216 } // execute
1217 // --- Inst_VOP3__V_CMPX_NEQ_F16 class methods ---
1218
1220 InFmt_VOP3A *iFmt)
1221 : Inst_VOP3A(iFmt, "v_cmpx_neq_f16", true)
1222 {
1223 setFlag(ALU);
1224 setFlag(F16);
1225 setFlag(WritesEXEC);
1226 } // Inst_VOP3__V_CMPX_NEQ_F16
1227
1229 {
1230 } // ~Inst_VOP3__V_CMPX_NEQ_F16
1231
1232 // --- description from .arch file ---
1233 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
1234 void
1236 {
1238 } // execute
1239 // --- Inst_VOP3__V_CMPX_NLT_F16 class methods ---
1240
1242 InFmt_VOP3A *iFmt)
1243 : Inst_VOP3A(iFmt, "v_cmpx_nlt_f16", true)
1244 {
1245 setFlag(ALU);
1246 setFlag(F16);
1247 setFlag(WritesEXEC);
1248 } // Inst_VOP3__V_CMPX_NLT_F16
1249
1251 {
1252 } // ~Inst_VOP3__V_CMPX_NLT_F16
1253
1254 // --- description from .arch file ---
1255 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
1256 void
1258 {
1260 } // execute
1261 // --- Inst_VOP3__V_CMPX_TRU_F16 class methods ---
1262
1264 InFmt_VOP3A *iFmt)
1265 : Inst_VOP3A(iFmt, "v_cmpx_tru_f16", true)
1266 {
1267 setFlag(ALU);
1268 setFlag(F16);
1269 setFlag(WritesEXEC);
1270 } // Inst_VOP3__V_CMPX_TRU_F16
1271
1273 {
1274 } // ~Inst_VOP3__V_CMPX_TRU_F16
1275
1276 // --- description from .arch file ---
1277 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
// Always-true compare that also updates the exec mask: sets the result
// bit of every lane enabled in the exec mask, assigns the raw result bits
// to the wavefront's exec mask, and writes the scalar destination operand.
// No source operands are read.
1278 void
1280 {
1281 Wavefront *wf = gpuDynInst->wavefront();
1282 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1283
1284 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1285 if (wf->execMask(lane)) {
1286 sdst.setBit(lane, 1);
1287 }
1288 }
1289
// Disabled lanes were never touched above, so their bits in the new exec
// mask are whatever sdst was initialised with -- presumably zero, TODO
// confirm in the scalar operand class.
1290 wf->execMask() = sdst.rawData();
1291 sdst.write();
1292 } // execute
1293 // --- Inst_VOP3__V_CMP_F_F32 class methods ---
1294
1296 : Inst_VOP3A(iFmt, "v_cmp_f_f32", true)
1297 {
1298 setFlag(ALU);
1299 setFlag(F32);
1300 } // Inst_VOP3__V_CMP_F_F32
1301
1303 {
1304 } // ~Inst_VOP3__V_CMP_F_F32
1305
1306 // --- description from .arch file ---
1307 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
// Always-false compare: clears the result bit of every lane that is
// enabled in the exec mask, then writes the mask to the scalar
// destination operand. No source operands are read.
1308 void
1310 {
1311 Wavefront *wf = gpuDynInst->wavefront();
1312 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1313
1314 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
// Disabled lanes get no setBit call and keep their initial sdst value.
1315 if (wf->execMask(lane)) {
1316 sdst.setBit(lane, 0);
1317 }
1318 }
1319
1320 sdst.write();
1321 } // execute
1322 // --- Inst_VOP3__V_CMP_LT_F32 class methods ---
1323
1325 InFmt_VOP3A *iFmt)
1326 : Inst_VOP3A(iFmt, "v_cmp_lt_f32", true)
1327 {
1328 setFlag(ALU);
1329 setFlag(F32);
1330 } // Inst_VOP3__V_CMP_LT_F32
1331
1333 {
1334 } // ~Inst_VOP3__V_CMP_LT_F32
1335
1336 // --- description from .arch file ---
1337 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
// Per-lane "less than" on two F32 vector sources. For each lane enabled
// in the exec mask, the result bit is 1 when src0[lane] < src1[lane] and
// 0 otherwise; the mask is then written to the scalar destination operand.
1338 void
1340 {
1341 Wavefront *wf = gpuDynInst->wavefront();
1342 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1343 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1344 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1345
1346 src0.readSrc();
1347 src1.readSrc();
1348
1349 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
// Disabled lanes get no setBit call and keep their initial sdst value.
1350 if (wf->execMask(lane)) {
1351 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
1352 }
1353 }
1354
1355 sdst.write();
1356 } // execute
1357 // --- Inst_VOP3__V_CMP_EQ_F32 class methods ---
1358
1360 InFmt_VOP3A *iFmt)
1361 : Inst_VOP3A(iFmt, "v_cmp_eq_f32", true)
1362 {
1363 setFlag(ALU);
1364 setFlag(F32);
1365 } // Inst_VOP3__V_CMP_EQ_F32
1366
1368 {
1369 } // ~Inst_VOP3__V_CMP_EQ_F32
1370
1371 // --- description from .arch file ---
1372 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
// Per-lane equality on two F32 vector sources. For each lane enabled in
// the exec mask, the result bit is 1 when src0[lane] == src1[lane] and 0
// otherwise; the mask is then written to the scalar destination operand.
1373 void
1375 {
1376 Wavefront *wf = gpuDynInst->wavefront();
1377 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1378 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1379 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1380
1381 src0.readSrc();
1382 src1.readSrc();
1383
1384 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
// Disabled lanes get no setBit call and keep their initial sdst value.
1385 if (wf->execMask(lane)) {
1386 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
1387 }
1388 }
1389
1390 sdst.write();
1391 } // execute
1392 // --- Inst_VOP3__V_CMP_LE_F32 class methods ---
1393
1395 InFmt_VOP3A *iFmt)
1396 : Inst_VOP3A(iFmt, "v_cmp_le_f32", true)
1397 {
1398 setFlag(ALU);
1399 setFlag(F32);
1400 } // Inst_VOP3__V_CMP_LE_F32
1401
1403 {
1404 } // ~Inst_VOP3__V_CMP_LE_F32
1405
1406 // --- description from .arch file ---
1407 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
1408 void
1410 {
1411 Wavefront *wf = gpuDynInst->wavefront();
1412 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1413 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1414 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1415
1416 src0.readSrc();
1417 src1.readSrc();
1418
1419 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1420 if (wf->execMask(lane)) {
1421 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
1422 }
1423 }
1424
1425 sdst.write();
1426 } // execute
1427 // --- Inst_VOP3__V_CMP_GT_F32 class methods ---
1428
1430 InFmt_VOP3A *iFmt)
1431 : Inst_VOP3A(iFmt, "v_cmp_gt_f32", true)
1432 {
1433 setFlag(ALU);
1434 setFlag(F32);
1435 } // Inst_VOP3__V_CMP_GT_F32
1436
1438 {
1439 } // ~Inst_VOP3__V_CMP_GT_F32
1440
1441 // --- description from .arch file ---
1442 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
1443 void
1445 {
1446 Wavefront *wf = gpuDynInst->wavefront();
1447 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1448 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1449 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1450
1451 src0.readSrc();
1452 src1.readSrc();
1453
1454 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1455 if (wf->execMask(lane)) {
1456 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
1457 }
1458 }
1459
1460 sdst.write();
1461 } // execute
1462 // --- Inst_VOP3__V_CMP_LG_F32 class methods ---
1463
1465 InFmt_VOP3A *iFmt)
1466 : Inst_VOP3A(iFmt, "v_cmp_lg_f32", true)
1467 {
1468 setFlag(ALU);
1469 setFlag(F32);
1470 } // Inst_VOP3__V_CMP_LG_F32
1471
1473 {
1474 } // ~Inst_VOP3__V_CMP_LG_F32
1475
1476 // --- description from .arch file ---
1477 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
1478 void
1480 {
1481 Wavefront *wf = gpuDynInst->wavefront();
1482 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1483 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1484 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1485
1486 src0.readSrc();
1487 src1.readSrc();
1488
1489 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1490 if (wf->execMask(lane)) {
1491 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
1492 }
1493 }
1494
1495 sdst.write();
1496 } // execute
1497 // --- Inst_VOP3__V_CMP_GE_F32 class methods ---
1498
1500 InFmt_VOP3A *iFmt)
1501 : Inst_VOP3A(iFmt, "v_cmp_ge_f32", true)
1502 {
1503 setFlag(ALU);
1504 setFlag(F32);
1505 } // Inst_VOP3__V_CMP_GE_F32
1506
1508 {
1509 } // ~Inst_VOP3__V_CMP_GE_F32
1510
1511 // --- description from .arch file ---
1512 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
1513 void
1515 {
1516 Wavefront *wf = gpuDynInst->wavefront();
1517 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1518 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1519 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1520
1521 src0.readSrc();
1522 src1.readSrc();
1523
1524 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1525 if (wf->execMask(lane)) {
1526 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
1527 }
1528 }
1529
1530 sdst.write();
1531 } // execute
1532 // --- Inst_VOP3__V_CMP_O_F32 class methods ---
1533
1535 : Inst_VOP3A(iFmt, "v_cmp_o_f32", true)
1536 {
1537 setFlag(ALU);
1538 setFlag(F32);
1539 } // Inst_VOP3__V_CMP_O_F32
1540
1542 {
1543 } // ~Inst_VOP3__V_CMP_O_F32
1544
1545 // --- description from .arch file ---
1546 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
1547 void
1549 {
1550 Wavefront *wf = gpuDynInst->wavefront();
1551 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1552 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1553 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1554
1555 src0.readSrc();
1556 src1.readSrc();
1557
1558 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1559 if (wf->execMask(lane)) {
1560 sdst.setBit(lane, (!std::isnan(src0[lane])
1561 && !std::isnan(src1[lane])) ? 1 : 0);
1562 }
1563 }
1564
1565 sdst.write();
1566 } // execute
1567 // --- Inst_VOP3__V_CMP_U_F32 class methods ---
1568
1570 : Inst_VOP3A(iFmt, "v_cmp_u_f32", true)
1571 {
1572 setFlag(ALU);
1573 setFlag(F32);
1574 } // Inst_VOP3__V_CMP_U_F32
1575
1577 {
1578 } // ~Inst_VOP3__V_CMP_U_F32
1579
1580 // --- description from .arch file ---
1581 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
1582 void
1584 {
1585 Wavefront *wf = gpuDynInst->wavefront();
1586 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1587 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1588 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1589
1590 src0.readSrc();
1591 src1.readSrc();
1592
1593 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1594 if (wf->execMask(lane)) {
1595 sdst.setBit(lane, (std::isnan(src0[lane])
1596 || std::isnan(src1[lane])) ? 1 : 0);
1597 }
1598 }
1599
1600 sdst.write();
1601 } // execute
1602 // --- Inst_VOP3__V_CMP_NGE_F32 class methods ---
1603
1605 InFmt_VOP3A *iFmt)
1606 : Inst_VOP3A(iFmt, "v_cmp_nge_f32", true)
1607 {
1608 setFlag(ALU);
1609 setFlag(F32);
1610 } // Inst_VOP3__V_CMP_NGE_F32
1611
1613 {
1614 } // ~Inst_VOP3__V_CMP_NGE_F32
1615
1616 // --- description from .arch file ---
1617 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
1618 void
1620 {
1621 Wavefront *wf = gpuDynInst->wavefront();
1622 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1623 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1624 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1625
1626 src0.readSrc();
1627 src1.readSrc();
1628
1629 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1630 if (wf->execMask(lane)) {
1631 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
1632 }
1633 }
1634
1635 sdst.write();
1636 } // execute
1637 // --- Inst_VOP3__V_CMP_NLG_F32 class methods ---
1638
1640 InFmt_VOP3A *iFmt)
1641 : Inst_VOP3A(iFmt, "v_cmp_nlg_f32", true)
1642 {
1643 setFlag(ALU);
1644 setFlag(F32);
1645 } // Inst_VOP3__V_CMP_NLG_F32
1646
1648 {
1649 } // ~Inst_VOP3__V_CMP_NLG_F32
1650
1651 // --- description from .arch file ---
1652 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
1653 void
1655 {
1656 Wavefront *wf = gpuDynInst->wavefront();
1657 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1658 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1659 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1660
1661 src0.readSrc();
1662 src1.readSrc();
1663
1664 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1665 if (wf->execMask(lane)) {
1666 sdst.setBit(lane, !(src0[lane] < src1[lane]
1667 || src0[lane] > src1[lane]) ? 1 : 0);
1668 }
1669 }
1670
1671 sdst.write();
1672 } // execute
1673 // --- Inst_VOP3__V_CMP_NGT_F32 class methods ---
1674
1676 InFmt_VOP3A *iFmt)
1677 : Inst_VOP3A(iFmt, "v_cmp_ngt_f32", true)
1678 {
1679 setFlag(ALU);
1680 setFlag(F32);
1681 } // Inst_VOP3__V_CMP_NGT_F32
1682
1684 {
1685 } // ~Inst_VOP3__V_CMP_NGT_F32
1686
1687 // --- description from .arch file ---
1688 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
1689 void
1691 {
1692 Wavefront *wf = gpuDynInst->wavefront();
1693 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1694 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1695 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1696
1697 src0.readSrc();
1698 src1.readSrc();
1699
1700 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1701 if (wf->execMask(lane)) {
1702 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
1703 }
1704 }
1705
1706 sdst.write();
1707 } // execute
1708 // --- Inst_VOP3__V_CMP_NLE_F32 class methods ---
1709
1711 InFmt_VOP3A *iFmt)
1712 : Inst_VOP3A(iFmt, "v_cmp_nle_f32", true)
1713 {
1714 setFlag(ALU);
1715 setFlag(F32);
1716 } // Inst_VOP3__V_CMP_NLE_F32
1717
1719 {
1720 } // ~Inst_VOP3__V_CMP_NLE_F32
1721
1722 // --- description from .arch file ---
1723 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
1724 void
1726 {
1727 Wavefront *wf = gpuDynInst->wavefront();
1728 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1729 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1730 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1731
1732 src0.readSrc();
1733 src1.readSrc();
1734
1735 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1736 if (wf->execMask(lane)) {
1737 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
1738 }
1739 }
1740
1741 sdst.write();
1742 } // execute
1743 // --- Inst_VOP3__V_CMP_NEQ_F32 class methods ---
1744
1746 InFmt_VOP3A *iFmt)
1747 : Inst_VOP3A(iFmt, "v_cmp_neq_f32", true)
1748 {
1749 setFlag(ALU);
1750 setFlag(F32);
1751 } // Inst_VOP3__V_CMP_NEQ_F32
1752
1754 {
1755 } // ~Inst_VOP3__V_CMP_NEQ_F32
1756
1757 // --- description from .arch file ---
1758 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
1759 void
1761 {
1762 Wavefront *wf = gpuDynInst->wavefront();
1763 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1764 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1765 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1766
1767 src0.readSrc();
1768 src1.readSrc();
1769
1770 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1771 if (wf->execMask(lane)) {
1772 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
1773 }
1774 }
1775
1776 sdst.write();
1777 } // execute
1778 // --- Inst_VOP3__V_CMP_NLT_F32 class methods ---
1779
1781 InFmt_VOP3A *iFmt)
1782 : Inst_VOP3A(iFmt, "v_cmp_nlt_f32", true)
1783 {
1784 setFlag(ALU);
1785 setFlag(F32);
1786 } // Inst_VOP3__V_CMP_NLT_F32
1787
1789 {
1790 } // ~Inst_VOP3__V_CMP_NLT_F32
1791
1792 // --- description from .arch file ---
1793 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
1794 void
1796 {
1797 Wavefront *wf = gpuDynInst->wavefront();
1798 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1799 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1800 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1801
1802 src0.readSrc();
1803 src1.readSrc();
1804
1805 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1806 if (wf->execMask(lane)) {
1807 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
1808 }
1809 }
1810
1811 sdst.write();
1812 } // execute
1813 // --- Inst_VOP3__V_CMP_TRU_F32 class methods ---
1814
1816 InFmt_VOP3A *iFmt)
1817 : Inst_VOP3A(iFmt, "v_cmp_tru_f32", true)
1818 {
1819 setFlag(ALU);
1820 setFlag(F32);
1821 } // Inst_VOP3__V_CMP_TRU_F32
1822
1824 {
1825 } // ~Inst_VOP3__V_CMP_TRU_F32
1826
1827 // --- description from .arch file ---
1828 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
1829 void
1831 {
1832 Wavefront *wf = gpuDynInst->wavefront();
1833 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1834
1835 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1836 if (wf->execMask(lane)) {
1837 sdst.setBit(lane, 1);
1838 }
1839 }
1840
1841 sdst.write();
1842 } // execute
1843 // --- Inst_VOP3__V_CMPX_F_F32 class methods ---
1844
1846 InFmt_VOP3A *iFmt)
1847 : Inst_VOP3A(iFmt, "v_cmpx_f_f32", true)
1848 {
1849 setFlag(ALU);
1850 setFlag(F32);
1851 setFlag(WritesEXEC);
1852 } // Inst_VOP3__V_CMPX_F_F32
1853
1855 {
1856 } // ~Inst_VOP3__V_CMPX_F_F32
1857
1858 // --- description from .arch file ---
1859 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
1860 void
1862 {
1863 Wavefront *wf = gpuDynInst->wavefront();
1864 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1865
1866 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1867 if (wf->execMask(lane)) {
1868 sdst.setBit(lane, 0);
1869 }
1870 }
1871
1872 wf->execMask() = sdst.rawData();
1873 sdst.write();
1874 } // execute
1875 // --- Inst_VOP3__V_CMPX_LT_F32 class methods ---
1876
1878 InFmt_VOP3A *iFmt)
1879 : Inst_VOP3A(iFmt, "v_cmpx_lt_f32", true)
1880 {
1881 setFlag(ALU);
1882 setFlag(F32);
1883 setFlag(WritesEXEC);
1884 } // Inst_VOP3__V_CMPX_LT_F32
1885
1887 {
1888 } // ~Inst_VOP3__V_CMPX_LT_F32
1889
1890 // --- description from .arch file ---
1891 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
1892 void
1894 {
1895 Wavefront *wf = gpuDynInst->wavefront();
1896 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1897 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1898 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1899
1900 src0.readSrc();
1901 src1.readSrc();
1902
1903 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1904 if (wf->execMask(lane)) {
1905 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
1906 }
1907 }
1908
1909 wf->execMask() = sdst.rawData();
1910 sdst.write();
1911 } // execute
1912 // --- Inst_VOP3__V_CMPX_EQ_F32 class methods ---
1913
1915 InFmt_VOP3A *iFmt)
1916 : Inst_VOP3A(iFmt, "v_cmpx_eq_f32", true)
1917 {
1918 setFlag(ALU);
1919 setFlag(F32);
1920 setFlag(WritesEXEC);
1921 } // Inst_VOP3__V_CMPX_EQ_F32
1922
1924 {
1925 } // ~Inst_VOP3__V_CMPX_EQ_F32
1926
1927 // --- description from .arch file ---
1928 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
1929 void
1931 {
1932 Wavefront *wf = gpuDynInst->wavefront();
1933 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1934 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1935 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1936
1937 src0.readSrc();
1938 src1.readSrc();
1939
1940 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1941 if (wf->execMask(lane)) {
1942 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
1943 }
1944 }
1945
1946 wf->execMask() = sdst.rawData();
1947 sdst.write();
1948 } // execute
1949 // --- Inst_VOP3__V_CMPX_LE_F32 class methods ---
1950
1952 InFmt_VOP3A *iFmt)
1953 : Inst_VOP3A(iFmt, "v_cmpx_le_f32", true)
1954 {
1955 setFlag(ALU);
1956 setFlag(F32);
1957 setFlag(WritesEXEC);
1958 } // Inst_VOP3__V_CMPX_LE_F32
1959
1961 {
1962 } // ~Inst_VOP3__V_CMPX_LE_F32
1963
1964 // --- description from .arch file ---
1965 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
1966 void
1968 {
1969 Wavefront *wf = gpuDynInst->wavefront();
1970 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
1971 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
1972 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
1973
1974 src0.readSrc();
1975 src1.readSrc();
1976
1977 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
1978 if (wf->execMask(lane)) {
1979 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
1980 }
1981 }
1982
1983 wf->execMask() = sdst.rawData();
1984 sdst.write();
1985 } // execute
1986 // --- Inst_VOP3__V_CMPX_GT_F32 class methods ---
1987
1989 InFmt_VOP3A *iFmt)
1990 : Inst_VOP3A(iFmt, "v_cmpx_gt_f32", true)
1991 {
1992 setFlag(ALU);
1993 setFlag(F32);
1994 setFlag(WritesEXEC);
1995 } // Inst_VOP3__V_CMPX_GT_F32
1996
1998 {
1999 } // ~Inst_VOP3__V_CMPX_GT_F32
2000
2001 // --- description from .arch file ---
2002 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
2003 void
2005 {
2006 Wavefront *wf = gpuDynInst->wavefront();
2007 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2008 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2009 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2010
2011 src0.readSrc();
2012 src1.readSrc();
2013
2014 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2015 if (wf->execMask(lane)) {
2016 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
2017 }
2018 }
2019
2020 wf->execMask() = sdst.rawData();
2021 sdst.write();
2022 } // execute
2023 // --- Inst_VOP3__V_CMPX_LG_F32 class methods ---
2024
2026 InFmt_VOP3A *iFmt)
2027 : Inst_VOP3A(iFmt, "v_cmpx_lg_f32", true)
2028 {
2029 setFlag(ALU);
2030 setFlag(F32);
2031 setFlag(WritesEXEC);
2032 } // Inst_VOP3__V_CMPX_LG_F32
2033
2035 {
2036 } // ~Inst_VOP3__V_CMPX_LG_F32
2037
2038 // --- description from .arch file ---
2039 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
2040 void
2042 {
2043 Wavefront *wf = gpuDynInst->wavefront();
2044 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2045 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2046 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2047
2048 src0.readSrc();
2049 src1.readSrc();
2050
2051 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2052 if (wf->execMask(lane)) {
2053 sdst.setBit(lane, (src0[lane] < src1[lane]
2054 || src0[lane] > src1[lane]) ? 1 : 0);
2055 }
2056 }
2057
2058 wf->execMask() = sdst.rawData();
2059 sdst.write();
2060 } // execute
2061 // --- Inst_VOP3__V_CMPX_GE_F32 class methods ---
2062
2064 InFmt_VOP3A *iFmt)
2065 : Inst_VOP3A(iFmt, "v_cmpx_ge_f32", true)
2066 {
2067 setFlag(ALU);
2068 setFlag(F32);
2069 setFlag(WritesEXEC);
2070 } // Inst_VOP3__V_CMPX_GE_F32
2071
2073 {
2074 } // ~Inst_VOP3__V_CMPX_GE_F32
2075
2076 // --- description from .arch file ---
2077 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
2078 void
2080 {
2081 Wavefront *wf = gpuDynInst->wavefront();
2082 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2083 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2084 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2085
2086 src0.readSrc();
2087 src1.readSrc();
2088
2089 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2090 if (wf->execMask(lane)) {
2091 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
2092 }
2093 }
2094
2095 wf->execMask() = sdst.rawData();
2096 sdst.write();
2097 } // execute
2098 // --- Inst_VOP3__V_CMPX_O_F32 class methods ---
2099
2101 InFmt_VOP3A *iFmt)
2102 : Inst_VOP3A(iFmt, "v_cmpx_o_f32", true)
2103 {
2104 setFlag(ALU);
2105 setFlag(F32);
2106 setFlag(WritesEXEC);
2107 } // Inst_VOP3__V_CMPX_O_F32
2108
2110 {
2111 } // ~Inst_VOP3__V_CMPX_O_F32
2112
2113 // --- description from .arch file ---
2114 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
2115 // encoding.
2116 void
2118 {
2119 Wavefront *wf = gpuDynInst->wavefront();
2120 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2121 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2122 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2123
2124 src0.readSrc();
2125 src1.readSrc();
2126
2127 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2128 if (wf->execMask(lane)) {
2129 sdst.setBit(lane, (!std::isnan(src0[lane])
2130 && !std::isnan(src1[lane])) ? 1 : 0);
2131 }
2132 }
2133
2134 wf->execMask() = sdst.rawData();
2135 sdst.write();
2136 } // execute
2137 // --- Inst_VOP3__V_CMPX_U_F32 class methods ---
2138
2140 InFmt_VOP3A *iFmt)
2141 : Inst_VOP3A(iFmt, "v_cmpx_u_f32", true)
2142 {
2143 setFlag(ALU);
2144 setFlag(F32);
2145 setFlag(WritesEXEC);
2146 } // Inst_VOP3__V_CMPX_U_F32
2147
2149 {
2150 } // ~Inst_VOP3__V_CMPX_U_F32
2151
2152 // --- description from .arch file ---
2153 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
2154 // encoding.
2155 void
2157 {
2158 Wavefront *wf = gpuDynInst->wavefront();
2159 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2160 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2161 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2162
2163 src0.readSrc();
2164 src1.readSrc();
2165
2166 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2167 if (wf->execMask(lane)) {
2168 sdst.setBit(lane, (std::isnan(src0[lane])
2169 || std::isnan(src1[lane])) ? 1 : 0);
2170 }
2171 }
2172
2173 wf->execMask() = sdst.rawData();
2174 sdst.write();
2175 } // execute
2176 // --- Inst_VOP3__V_CMPX_NGE_F32 class methods ---
2177
2179 InFmt_VOP3A *iFmt)
2180 : Inst_VOP3A(iFmt, "v_cmpx_nge_f32", true)
2181 {
2182 setFlag(ALU);
2183 setFlag(F32);
2184 setFlag(WritesEXEC);
2185 } // Inst_VOP3__V_CMPX_NGE_F32
2186
2188 {
2189 } // ~Inst_VOP3__V_CMPX_NGE_F32
2190
2191 // --- description from .arch file ---
2192 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
2193 void
2195 {
2196 Wavefront *wf = gpuDynInst->wavefront();
2197 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2198 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2199 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2200
2201 src0.readSrc();
2202 src1.readSrc();
2203
2204 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2205 if (wf->execMask(lane)) {
2206 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
2207 }
2208 }
2209
2210 wf->execMask() = sdst.rawData();
2211 sdst.write();
2212 } // execute
2213 // --- Inst_VOP3__V_CMPX_NLG_F32 class methods ---
2214
2216 InFmt_VOP3A *iFmt)
2217 : Inst_VOP3A(iFmt, "v_cmpx_nlg_f32", true)
2218 {
2219 setFlag(ALU);
2220 setFlag(F32);
2221 setFlag(WritesEXEC);
2222 } // Inst_VOP3__V_CMPX_NLG_F32
2223
2225 {
2226 } // ~Inst_VOP3__V_CMPX_NLG_F32
2227
2228 // --- description from .arch file ---
2229 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
2230 void
2232 {
2233 Wavefront *wf = gpuDynInst->wavefront();
2234 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2235 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2236 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2237
2238 src0.readSrc();
2239 src1.readSrc();
2240
2241 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2242 if (wf->execMask(lane)) {
2243 sdst.setBit(lane, !(src0[lane] < src1[lane]
2244 || src0[lane] > src1[lane]) ? 1 : 0);
2245 }
2246 }
2247
2248 wf->execMask() = sdst.rawData();
2249 sdst.write();
2250 } // execute
2251 // --- Inst_VOP3__V_CMPX_NGT_F32 class methods ---
2252
2254 InFmt_VOP3A *iFmt)
2255 : Inst_VOP3A(iFmt, "v_cmpx_ngt_f32", true)
2256 {
2257 setFlag(ALU);
2258 setFlag(F32);
2259 setFlag(WritesEXEC);
2260 } // Inst_VOP3__V_CMPX_NGT_F32
2261
2263 {
2264 } // ~Inst_VOP3__V_CMPX_NGT_F32
2265
2266 // --- description from .arch file ---
2267 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
2268 void
2270 {
2271 Wavefront *wf = gpuDynInst->wavefront();
2272 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2273 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2274 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2275
2276 src0.readSrc();
2277 src1.readSrc();
2278
2279 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2280 if (wf->execMask(lane)) {
2281 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
2282 }
2283 }
2284
2285 wf->execMask() = sdst.rawData();
2286 sdst.write();
2287 } // execute
2288 // --- Inst_VOP3__V_CMPX_NLE_F32 class methods ---
2289
2291 InFmt_VOP3A *iFmt)
2292 : Inst_VOP3A(iFmt, "v_cmpx_nle_f32", true)
2293 {
2294 setFlag(ALU);
2295 setFlag(F32);
2296 setFlag(WritesEXEC);
2297 } // Inst_VOP3__V_CMPX_NLE_F32
2298
2300 {
2301 } // ~Inst_VOP3__V_CMPX_NLE_F32
2302
2303 // --- description from .arch file ---
2304 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
2305 void
2307 {
2308 Wavefront *wf = gpuDynInst->wavefront();
2309 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2310 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2311 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2312
2313 src0.readSrc();
2314 src1.readSrc();
2315
2316 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2317 if (wf->execMask(lane)) {
2318 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
2319 }
2320 }
2321
2322 wf->execMask() = sdst.rawData();
2323 sdst.write();
2324 } // execute
2325 // --- Inst_VOP3__V_CMPX_NEQ_F32 class methods ---
2326
2328 InFmt_VOP3A *iFmt)
2329 : Inst_VOP3A(iFmt, "v_cmpx_neq_f32", true)
2330 {
2331 setFlag(ALU);
2332 setFlag(F32);
2333 setFlag(WritesEXEC);
2334 } // Inst_VOP3__V_CMPX_NEQ_F32
2335
2337 {
2338 } // ~Inst_VOP3__V_CMPX_NEQ_F32
2339
2340 // --- description from .arch file ---
2341 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
2342 void
2344 {
2345 Wavefront *wf = gpuDynInst->wavefront();
2346 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2347 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2348 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2349
2350 src0.readSrc();
2351 src1.readSrc();
2352
2353 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2354 if (wf->execMask(lane)) {
2355 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
2356 }
2357 }
2358
2359 wf->execMask() = sdst.rawData();
2360 sdst.write();
2361 } // execute
2362 // --- Inst_VOP3__V_CMPX_NLT_F32 class methods ---
2363
2365 InFmt_VOP3A *iFmt)
2366 : Inst_VOP3A(iFmt, "v_cmpx_nlt_f32", true)
2367 {
2368 setFlag(ALU);
2369 setFlag(F32);
2370 setFlag(WritesEXEC);
2371 } // Inst_VOP3__V_CMPX_NLT_F32
2372
2374 {
2375 } // ~Inst_VOP3__V_CMPX_NLT_F32
2376
2377 // --- description from .arch file ---
2378 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
2379 void
2381 {
2382 Wavefront *wf = gpuDynInst->wavefront();
2383 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
2384 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
2385 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2386
2387 src0.readSrc();
2388 src1.readSrc();
2389
2390 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2391 if (wf->execMask(lane)) {
2392 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
2393 }
2394 }
2395
2396 wf->execMask() = sdst.rawData();
2397 sdst.write();
2398 } // execute
2399 // --- Inst_VOP3__V_CMPX_TRU_F32 class methods ---
2400
2402 InFmt_VOP3A *iFmt)
2403 : Inst_VOP3A(iFmt, "v_cmpx_tru_f32", true)
2404 {
2405 setFlag(ALU);
2406 setFlag(F32);
2407 setFlag(WritesEXEC);
2408 } // Inst_VOP3__V_CMPX_TRU_F32
2409
2411 {
2412 } // ~Inst_VOP3__V_CMPX_TRU_F32
2413
2414 // --- description from .arch file ---
2415 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
2416 void
2418 {
2419 Wavefront *wf = gpuDynInst->wavefront();
2420 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2421
2422 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2423 if (wf->execMask(lane)) {
2424 sdst.setBit(lane, 1);
2425 }
2426 }
2427
2428 wf->execMask() = sdst.rawData();
2429 sdst.write();
2430 } // execute
2431 // --- Inst_VOP3__V_CMP_F_F64 class methods ---
2432
2434 : Inst_VOP3A(iFmt, "v_cmp_f_f64", true)
2435 {
2436 setFlag(ALU);
2437 setFlag(F64);
2438 } // Inst_VOP3__V_CMP_F_F64
2439
2441 {
2442 } // ~Inst_VOP3__V_CMP_F_F64
2443
2444 // --- description from .arch file ---
2445 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
2446 void
2448 {
2449 Wavefront *wf = gpuDynInst->wavefront();
2450 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2451
2452 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2453 if (wf->execMask(lane)) {
2454 sdst.setBit(lane, 0);
2455 }
2456 }
2457
2458 sdst.write();
2459 } // execute
2460 // --- Inst_VOP3__V_CMP_LT_F64 class methods ---
2461
2463 InFmt_VOP3A *iFmt)
2464 : Inst_VOP3A(iFmt, "v_cmp_lt_f64", true)
2465 {
2466 setFlag(ALU);
2467 setFlag(F64);
2468 } // Inst_VOP3__V_CMP_LT_F64
2469
2471 {
2472 } // ~Inst_VOP3__V_CMP_LT_F64
2473
2474 // --- description from .arch file ---
2475 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
2476 void
2478 {
2479 Wavefront *wf = gpuDynInst->wavefront();
2480 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2481 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2482 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2483
2484 src0.readSrc();
2485 src1.readSrc();
2486
2487 if (instData.ABS & 0x1) {
2488 src0.absModifier();
2489 }
2490
2491 if (instData.ABS & 0x2) {
2492 src1.absModifier();
2493 }
2494
2495 if (extData.NEG & 0x1) {
2496 src0.negModifier();
2497 }
2498
2499 if (extData.NEG & 0x2) {
2500 src1.negModifier();
2501 }
2502
2506 assert(!(instData.ABS & 0x4));
2507 assert(!(extData.NEG & 0x4));
2508
2509 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2510 if (wf->execMask(lane)) {
2511 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
2512 }
2513 }
2514
2515 sdst.write();
2516 } // execute
2517 // --- Inst_VOP3__V_CMP_EQ_F64 class methods ---
2518
2520 InFmt_VOP3A *iFmt)
2521 : Inst_VOP3A(iFmt, "v_cmp_eq_f64", true)
2522 {
2523 setFlag(ALU);
2524 setFlag(F64);
2525 } // Inst_VOP3__V_CMP_EQ_F64
2526
2528 {
2529 } // ~Inst_VOP3__V_CMP_EQ_F64
2530
2531 // --- description from .arch file ---
2532 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
2533 void
2535 {
2536 Wavefront *wf = gpuDynInst->wavefront();
2537 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2538 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2539 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2540
2541 src0.readSrc();
2542 src1.readSrc();
2543
2544 if (instData.ABS & 0x1) {
2545 src0.absModifier();
2546 }
2547
2548 if (instData.ABS & 0x2) {
2549 src1.absModifier();
2550 }
2551
2552 if (extData.NEG & 0x1) {
2553 src0.negModifier();
2554 }
2555
2556 if (extData.NEG & 0x2) {
2557 src1.negModifier();
2558 }
2559
2563 assert(!(instData.ABS & 0x4));
2564 assert(!(extData.NEG & 0x4));
2565
2566 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2567 if (wf->execMask(lane)) {
2568 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
2569 }
2570 }
2571
2572 sdst.write();
2573 } // execute
2574 // --- Inst_VOP3__V_CMP_LE_F64 class methods ---
2575
2577 InFmt_VOP3A *iFmt)
2578 : Inst_VOP3A(iFmt, "v_cmp_le_f64", true)
2579 {
2580 setFlag(ALU);
2581 setFlag(F64);
2582 } // Inst_VOP3__V_CMP_LE_F64
2583
2585 {
2586 } // ~Inst_VOP3__V_CMP_LE_F64
2587
2588 // --- description from .arch file ---
2589 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
2590 void
2592 {
2593 Wavefront *wf = gpuDynInst->wavefront();
2594 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2595 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2596 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2597
2598 src0.readSrc();
2599 src1.readSrc();
2600
2601 if (instData.ABS & 0x1) {
2602 src0.absModifier();
2603 }
2604
2605 if (instData.ABS & 0x2) {
2606 src1.absModifier();
2607 }
2608
2609 if (extData.NEG & 0x1) {
2610 src0.negModifier();
2611 }
2612
2613 if (extData.NEG & 0x2) {
2614 src1.negModifier();
2615 }
2616
2620 assert(!(instData.ABS & 0x4));
2621 assert(!(extData.NEG & 0x4));
2622
2623 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2624 if (wf->execMask(lane)) {
2625 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
2626 }
2627 }
2628
2629 sdst.write();
2630 } // execute
2631 // --- Inst_VOP3__V_CMP_GT_F64 class methods ---
2632
2634 InFmt_VOP3A *iFmt)
2635 : Inst_VOP3A(iFmt, "v_cmp_gt_f64", true)
2636 {
2637 setFlag(ALU);
2638 setFlag(F64);
2639 } // Inst_VOP3__V_CMP_GT_F64
2640
2642 {
2643 } // ~Inst_VOP3__V_CMP_GT_F64
2644
2645 // --- description from .arch file ---
2646 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
2647 void
2649 {
2650 Wavefront *wf = gpuDynInst->wavefront();
2651 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2652 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2653 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2654
2655 src0.readSrc();
2656 src1.readSrc();
2657
2658 if (instData.ABS & 0x1) {
2659 src0.absModifier();
2660 }
2661
2662 if (instData.ABS & 0x2) {
2663 src1.absModifier();
2664 }
2665
2666 if (extData.NEG & 0x1) {
2667 src0.negModifier();
2668 }
2669
2670 if (extData.NEG & 0x2) {
2671 src1.negModifier();
2672 }
2673
2677 assert(!(instData.ABS & 0x4));
2678 assert(!(extData.NEG & 0x4));
2679
2680 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2681 if (wf->execMask(lane)) {
2682 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
2683 }
2684 }
2685
2686 sdst.write();
2687 } // execute
2688 // --- Inst_VOP3__V_CMP_LG_F64 class methods ---
2689
2691 InFmt_VOP3A *iFmt)
2692 : Inst_VOP3A(iFmt, "v_cmp_lg_f64", true)
2693 {
2694 setFlag(ALU);
2695 setFlag(F64);
2696 } // Inst_VOP3__V_CMP_LG_F64
2697
2699 {
2700 } // ~Inst_VOP3__V_CMP_LG_F64
2701
2702 // --- description from .arch file ---
2703 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
2704 void
2706 {
2707 Wavefront *wf = gpuDynInst->wavefront();
2708 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2709 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2710 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2711
2712 src0.readSrc();
2713 src1.readSrc();
2714
2715 if (instData.ABS & 0x1) {
2716 src0.absModifier();
2717 }
2718
2719 if (instData.ABS & 0x2) {
2720 src1.absModifier();
2721 }
2722
2723 if (extData.NEG & 0x1) {
2724 src0.negModifier();
2725 }
2726
2727 if (extData.NEG & 0x2) {
2728 src1.negModifier();
2729 }
2730
2734 assert(!(instData.ABS & 0x4));
2735 assert(!(extData.NEG & 0x4));
2736
2737 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2738 if (wf->execMask(lane)) {
2739 sdst.setBit(lane, (src0[lane] < src1[lane]
2740 || src0[lane] > src1[lane]) ? 1 : 0);
2741 }
2742 }
2743
2744 sdst.write();
2745 } // execute
2746 // --- Inst_VOP3__V_CMP_GE_F64 class methods ---
2747
2749 InFmt_VOP3A *iFmt)
2750 : Inst_VOP3A(iFmt, "v_cmp_ge_f64", true)
2751 {
2752 setFlag(ALU);
2753 setFlag(F64);
2754 } // Inst_VOP3__V_CMP_GE_F64
2755
2757 {
2758 } // ~Inst_VOP3__V_CMP_GE_F64
2759
2760 // --- description from .arch file ---
2761 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
2762 void
2764 {
2765 Wavefront *wf = gpuDynInst->wavefront();
2766 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2767 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2768 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2769
2770 src0.readSrc();
2771 src1.readSrc();
2772
2773 if (instData.ABS & 0x1) {
2774 src0.absModifier();
2775 }
2776
2777 if (instData.ABS & 0x2) {
2778 src1.absModifier();
2779 }
2780
2781 if (extData.NEG & 0x1) {
2782 src0.negModifier();
2783 }
2784
2785 if (extData.NEG & 0x2) {
2786 src1.negModifier();
2787 }
2788
2792 assert(!(instData.ABS & 0x4));
2793 assert(!(extData.NEG & 0x4));
2794
2795 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2796 if (wf->execMask(lane)) {
2797 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
2798 }
2799 }
2800
2801 sdst.write();
2802 } // execute
2803 // --- Inst_VOP3__V_CMP_O_F64 class methods ---
2804
2806 : Inst_VOP3A(iFmt, "v_cmp_o_f64", true)
2807 {
2808 setFlag(ALU);
2809 setFlag(F64);
2810 } // Inst_VOP3__V_CMP_O_F64
2811
2813 {
2814 } // ~Inst_VOP3__V_CMP_O_F64
2815
2816 // --- description from .arch file ---
2817 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
2818 void
2820 {
2821 Wavefront *wf = gpuDynInst->wavefront();
2822 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2823 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2824 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2825
2826 src0.readSrc();
2827 src1.readSrc();
2828
2829 if (instData.ABS & 0x1) {
2830 src0.absModifier();
2831 }
2832
2833 if (instData.ABS & 0x2) {
2834 src1.absModifier();
2835 }
2836
2837 if (extData.NEG & 0x1) {
2838 src0.negModifier();
2839 }
2840
2841 if (extData.NEG & 0x2) {
2842 src1.negModifier();
2843 }
2844
2848 assert(!(instData.ABS & 0x4));
2849 assert(!(extData.NEG & 0x4));
2850
2851 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2852 if (wf->execMask(lane)) {
2853 sdst.setBit(lane, (!std::isnan(src0[lane])
2854 && !std::isnan(src1[lane])) ? 1 : 0);
2855 }
2856 }
2857
2858 sdst.write();
2859 } // execute
2860 // --- Inst_VOP3__V_CMP_U_F64 class methods ---
2861
2863 : Inst_VOP3A(iFmt, "v_cmp_u_f64", true)
2864 {
2865 setFlag(ALU);
2866 setFlag(F64);
2867 } // Inst_VOP3__V_CMP_U_F64
2868
2870 {
2871 } // ~Inst_VOP3__V_CMP_U_F64
2872
2873 // --- description from .arch file ---
2874 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
2875 void
2877 {
2878 Wavefront *wf = gpuDynInst->wavefront();
2879 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2880 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2881 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2882
2883 src0.readSrc();
2884 src1.readSrc();
2885
2886 if (instData.ABS & 0x1) {
2887 src0.absModifier();
2888 }
2889
2890 if (instData.ABS & 0x2) {
2891 src1.absModifier();
2892 }
2893
2894 if (extData.NEG & 0x1) {
2895 src0.negModifier();
2896 }
2897
2898 if (extData.NEG & 0x2) {
2899 src1.negModifier();
2900 }
2901
2905 assert(!(instData.ABS & 0x4));
2906 assert(!(extData.NEG & 0x4));
2907
2908 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2909 if (wf->execMask(lane)) {
2910 sdst.setBit(lane, (std::isnan(src0[lane])
2911 || std::isnan(src1[lane])) ? 1 : 0);
2912 }
2913 }
2914
2915 sdst.write();
2916 } // execute
2917 // --- Inst_VOP3__V_CMP_NGE_F64 class methods ---
2918
2920 InFmt_VOP3A *iFmt)
2921 : Inst_VOP3A(iFmt, "v_cmp_nge_f64", true)
2922 {
2923 setFlag(ALU);
2924 setFlag(F64);
2925 } // Inst_VOP3__V_CMP_NGE_F64
2926
2928 {
2929 } // ~Inst_VOP3__V_CMP_NGE_F64
2930
2931 // --- description from .arch file ---
2932 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
2933 void
2935 {
2936 Wavefront *wf = gpuDynInst->wavefront();
2937 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2938 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2939 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2940
2941 src0.readSrc();
2942 src1.readSrc();
2943
2944 if (instData.ABS & 0x1) {
2945 src0.absModifier();
2946 }
2947
2948 if (instData.ABS & 0x2) {
2949 src1.absModifier();
2950 }
2951
2952 if (extData.NEG & 0x1) {
2953 src0.negModifier();
2954 }
2955
2956 if (extData.NEG & 0x2) {
2957 src1.negModifier();
2958 }
2959
2963 assert(!(instData.ABS & 0x4));
2964 assert(!(extData.NEG & 0x4));
2965
2966 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2967 if (wf->execMask(lane)) {
2968 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
2969 }
2970 }
2971
2972 sdst.write();
2973 } // execute
2974 // --- Inst_VOP3__V_CMP_NLG_F64 class methods ---
2975
2977 InFmt_VOP3A *iFmt)
2978 : Inst_VOP3A(iFmt, "v_cmp_nlg_f64", true)
2979 {
2980 setFlag(ALU);
2981 setFlag(F64);
2982 } // Inst_VOP3__V_CMP_NLG_F64
2983
2985 {
2986 } // ~Inst_VOP3__V_CMP_NLG_F64
2987
2988 // --- description from .arch file ---
2989 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
2990 void
2992 {
2993 Wavefront *wf = gpuDynInst->wavefront();
2994 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
2995 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
2996 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
2997
2998 src0.readSrc();
2999 src1.readSrc();
3000
3001 if (instData.ABS & 0x1) {
3002 src0.absModifier();
3003 }
3004
3005 if (instData.ABS & 0x2) {
3006 src1.absModifier();
3007 }
3008
3009 if (extData.NEG & 0x1) {
3010 src0.negModifier();
3011 }
3012
3013 if (extData.NEG & 0x2) {
3014 src1.negModifier();
3015 }
3016
3020 assert(!(instData.ABS & 0x4));
3021 assert(!(extData.NEG & 0x4));
3022
3023 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3024 if (wf->execMask(lane)) {
3025 sdst.setBit(lane, !(src0[lane] < src1[lane]
3026 || src0[lane] > src1[lane]) ? 1 : 0);
3027 }
3028 }
3029
3030 sdst.write();
3031 } // execute
3032 // --- Inst_VOP3__V_CMP_NGT_F64 class methods ---
3033
3035 InFmt_VOP3A *iFmt)
3036 : Inst_VOP3A(iFmt, "v_cmp_ngt_f64", true)
3037 {
3038 setFlag(ALU);
3039 setFlag(F64);
3040 } // Inst_VOP3__V_CMP_NGT_F64
3041
3043 {
3044 } // ~Inst_VOP3__V_CMP_NGT_F64
3045
3046 // --- description from .arch file ---
3047 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
3048 void
3050 {
3051 Wavefront *wf = gpuDynInst->wavefront();
3052 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3053 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3054 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3055
3056 src0.readSrc();
3057 src1.readSrc();
3058
3059 if (instData.ABS & 0x1) {
3060 src0.absModifier();
3061 }
3062
3063 if (instData.ABS & 0x2) {
3064 src1.absModifier();
3065 }
3066
3067 if (extData.NEG & 0x1) {
3068 src0.negModifier();
3069 }
3070
3071 if (extData.NEG & 0x2) {
3072 src1.negModifier();
3073 }
3074
3078 assert(!(instData.ABS & 0x4));
3079 assert(!(extData.NEG & 0x4));
3080
3081 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3082 if (wf->execMask(lane)) {
3083 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
3084 }
3085 }
3086
3087 sdst.write();
3088 } // execute
3089 // --- Inst_VOP3__V_CMP_NLE_F64 class methods ---
3090
3092 InFmt_VOP3A *iFmt)
3093 : Inst_VOP3A(iFmt, "v_cmp_nle_f64", true)
3094 {
3095 setFlag(ALU);
3096 setFlag(F64);
3097 } // Inst_VOP3__V_CMP_NLE_F64
3098
3100 {
3101 } // ~Inst_VOP3__V_CMP_NLE_F64
3102
3103 // --- description from .arch file ---
3104 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
3105 void
3107 {
3108 Wavefront *wf = gpuDynInst->wavefront();
3109 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3110 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3111 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3112
3113 src0.readSrc();
3114 src1.readSrc();
3115
3116 if (instData.ABS & 0x1) {
3117 src0.absModifier();
3118 }
3119
3120 if (instData.ABS & 0x2) {
3121 src1.absModifier();
3122 }
3123
3124 if (extData.NEG & 0x1) {
3125 src0.negModifier();
3126 }
3127
3128 if (extData.NEG & 0x2) {
3129 src1.negModifier();
3130 }
3131
3135 assert(!(instData.ABS & 0x4));
3136 assert(!(extData.NEG & 0x4));
3137
3138 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3139 if (wf->execMask(lane)) {
3140 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
3141 }
3142 }
3143
3144 sdst.write();
3145 } // execute
3146 // --- Inst_VOP3__V_CMP_NEQ_F64 class methods ---
3147
3149 InFmt_VOP3A *iFmt)
3150 : Inst_VOP3A(iFmt, "v_cmp_neq_f64", true)
3151 {
3152 setFlag(ALU);
3153 setFlag(F64);
3154 } // Inst_VOP3__V_CMP_NEQ_F64
3155
3157 {
3158 } // ~Inst_VOP3__V_CMP_NEQ_F64
3159
3160 // --- description from .arch file ---
3161 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
3162 void
3164 {
3165 Wavefront *wf = gpuDynInst->wavefront();
3166 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3167 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3168 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3169
3170 src0.readSrc();
3171 src1.readSrc();
3172
3173 if (instData.ABS & 0x1) {
3174 src0.absModifier();
3175 }
3176
3177 if (instData.ABS & 0x2) {
3178 src1.absModifier();
3179 }
3180
3181 if (extData.NEG & 0x1) {
3182 src0.negModifier();
3183 }
3184
3185 if (extData.NEG & 0x2) {
3186 src1.negModifier();
3187 }
3188
3192 assert(!(instData.ABS & 0x4));
3193 assert(!(extData.NEG & 0x4));
3194
3195 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3196 if (wf->execMask(lane)) {
3197 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
3198 }
3199 }
3200
3201 sdst.write();
3202 } // execute
3203 // --- Inst_VOP3__V_CMP_NLT_F64 class methods ---
3204
3206 InFmt_VOP3A *iFmt)
3207 : Inst_VOP3A(iFmt, "v_cmp_nlt_f64", true)
3208 {
3209 setFlag(ALU);
3210 setFlag(F64);
3211 } // Inst_VOP3__V_CMP_NLT_F64
3212
3214 {
3215 } // ~Inst_VOP3__V_CMP_NLT_F64
3216
3217 // --- description from .arch file ---
3218 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
3219 void
3221 {
3222 Wavefront *wf = gpuDynInst->wavefront();
3223 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3224 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3225 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3226
3227 src0.readSrc();
3228 src1.readSrc();
3229
3230 if (instData.ABS & 0x1) {
3231 src0.absModifier();
3232 }
3233
3234 if (instData.ABS & 0x2) {
3235 src1.absModifier();
3236 }
3237
3238 if (extData.NEG & 0x1) {
3239 src0.negModifier();
3240 }
3241
3242 if (extData.NEG & 0x2) {
3243 src1.negModifier();
3244 }
3245
3249 assert(!(instData.ABS & 0x4));
3250 assert(!(extData.NEG & 0x4));
3251
3252 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3253 if (wf->execMask(lane)) {
3254 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
3255 }
3256 }
3257
3258 sdst.write();
3259 } // execute
3260 // --- Inst_VOP3__V_CMP_TRU_F64 class methods ---
3261
3263 InFmt_VOP3A *iFmt)
3264 : Inst_VOP3A(iFmt, "v_cmp_tru_f64", true)
3265 {
3266 setFlag(ALU);
3267 setFlag(F64);
3268 } // Inst_VOP3__V_CMP_TRU_F64
3269
3271 {
3272 } // ~Inst_VOP3__V_CMP_TRU_F64
3273
3274 // --- description from .arch file ---
3275 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
3276 void
3278 {
3279 Wavefront *wf = gpuDynInst->wavefront();
3280 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3281
3282 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3283 if (wf->execMask(lane)) {
3284 sdst.setBit(lane, 1);
3285 }
3286 }
3287
3288 sdst.write();
3289 } // execute
3290 // --- Inst_VOP3__V_CMPX_F_F64 class methods ---
3291
3293 InFmt_VOP3A *iFmt)
3294 : Inst_VOP3A(iFmt, "v_cmpx_f_f64", true)
3295 {
3296 setFlag(ALU);
3297 setFlag(F64);
3298 setFlag(WritesEXEC);
3299 } // Inst_VOP3__V_CMPX_F_F64
3300
3302 {
3303 } // ~Inst_VOP3__V_CMPX_F_F64
3304
3305 // --- description from .arch file ---
3306 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
3307 void
3309 {
3310 Wavefront *wf = gpuDynInst->wavefront();
3311 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3312
3313 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3314 if (wf->execMask(lane)) {
3315 sdst.setBit(lane, 0);
3316 }
3317 }
3318
3319 wf->execMask() = sdst.rawData();
3320 sdst.write();
3321 } // execute
3322 // --- Inst_VOP3__V_CMPX_LT_F64 class methods ---
3323
3325 InFmt_VOP3A *iFmt)
3326 : Inst_VOP3A(iFmt, "v_cmpx_lt_f64", true)
3327 {
3328 setFlag(ALU);
3329 setFlag(F64);
3330 setFlag(WritesEXEC);
3331 } // Inst_VOP3__V_CMPX_LT_F64
3332
3334 {
3335 } // ~Inst_VOP3__V_CMPX_LT_F64
3336
3337 // --- description from .arch file ---
3338 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
3339 void
3341 {
3342 Wavefront *wf = gpuDynInst->wavefront();
3343 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3344 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3345 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3346
3347 src0.readSrc();
3348 src1.readSrc();
3349
3350 if (instData.ABS & 0x1) {
3351 src0.absModifier();
3352 }
3353
3354 if (instData.ABS & 0x2) {
3355 src1.absModifier();
3356 }
3357
3358 if (extData.NEG & 0x1) {
3359 src0.negModifier();
3360 }
3361
3362 if (extData.NEG & 0x2) {
3363 src1.negModifier();
3364 }
3365
3369 assert(!(instData.ABS & 0x4));
3370 assert(!(extData.NEG & 0x4));
3371
3372 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3373 if (wf->execMask(lane)) {
3374 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
3375 }
3376 }
3377
3378 wf->execMask() = sdst.rawData();
3379 sdst.write();
3380 } // execute
3381 // --- Inst_VOP3__V_CMPX_EQ_F64 class methods ---
3382
3384 InFmt_VOP3A *iFmt)
3385 : Inst_VOP3A(iFmt, "v_cmpx_eq_f64", true)
3386 {
3387 setFlag(ALU);
3388 setFlag(F64);
3389 setFlag(WritesEXEC);
3390 } // Inst_VOP3__V_CMPX_EQ_F64
3391
3393 {
3394 } // ~Inst_VOP3__V_CMPX_EQ_F64
3395
3396 // --- description from .arch file ---
3397 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
3398 void
3400 {
3401 Wavefront *wf = gpuDynInst->wavefront();
3402 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3403 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3404 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3405
3406 src0.readSrc();
3407 src1.readSrc();
3408
3409 if (instData.ABS & 0x1) {
3410 src0.absModifier();
3411 }
3412
3413 if (instData.ABS & 0x2) {
3414 src1.absModifier();
3415 }
3416
3417 if (extData.NEG & 0x1) {
3418 src0.negModifier();
3419 }
3420
3421 if (extData.NEG & 0x2) {
3422 src1.negModifier();
3423 }
3424
3428 assert(!(instData.ABS & 0x4));
3429 assert(!(extData.NEG & 0x4));
3430
3431 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3432 if (wf->execMask(lane)) {
3433 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
3434 }
3435 }
3436
3437 wf->execMask() = sdst.rawData();
3438 sdst.write();
3439 } // execute
3440 // --- Inst_VOP3__V_CMPX_LE_F64 class methods ---
3441
3443 InFmt_VOP3A *iFmt)
3444 : Inst_VOP3A(iFmt, "v_cmpx_le_f64", true)
3445 {
3446 setFlag(ALU);
3447 setFlag(F64);
3448 setFlag(WritesEXEC);
3449 } // Inst_VOP3__V_CMPX_LE_F64
3450
3452 {
3453 } // ~Inst_VOP3__V_CMPX_LE_F64
3454
3455 // --- description from .arch file ---
3456 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
3457 void
3459 {
3460 Wavefront *wf = gpuDynInst->wavefront();
3461 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3462 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3463 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3464
3465 src0.readSrc();
3466 src1.readSrc();
3467
3468 if (instData.ABS & 0x1) {
3469 src0.absModifier();
3470 }
3471
3472 if (instData.ABS & 0x2) {
3473 src1.absModifier();
3474 }
3475
3476 if (extData.NEG & 0x1) {
3477 src0.negModifier();
3478 }
3479
3480 if (extData.NEG & 0x2) {
3481 src1.negModifier();
3482 }
3483
3487 assert(!(instData.ABS & 0x4));
3488 assert(!(extData.NEG & 0x4));
3489
3490 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3491 if (wf->execMask(lane)) {
3492 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
3493 }
3494 }
3495
3496 wf->execMask() = sdst.rawData();
3497 sdst.write();
3498 } // execute
3499 // --- Inst_VOP3__V_CMPX_GT_F64 class methods ---
3500
3502 InFmt_VOP3A *iFmt)
3503 : Inst_VOP3A(iFmt, "v_cmpx_gt_f64", true)
3504 {
3505 setFlag(ALU);
3506 setFlag(F64);
3507 setFlag(WritesEXEC);
3508 } // Inst_VOP3__V_CMPX_GT_F64
3509
3511 {
3512 } // ~Inst_VOP3__V_CMPX_GT_F64
3513
3514 // --- description from .arch file ---
3515 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
3516 void
3518 {
3519 Wavefront *wf = gpuDynInst->wavefront();
3520 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3521 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3522 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3523
3524 src0.readSrc();
3525 src1.readSrc();
3526
3527 if (instData.ABS & 0x1) {
3528 src0.absModifier();
3529 }
3530
3531 if (instData.ABS & 0x2) {
3532 src1.absModifier();
3533 }
3534
3535 if (extData.NEG & 0x1) {
3536 src0.negModifier();
3537 }
3538
3539 if (extData.NEG & 0x2) {
3540 src1.negModifier();
3541 }
3542
3546 assert(!(instData.ABS & 0x4));
3547 assert(!(extData.NEG & 0x4));
3548
3549 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3550 if (wf->execMask(lane)) {
3551 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
3552 }
3553 }
3554
3555 wf->execMask() = sdst.rawData();
3556 sdst.write();
3557 } // execute
3558 // --- Inst_VOP3__V_CMPX_LG_F64 class methods ---
3559
3561 InFmt_VOP3A *iFmt)
3562 : Inst_VOP3A(iFmt, "v_cmpx_lg_f64", true)
3563 {
3564 setFlag(ALU);
3565 setFlag(F64);
3566 setFlag(WritesEXEC);
3567 } // Inst_VOP3__V_CMPX_LG_F64
3568
3570 {
3571 } // ~Inst_VOP3__V_CMPX_LG_F64
3572
3573 // --- description from .arch file ---
3574 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
3575 void
3577 {
3578 Wavefront *wf = gpuDynInst->wavefront();
3579 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3580 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3581 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3582
3583 src0.readSrc();
3584 src1.readSrc();
3585
3586 if (instData.ABS & 0x1) {
3587 src0.absModifier();
3588 }
3589
3590 if (instData.ABS & 0x2) {
3591 src1.absModifier();
3592 }
3593
3594 if (extData.NEG & 0x1) {
3595 src0.negModifier();
3596 }
3597
3598 if (extData.NEG & 0x2) {
3599 src1.negModifier();
3600 }
3601
3605 assert(!(instData.ABS & 0x4));
3606 assert(!(extData.NEG & 0x4));
3607
3608 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3609 if (wf->execMask(lane)) {
3610 sdst.setBit(lane, (src0[lane] < src1[lane]
3611 || src0[lane] > src1[lane]) ? 1 : 0);
3612 }
3613 }
3614
3615 wf->execMask() = sdst.rawData();
3616 sdst.write();
3617 } // execute
3618 // --- Inst_VOP3__V_CMPX_GE_F64 class methods ---
3619
3621 InFmt_VOP3A *iFmt)
3622 : Inst_VOP3A(iFmt, "v_cmpx_ge_f64", true)
3623 {
3624 setFlag(ALU);
3625 setFlag(F64);
3626 setFlag(WritesEXEC);
3627 } // Inst_VOP3__V_CMPX_GE_F64
3628
3630 {
3631 } // ~Inst_VOP3__V_CMPX_GE_F64
3632
3633 // --- description from .arch file ---
3634 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
3635 void
3637 {
3638 Wavefront *wf = gpuDynInst->wavefront();
3639 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3640 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3641 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3642
3643 src0.readSrc();
3644 src1.readSrc();
3645
3646 if (instData.ABS & 0x1) {
3647 src0.absModifier();
3648 }
3649
3650 if (instData.ABS & 0x2) {
3651 src1.absModifier();
3652 }
3653
3654 if (extData.NEG & 0x1) {
3655 src0.negModifier();
3656 }
3657
3658 if (extData.NEG & 0x2) {
3659 src1.negModifier();
3660 }
3661
3665 assert(!(instData.ABS & 0x4));
3666 assert(!(extData.NEG & 0x4));
3667
3668 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3669 if (wf->execMask(lane)) {
3670 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
3671 }
3672 }
3673
3674 wf->execMask() = sdst.rawData();
3675 sdst.write();
3676 } // execute
3677 // --- Inst_VOP3__V_CMPX_O_F64 class methods ---
3678
3680 InFmt_VOP3A *iFmt)
3681 : Inst_VOP3A(iFmt, "v_cmpx_o_f64", true)
3682 {
3683 setFlag(ALU);
3684 setFlag(F64);
3685 setFlag(WritesEXEC);
3686 } // Inst_VOP3__V_CMPX_O_F64
3687
3689 {
3690 } // ~Inst_VOP3__V_CMPX_O_F64
3691
3692 // --- description from .arch file ---
3693 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
3694 // encoding.
3695 void
3697 {
3698 Wavefront *wf = gpuDynInst->wavefront();
3699 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3700 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3701 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3702
3703 src0.readSrc();
3704 src1.readSrc();
3705
3706 if (instData.ABS & 0x1) {
3707 src0.absModifier();
3708 }
3709
3710 if (instData.ABS & 0x2) {
3711 src1.absModifier();
3712 }
3713
3714 if (extData.NEG & 0x1) {
3715 src0.negModifier();
3716 }
3717
3718 if (extData.NEG & 0x2) {
3719 src1.negModifier();
3720 }
3721
3725 assert(!(instData.ABS & 0x4));
3726 assert(!(extData.NEG & 0x4));
3727
3728 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3729 if (wf->execMask(lane)) {
3730 sdst.setBit(lane, (!std::isnan(src0[lane])
3731 && !std::isnan(src1[lane])) ? 1 : 0);
3732 }
3733 }
3734
3735 wf->execMask() = sdst.rawData();
3736 sdst.write();
3737 } // execute
3738 // --- Inst_VOP3__V_CMPX_U_F64 class methods ---
3739
3741 InFmt_VOP3A *iFmt)
3742 : Inst_VOP3A(iFmt, "v_cmpx_u_f64", true)
3743 {
3744 setFlag(ALU);
3745 setFlag(F64);
3746 setFlag(WritesEXEC);
3747 } // Inst_VOP3__V_CMPX_U_F64
3748
3750 {
3751 } // ~Inst_VOP3__V_CMPX_U_F64
3752
3753 // --- description from .arch file ---
3754 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
3755 // encoding.
3756 void
3758 {
3759 Wavefront *wf = gpuDynInst->wavefront();
3760 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3761 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3762 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3763
3764 src0.readSrc();
3765 src1.readSrc();
3766
3767 if (instData.ABS & 0x1) {
3768 src0.absModifier();
3769 }
3770
3771 if (instData.ABS & 0x2) {
3772 src1.absModifier();
3773 }
3774
3775 if (extData.NEG & 0x1) {
3776 src0.negModifier();
3777 }
3778
3779 if (extData.NEG & 0x2) {
3780 src1.negModifier();
3781 }
3782
3786 assert(!(instData.ABS & 0x4));
3787 assert(!(extData.NEG & 0x4));
3788
3789 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3790 if (wf->execMask(lane)) {
3791 sdst.setBit(lane, (std::isnan(src0[lane])
3792 || std::isnan(src1[lane])) ? 1 : 0);
3793 }
3794 }
3795
3796 wf->execMask() = sdst.rawData();
3797 sdst.write();
3798 } // execute
3799 // --- Inst_VOP3__V_CMPX_NGE_F64 class methods ---
3800
3802 InFmt_VOP3A *iFmt)
3803 : Inst_VOP3A(iFmt, "v_cmpx_nge_f64", true)
3804 {
3805 setFlag(ALU);
3806 setFlag(F64);
3807 setFlag(WritesEXEC);
3808 } // Inst_VOP3__V_CMPX_NGE_F64
3809
3811 {
3812 } // ~Inst_VOP3__V_CMPX_NGE_F64
3813
3814 // --- description from .arch file ---
3815 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
3816 void
3818 {
3819 Wavefront *wf = gpuDynInst->wavefront();
3820 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3821 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3822 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3823
3824 src0.readSrc();
3825 src1.readSrc();
3826
3827 if (instData.ABS & 0x1) {
3828 src0.absModifier();
3829 }
3830
3831 if (instData.ABS & 0x2) {
3832 src1.absModifier();
3833 }
3834
3835 if (extData.NEG & 0x1) {
3836 src0.negModifier();
3837 }
3838
3839 if (extData.NEG & 0x2) {
3840 src1.negModifier();
3841 }
3842
3846 assert(!(instData.ABS & 0x4));
3847 assert(!(extData.NEG & 0x4));
3848
3849 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3850 if (wf->execMask(lane)) {
3851 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
3852 }
3853 }
3854
3855 wf->execMask() = sdst.rawData();
3856 sdst.write();
3857 } // execute
3858 // --- Inst_VOP3__V_CMPX_NLG_F64 class methods ---
3859
3861 InFmt_VOP3A *iFmt)
3862 : Inst_VOP3A(iFmt, "v_cmpx_nlg_f64", true)
3863 {
3864 setFlag(ALU);
3865 setFlag(F64);
3866 setFlag(WritesEXEC);
3867 } // Inst_VOP3__V_CMPX_NLG_F64
3868
3870 {
3871 } // ~Inst_VOP3__V_CMPX_NLG_F64
3872
3873 // --- description from .arch file ---
3874 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
3875 void
3877 {
3878 Wavefront *wf = gpuDynInst->wavefront();
3879 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3880 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3881 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3882
3883 src0.readSrc();
3884 src1.readSrc();
3885
3886 if (instData.ABS & 0x1) {
3887 src0.absModifier();
3888 }
3889
3890 if (instData.ABS & 0x2) {
3891 src1.absModifier();
3892 }
3893
3894 if (extData.NEG & 0x1) {
3895 src0.negModifier();
3896 }
3897
3898 if (extData.NEG & 0x2) {
3899 src1.negModifier();
3900 }
3901
3905 assert(!(instData.ABS & 0x4));
3906 assert(!(extData.NEG & 0x4));
3907
3908 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3909 if (wf->execMask(lane)) {
3910 sdst.setBit(lane, !(src0[lane] < src1[lane]
3911 || src0[lane] > src1[lane]) ? 1 : 0);
3912 }
3913 }
3914
3915 wf->execMask() = sdst.rawData();
3916 sdst.write();
3917 } // execute
3918 // --- Inst_VOP3__V_CMPX_NGT_F64 class methods ---
3919
3921 InFmt_VOP3A *iFmt)
3922 : Inst_VOP3A(iFmt, "v_cmpx_ngt_f64", true)
3923 {
3924 setFlag(ALU);
3925 setFlag(F64);
3926 setFlag(WritesEXEC);
3927 } // Inst_VOP3__V_CMPX_NGT_F64
3928
3930 {
3931 } // ~Inst_VOP3__V_CMPX_NGT_F64
3932
3933 // --- description from .arch file ---
3934 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
3935 void
3937 {
3938 Wavefront *wf = gpuDynInst->wavefront();
3939 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3940 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
3941 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
3942
3943 src0.readSrc();
3944 src1.readSrc();
3945
3946 if (instData.ABS & 0x1) {
3947 src0.absModifier();
3948 }
3949
3950 if (instData.ABS & 0x2) {
3951 src1.absModifier();
3952 }
3953
3954 if (extData.NEG & 0x1) {
3955 src0.negModifier();
3956 }
3957
3958 if (extData.NEG & 0x2) {
3959 src1.negModifier();
3960 }
3961
3965 assert(!(instData.ABS & 0x4));
3966 assert(!(extData.NEG & 0x4));
3967
3968 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
3969 if (wf->execMask(lane)) {
3970 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
3971 }
3972 }
3973
3974 wf->execMask() = sdst.rawData();
3975 sdst.write();
3976 } // execute
3977 // --- Inst_VOP3__V_CMPX_NLE_F64 class methods ---
3978
3980 InFmt_VOP3A *iFmt)
3981 : Inst_VOP3A(iFmt, "v_cmpx_nle_f64", true)
3982 {
3983 setFlag(ALU);
3984 setFlag(F64);
3985 setFlag(WritesEXEC);
3986 } // Inst_VOP3__V_CMPX_NLE_F64
3987
3989 {
3990 } // ~Inst_VOP3__V_CMPX_NLE_F64
3991
3992 // --- description from .arch file ---
3993 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
3994 void
3996 {
3997 Wavefront *wf = gpuDynInst->wavefront();
3998 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
3999 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
4000 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4001
4002 src0.readSrc();
4003 src1.readSrc();
4004
4005 if (instData.ABS & 0x1) {
4006 src0.absModifier();
4007 }
4008
4009 if (instData.ABS & 0x2) {
4010 src1.absModifier();
4011 }
4012
4013 if (extData.NEG & 0x1) {
4014 src0.negModifier();
4015 }
4016
4017 if (extData.NEG & 0x2) {
4018 src1.negModifier();
4019 }
4020
4024 assert(!(instData.ABS & 0x4));
4025 assert(!(extData.NEG & 0x4));
4026
4027 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4028 if (wf->execMask(lane)) {
4029 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
4030 }
4031 }
4032
4033 wf->execMask() = sdst.rawData();
4034 sdst.write();
4035 } // execute
4036 // --- Inst_VOP3__V_CMPX_NEQ_F64 class methods ---
4037
4039 InFmt_VOP3A *iFmt)
4040 : Inst_VOP3A(iFmt, "v_cmpx_neq_f64", true)
4041 {
4042 setFlag(ALU);
4043 setFlag(F64);
4044 setFlag(WritesEXEC);
4045 } // Inst_VOP3__V_CMPX_NEQ_F64
4046
4048 {
4049 } // ~Inst_VOP3__V_CMPX_NEQ_F64
4050
4051 // --- description from .arch file ---
4052 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
4053 void
4055 {
4056 Wavefront *wf = gpuDynInst->wavefront();
4057 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
4058 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
4059 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4060
4061 src0.readSrc();
4062 src1.readSrc();
4063
4064 if (instData.ABS & 0x1) {
4065 src0.absModifier();
4066 }
4067
4068 if (instData.ABS & 0x2) {
4069 src1.absModifier();
4070 }
4071
4072 if (extData.NEG & 0x1) {
4073 src0.negModifier();
4074 }
4075
4076 if (extData.NEG & 0x2) {
4077 src1.negModifier();
4078 }
4079
4083 assert(!(instData.ABS & 0x4));
4084 assert(!(extData.NEG & 0x4));
4085
4086 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4087 if (wf->execMask(lane)) {
4088 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
4089 }
4090 }
4091
4092 wf->execMask() = sdst.rawData();
4093 sdst.write();
4094 } // execute
4095 // --- Inst_VOP3__V_CMPX_NLT_F64 class methods ---
4096
4098 InFmt_VOP3A *iFmt)
4099 : Inst_VOP3A(iFmt, "v_cmpx_nlt_f64", true)
4100 {
4101 setFlag(ALU);
4102 setFlag(F64);
4103 setFlag(WritesEXEC);
4104 } // Inst_VOP3__V_CMPX_NLT_F64
4105
4107 {
4108 } // ~Inst_VOP3__V_CMPX_NLT_F64
4109
4110 // --- description from .arch file ---
4111 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
4112 void
4114 {
4115 Wavefront *wf = gpuDynInst->wavefront();
4116 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
4117 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
4118 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4119
4120 src0.readSrc();
4121 src1.readSrc();
4122
4123 if (instData.ABS & 0x1) {
4124 src0.absModifier();
4125 }
4126
4127 if (instData.ABS & 0x2) {
4128 src1.absModifier();
4129 }
4130
4131 if (extData.NEG & 0x1) {
4132 src0.negModifier();
4133 }
4134
4135 if (extData.NEG & 0x2) {
4136 src1.negModifier();
4137 }
4138
4142 assert(!(instData.ABS & 0x4));
4143 assert(!(extData.NEG & 0x4));
4144
4145 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4146 if (wf->execMask(lane)) {
4147 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
4148 }
4149 }
4150
4151 wf->execMask() = sdst.rawData();
4152 sdst.write();
4153 } // execute
4154 // --- Inst_VOP3__V_CMPX_TRU_F64 class methods ---
4155
4157 InFmt_VOP3A *iFmt)
4158 : Inst_VOP3A(iFmt, "v_cmpx_tru_f64", true)
4159 {
4160 setFlag(ALU);
4161 setFlag(F64);
4162 setFlag(WritesEXEC);
4163 } // Inst_VOP3__V_CMPX_TRU_F64
4164
4166 {
4167 } // ~Inst_VOP3__V_CMPX_TRU_F64
4168
4169 // --- description from .arch file ---
4170 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
4171 void
4173 {
4174 Wavefront *wf = gpuDynInst->wavefront();
4175 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4176
4177 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4178 if (wf->execMask(lane)) {
4179 sdst.setBit(lane, 1);
4180 }
4181 }
4182
4183 wf->execMask() = sdst.rawData();
4184 sdst.write();
4185 } // execute
4186 // --- Inst_VOP3__V_CMP_F_I16 class methods ---
4187
4189 : Inst_VOP3A(iFmt, "v_cmp_f_i16", true)
4190 {
4191 setFlag(ALU);
4192 } // Inst_VOP3__V_CMP_F_I16
4193
4195 {
4196 } // ~Inst_VOP3__V_CMP_F_I16
4197
4198 // --- description from .arch file ---
4199 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
// Sets the destination bit to 0 for every lane enabled in the wavefront's
// exec mask, then writes the 64-bit scalar destination back. No source
// operands are read.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_F_I16::execute.
4200 void
4202 {
4203 Wavefront *wf = gpuDynInst->wavefront();
4204 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4205
// Bits of lanes outside the exec mask are never touched by this loop.
4206 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4207 if (wf->execMask(lane)) {
4208 sdst.setBit(lane, 0);
4209 }
4210 }
4211
4212 sdst.write();
4213 } // execute
4214 // --- Inst_VOP3__V_CMP_LT_I16 class methods ---
4215
4217 InFmt_VOP3A *iFmt)
4218 : Inst_VOP3A(iFmt, "v_cmp_lt_i16", true)
4219 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_lt_i16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4220 setFlag(ALU);
4221 } // Inst_VOP3__V_CMP_LT_I16
4222
4224 {
// Empty body: the destructor performs no work of its own.
4225 } // ~Inst_VOP3__V_CMP_LT_I16
4226
4227 // --- description from .arch file ---
4228 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandI16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 < src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_LT_I16::execute.
4229 void
4231 {
4232 Wavefront *wf = gpuDynInst->wavefront();
4233 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
4234 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
4235 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4236
4237 src0.readSrc();
4238 src1.readSrc();
4239
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4243 assert(!(instData.ABS & 0x1));
4244 assert(!(instData.ABS & 0x2));
4245 assert(!(instData.ABS & 0x4));
4246 assert(!(extData.NEG & 0x1));
4247 assert(!(extData.NEG & 0x2));
4248 assert(!(extData.NEG & 0x4));
4249
// Bits of lanes outside the exec mask are never touched by this loop.
4250 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4251 if (wf->execMask(lane)) {
4252 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
4253 }
4254 }
4255
4256 sdst.write();
4257 } // execute
4258 // --- Inst_VOP3__V_CMP_EQ_I16 class methods ---
4259
4261 InFmt_VOP3A *iFmt)
4262 : Inst_VOP3A(iFmt, "v_cmp_eq_i16", true)
4263 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_eq_i16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4264 setFlag(ALU);
4265 } // Inst_VOP3__V_CMP_EQ_I16
4266
4268 {
// Empty body: the destructor performs no work of its own.
4269 } // ~Inst_VOP3__V_CMP_EQ_I16
4270
4271 // --- description from .arch file ---
4272 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandI16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 == src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_EQ_I16::execute.
4273 void
4275 {
4276 Wavefront *wf = gpuDynInst->wavefront();
4277 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
4278 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
4279 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4280
4281 src0.readSrc();
4282 src1.readSrc();
4283
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4287 assert(!(instData.ABS & 0x1));
4288 assert(!(instData.ABS & 0x2));
4289 assert(!(instData.ABS & 0x4));
4290 assert(!(extData.NEG & 0x1));
4291 assert(!(extData.NEG & 0x2));
4292 assert(!(extData.NEG & 0x4));
4293
// Bits of lanes outside the exec mask are never touched by this loop.
4294 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4295 if (wf->execMask(lane)) {
4296 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
4297 }
4298 }
4299
4300 sdst.write();
4301 } // execute
4302 // --- Inst_VOP3__V_CMP_LE_I16 class methods ---
4303
4305 InFmt_VOP3A *iFmt)
4306 : Inst_VOP3A(iFmt, "v_cmp_le_i16", true)
4307 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_le_i16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4308 setFlag(ALU);
4309 } // Inst_VOP3__V_CMP_LE_I16
4310
4312 {
// Empty body: the destructor performs no work of its own.
4313 } // ~Inst_VOP3__V_CMP_LE_I16
4314
4315 // --- description from .arch file ---
4316 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandI16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 <= src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_LE_I16::execute.
4317 void
4319 {
4320 Wavefront *wf = gpuDynInst->wavefront();
4321 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
4322 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
4323 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4324
4325 src0.readSrc();
4326 src1.readSrc();
4327
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4331 assert(!(instData.ABS & 0x1));
4332 assert(!(instData.ABS & 0x2));
4333 assert(!(instData.ABS & 0x4));
4334 assert(!(extData.NEG & 0x1));
4335 assert(!(extData.NEG & 0x2));
4336 assert(!(extData.NEG & 0x4));
4337
// Bits of lanes outside the exec mask are never touched by this loop.
4338 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4339 if (wf->execMask(lane)) {
4340 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
4341 }
4342 }
4343
4344 sdst.write();
4345 } // execute
4346 // --- Inst_VOP3__V_CMP_GT_I16 class methods ---
4347
4349 InFmt_VOP3A *iFmt)
4350 : Inst_VOP3A(iFmt, "v_cmp_gt_i16", true)
4351 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_gt_i16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4352 setFlag(ALU);
4353 } // Inst_VOP3__V_CMP_GT_I16
4354
4356 {
// Empty body: the destructor performs no work of its own.
4357 } // ~Inst_VOP3__V_CMP_GT_I16
4358
4359 // --- description from .arch file ---
4360 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandI16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 > src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_GT_I16::execute.
4361 void
4363 {
4364 Wavefront *wf = gpuDynInst->wavefront();
4365 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
4366 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
4367 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4368
4369 src0.readSrc();
4370 src1.readSrc();
4371
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4375 assert(!(instData.ABS & 0x1));
4376 assert(!(instData.ABS & 0x2));
4377 assert(!(instData.ABS & 0x4));
4378 assert(!(extData.NEG & 0x1));
4379 assert(!(extData.NEG & 0x2));
4380 assert(!(extData.NEG & 0x4));
4381
// Bits of lanes outside the exec mask are never touched by this loop.
4382 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4383 if (wf->execMask(lane)) {
4384 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
4385 }
4386 }
4387
4388 sdst.write();
4389 } // execute
4390 // --- Inst_VOP3__V_CMP_NE_I16 class methods ---
4391
4393 InFmt_VOP3A *iFmt)
4394 : Inst_VOP3A(iFmt, "v_cmp_ne_i16", true)
4395 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_ne_i16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4396 setFlag(ALU);
4397 } // Inst_VOP3__V_CMP_NE_I16
4398
4400 {
// Empty body: the destructor performs no work of its own.
4401 } // ~Inst_VOP3__V_CMP_NE_I16
4402
4403 // --- description from .arch file ---
4404 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandI16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 != src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_NE_I16::execute.
4405 void
4407 {
4408 Wavefront *wf = gpuDynInst->wavefront();
4409 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
4410 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
4411 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4412
4413 src0.readSrc();
4414 src1.readSrc();
4415
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4419 assert(!(instData.ABS & 0x1));
4420 assert(!(instData.ABS & 0x2));
4421 assert(!(instData.ABS & 0x4));
4422 assert(!(extData.NEG & 0x1));
4423 assert(!(extData.NEG & 0x2));
4424 assert(!(extData.NEG & 0x4));
4425
// Bits of lanes outside the exec mask are never touched by this loop.
4426 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4427 if (wf->execMask(lane)) {
4428 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
4429 }
4430 }
4431
4432 sdst.write();
4433 } // execute
4434 // --- Inst_VOP3__V_CMP_GE_I16 class methods ---
4435
4437 InFmt_VOP3A *iFmt)
4438 : Inst_VOP3A(iFmt, "v_cmp_ge_i16", true)
4439 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_ge_i16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4440 setFlag(ALU);
4441 } // Inst_VOP3__V_CMP_GE_I16
4442
4444 {
// Empty body: the destructor performs no work of its own.
4445 } // ~Inst_VOP3__V_CMP_GE_I16
4446
4447 // --- description from .arch file ---
4448 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandI16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 >= src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_GE_I16::execute.
4449 void
4451 {
4452 Wavefront *wf = gpuDynInst->wavefront();
4453 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
4454 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
4455 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4456
4457 src0.readSrc();
4458 src1.readSrc();
4459
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4463 assert(!(instData.ABS & 0x1));
4464 assert(!(instData.ABS & 0x2));
4465 assert(!(instData.ABS & 0x4));
4466 assert(!(extData.NEG & 0x1));
4467 assert(!(extData.NEG & 0x2));
4468 assert(!(extData.NEG & 0x4));
4469
// Bits of lanes outside the exec mask are never touched by this loop.
4470 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4471 if (wf->execMask(lane)) {
4472 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
4473 }
4474 }
4475
4476 sdst.write();
4477 } // execute
4478 // --- Inst_VOP3__V_CMP_T_I16 class methods ---
4479
4481 : Inst_VOP3A(iFmt, "v_cmp_t_i16", true)
4482 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_t_i16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4483 setFlag(ALU);
4484 } // Inst_VOP3__V_CMP_T_I16
4485
4487 {
// Empty body: the destructor performs no work of its own.
4488 } // ~Inst_VOP3__V_CMP_T_I16
4489
4490 // --- description from .arch file ---
4491 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
// Sets the destination bit to 1 for every lane enabled in the wavefront's
// exec mask, then writes the 64-bit scalar destination back. No source
// operands are read.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_T_I16::execute.
4492 void
4494 {
4495 Wavefront *wf = gpuDynInst->wavefront();
4496 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4497
// Bits of lanes outside the exec mask are never touched by this loop.
4498 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4499 if (wf->execMask(lane)) {
4500 sdst.setBit(lane, 1);
4501 }
4502 }
4503
4504 sdst.write();
4505 } // execute
4506 // --- Inst_VOP3__V_CMP_F_U16 class methods ---
4507
4509 : Inst_VOP3A(iFmt, "v_cmp_f_u16", true)
4510 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_f_u16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4511 setFlag(ALU);
4512 } // Inst_VOP3__V_CMP_F_U16
4513
4515 {
// Empty body: the destructor performs no work of its own.
4516 } // ~Inst_VOP3__V_CMP_F_U16
4517
4518 // --- description from .arch file ---
4519 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
// Sets the destination bit to 0 for every lane enabled in the wavefront's
// exec mask, then writes the 64-bit scalar destination back. No source
// operands are read.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_F_U16::execute.
4520 void
4522 {
4523 Wavefront *wf = gpuDynInst->wavefront();
4524 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4525
// Bits of lanes outside the exec mask are never touched by this loop.
4526 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4527 if (wf->execMask(lane)) {
4528 sdst.setBit(lane, 0);
4529 }
4530 }
4531
4532 sdst.write();
4533 } // execute
4534 // --- Inst_VOP3__V_CMP_LT_U16 class methods ---
4535
4537 InFmt_VOP3A *iFmt)
4538 : Inst_VOP3A(iFmt, "v_cmp_lt_u16", true)
4539 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_lt_u16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4540 setFlag(ALU);
4541 } // Inst_VOP3__V_CMP_LT_U16
4542
4544 {
// Empty body: the destructor performs no work of its own.
4545 } // ~Inst_VOP3__V_CMP_LT_U16
4546
4547 // --- description from .arch file ---
4548 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandU16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 < src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_LT_U16::execute.
4549 void
4551 {
4552 Wavefront *wf = gpuDynInst->wavefront();
4553 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
4554 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
4555 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4556
4557 src0.readSrc();
4558 src1.readSrc();
4559
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4563 assert(!(instData.ABS & 0x1));
4564 assert(!(instData.ABS & 0x2));
4565 assert(!(instData.ABS & 0x4));
4566 assert(!(extData.NEG & 0x1));
4567 assert(!(extData.NEG & 0x2));
4568 assert(!(extData.NEG & 0x4));
4569
// Bits of lanes outside the exec mask are never touched by this loop.
4570 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4571 if (wf->execMask(lane)) {
4572 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
4573 }
4574 }
4575
4576 sdst.write();
4577 } // execute
4578 // --- Inst_VOP3__V_CMP_EQ_U16 class methods ---
4579
4581 InFmt_VOP3A *iFmt)
4582 : Inst_VOP3A(iFmt, "v_cmp_eq_u16", true)
4583 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_eq_u16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4584 setFlag(ALU);
4585 } // Inst_VOP3__V_CMP_EQ_U16
4586
4588 {
// Empty body: the destructor performs no work of its own.
4589 } // ~Inst_VOP3__V_CMP_EQ_U16
4590
4591 // --- description from .arch file ---
4592 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandU16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 == src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_EQ_U16::execute.
4593 void
4595 {
4596 Wavefront *wf = gpuDynInst->wavefront();
4597 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
4598 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
4599 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4600
4601 src0.readSrc();
4602 src1.readSrc();
4603
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4607 assert(!(instData.ABS & 0x1));
4608 assert(!(instData.ABS & 0x2));
4609 assert(!(instData.ABS & 0x4));
4610 assert(!(extData.NEG & 0x1));
4611 assert(!(extData.NEG & 0x2));
4612 assert(!(extData.NEG & 0x4));
4613
// Bits of lanes outside the exec mask are never touched by this loop.
4614 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4615 if (wf->execMask(lane)) {
4616 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
4617 }
4618 }
4619
4620 sdst.write();
4621 } // execute
4622 // --- Inst_VOP3__V_CMP_LE_U16 class methods ---
4623
4625 InFmt_VOP3A *iFmt)
4626 : Inst_VOP3A(iFmt, "v_cmp_le_u16", true)
4627 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_le_u16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4628 setFlag(ALU);
4629 } // Inst_VOP3__V_CMP_LE_U16
4630
4632 {
// Empty body: the destructor performs no work of its own.
4633 } // ~Inst_VOP3__V_CMP_LE_U16
4634
4635 // --- description from .arch file ---
4636 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandU16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 <= src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_LE_U16::execute.
4637 void
4639 {
4640 Wavefront *wf = gpuDynInst->wavefront();
4641 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
4642 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
4643 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4644
4645 src0.readSrc();
4646 src1.readSrc();
4647
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4651 assert(!(instData.ABS & 0x1));
4652 assert(!(instData.ABS & 0x2));
4653 assert(!(instData.ABS & 0x4));
4654 assert(!(extData.NEG & 0x1));
4655 assert(!(extData.NEG & 0x2));
4656 assert(!(extData.NEG & 0x4));
4657
// Bits of lanes outside the exec mask are never touched by this loop.
4658 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4659 if (wf->execMask(lane)) {
4660 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
4661 }
4662 }
4663
4664 sdst.write();
4665 } // execute
4666 // --- Inst_VOP3__V_CMP_GT_U16 class methods ---
4667
4669 InFmt_VOP3A *iFmt)
4670 : Inst_VOP3A(iFmt, "v_cmp_gt_u16", true)
4671 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_gt_u16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4672 setFlag(ALU);
4673 } // Inst_VOP3__V_CMP_GT_U16
4674
4676 {
// Empty body: the destructor performs no work of its own.
4677 } // ~Inst_VOP3__V_CMP_GT_U16
4678
4679 // --- description from .arch file ---
4680 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandU16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 > src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_GT_U16::execute.
4681 void
4683 {
4684 Wavefront *wf = gpuDynInst->wavefront();
4685 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
4686 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
4687 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4688
4689 src0.readSrc();
4690 src1.readSrc();
4691
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4695 assert(!(instData.ABS & 0x1));
4696 assert(!(instData.ABS & 0x2));
4697 assert(!(instData.ABS & 0x4));
4698 assert(!(extData.NEG & 0x1));
4699 assert(!(extData.NEG & 0x2));
4700 assert(!(extData.NEG & 0x4));
4701
// Bits of lanes outside the exec mask are never touched by this loop.
4702 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4703 if (wf->execMask(lane)) {
4704 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
4705 }
4706 }
4707
4708 sdst.write();
4709 } // execute
4710 // --- Inst_VOP3__V_CMP_NE_U16 class methods ---
4711
4713 InFmt_VOP3A *iFmt)
4714 : Inst_VOP3A(iFmt, "v_cmp_ne_u16", true)
4715 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_ne_u16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4716 setFlag(ALU);
4717 } // Inst_VOP3__V_CMP_NE_U16
4718
4720 {
// Empty body: the destructor performs no work of its own.
4721 } // ~Inst_VOP3__V_CMP_NE_U16
4722
4723 // --- description from .arch file ---
4724 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandU16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 != src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_NE_U16::execute.
4725 void
4727 {
4728 Wavefront *wf = gpuDynInst->wavefront();
4729 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
4730 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
4731 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4732
4733 src0.readSrc();
4734 src1.readSrc();
4735
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4739 assert(!(instData.ABS & 0x1));
4740 assert(!(instData.ABS & 0x2));
4741 assert(!(instData.ABS & 0x4));
4742 assert(!(extData.NEG & 0x1));
4743 assert(!(extData.NEG & 0x2));
4744 assert(!(extData.NEG & 0x4));
4745
// Bits of lanes outside the exec mask are never touched by this loop.
4746 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4747 if (wf->execMask(lane)) {
4748 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
4749 }
4750 }
4751
4752 sdst.write();
4753 } // execute
4754 // --- Inst_VOP3__V_CMP_GE_U16 class methods ---
4755
4757 InFmt_VOP3A *iFmt)
4758 : Inst_VOP3A(iFmt, "v_cmp_ge_u16", true)
4759 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_ge_u16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4760 setFlag(ALU);
4761 } // Inst_VOP3__V_CMP_GE_U16
4762
4764 {
// Empty body: the destructor performs no work of its own.
4765 } // ~Inst_VOP3__V_CMP_GE_U16
4766
4767 // --- description from .arch file ---
4768 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandU16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 >= src1 and 0
// otherwise; the destination is then written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_GE_U16::execute.
4769 void
4771 {
4772 Wavefront *wf = gpuDynInst->wavefront();
4773 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
4774 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
4775 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4776
4777 src0.readSrc();
4778 src1.readSrc();
4779
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4783 assert(!(instData.ABS & 0x1));
4784 assert(!(instData.ABS & 0x2));
4785 assert(!(instData.ABS & 0x4));
4786 assert(!(extData.NEG & 0x1));
4787 assert(!(extData.NEG & 0x2));
4788 assert(!(extData.NEG & 0x4));
4789
// Bits of lanes outside the exec mask are never touched by this loop.
4790 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4791 if (wf->execMask(lane)) {
4792 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
4793 }
4794 }
4795
4796 sdst.write();
4797 } // execute
4798 // --- Inst_VOP3__V_CMP_T_U16 class methods ---
4799
4801 : Inst_VOP3A(iFmt, "v_cmp_t_u16", true)
4802 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmp_t_u16" and `true`; the only flag set is ALU.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4803 setFlag(ALU);
4804 } // Inst_VOP3__V_CMP_T_U16
4805
4807 {
// Empty body: the destructor performs no work of its own.
4808 } // ~Inst_VOP3__V_CMP_T_U16
4809
4810 // --- description from .arch file ---
4811 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
// Sets the destination bit to 1 for every lane enabled in the wavefront's
// exec mask, then writes the 64-bit scalar destination back. No source
// operands are read.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMP_T_U16::execute.
4812 void
4814 {
4815 Wavefront *wf = gpuDynInst->wavefront();
4816 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4817
// Bits of lanes outside the exec mask are never touched by this loop.
4818 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4819 if (wf->execMask(lane)) {
4820 sdst.setBit(lane, 1);
4821 }
4822 }
4823
4824 sdst.write();
4825 } // execute
4826 // --- Inst_VOP3__V_CMPX_F_I16 class methods ---
4827
4829 InFmt_VOP3A *iFmt)
4830 : Inst_VOP3A(iFmt, "v_cmpx_f_i16", true)
4831 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmpx_f_i16" and `true`; the flags set are ALU and WritesEXEC.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4832 setFlag(ALU);
4833 setFlag(WritesEXEC);
4834 } // Inst_VOP3__V_CMPX_F_I16
4835
4837 {
// Empty body: the destructor performs no work of its own.
4838 } // ~Inst_VOP3__V_CMPX_F_I16
4839
4840 // --- description from .arch file ---
4841 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
// Sets the destination bit to 0 for every lane enabled in the wavefront's
// exec mask, then overwrites the exec mask with the destination's raw
// value and writes the destination back. No source operands are read.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMPX_F_I16::execute.
4842 void
4844 {
4845 Wavefront *wf = gpuDynInst->wavefront();
4846 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4847
// Bits of lanes outside the exec mask are never touched by this loop.
4848 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4849 if (wf->execMask(lane)) {
4850 sdst.setBit(lane, 0);
4851 }
4852 }
4853
// NOTE(review): sdst is never read here, so the bits of skipped lanes hold
// whatever the operand was constructed with -- presumably 0; confirm.
4854 wf->execMask() = sdst.rawData();
4855 sdst.write();
4856 } // execute
4857 // --- Inst_VOP3__V_CMPX_LT_I16 class methods ---
4858
4860 InFmt_VOP3A *iFmt)
4861 : Inst_VOP3A(iFmt, "v_cmpx_lt_i16", true)
4862 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmpx_lt_i16" and `true`; the flags set are ALU and WritesEXEC.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4863 setFlag(ALU);
4864 setFlag(WritesEXEC);
4865 } // Inst_VOP3__V_CMPX_LT_I16
4866
4868 {
// Empty body: the destructor performs no work of its own.
4869 } // ~Inst_VOP3__V_CMPX_LT_I16
4870
4871 // --- description from .arch file ---
4872 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandI16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 < src1 and 0
// otherwise. The exec mask is then overwritten with the destination's
// raw value and the destination is written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMPX_LT_I16::execute.
4873 void
4875 {
4876 Wavefront *wf = gpuDynInst->wavefront();
4877 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
4878 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
4879 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4880
4881 src0.readSrc();
4882 src1.readSrc();
4883
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4887 assert(!(instData.ABS & 0x1));
4888 assert(!(instData.ABS & 0x2));
4889 assert(!(instData.ABS & 0x4));
4890 assert(!(extData.NEG & 0x1));
4891 assert(!(extData.NEG & 0x2));
4892 assert(!(extData.NEG & 0x4));
4893
// Bits of lanes outside the exec mask are never touched by this loop.
4894 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4895 if (wf->execMask(lane)) {
4896 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
4897 }
4898 }
4899
// NOTE(review): sdst is never read here, so the bits of skipped lanes hold
// whatever the operand was constructed with -- presumably 0; confirm.
4900 wf->execMask() = sdst.rawData();
4901 sdst.write();
4902 } // execute
4903 // --- Inst_VOP3__V_CMPX_EQ_I16 class methods ---
4904
4906 InFmt_VOP3A *iFmt)
4907 : Inst_VOP3A(iFmt, "v_cmpx_eq_i16", true)
4908 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmpx_eq_i16" and `true`; the flags set are ALU and WritesEXEC.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4909 setFlag(ALU);
4910 setFlag(WritesEXEC);
4911 } // Inst_VOP3__V_CMPX_EQ_I16
4912
4914 {
// Empty body: the destructor performs no work of its own.
4915 } // ~Inst_VOP3__V_CMPX_EQ_I16
4916
4917 // --- description from .arch file ---
4918 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
// Per-lane compare of two vector sources read as ConstVecOperandI16.
// For each lane enabled in the wavefront's exec mask, the lane's bit in
// the 64-bit scalar destination becomes 1 when src0 == src1 and 0
// otherwise. The exec mask is then overwritten with the destination's
// raw value and the destination is written back.
// NOTE(review): the line naming this function is not in the listing;
// presumably Inst_VOP3__V_CMPX_EQ_I16::execute.
4919 void
4921 {
4922 Wavefront *wf = gpuDynInst->wavefront();
4923 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
4924 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
4925 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
4926
4927 src0.readSrc();
4928 src1.readSrc();
4929
// No input modifiers are applied in this function, so every ABS and NEG
// bit is required to be clear.
4933 assert(!(instData.ABS & 0x1));
4934 assert(!(instData.ABS & 0x2));
4935 assert(!(instData.ABS & 0x4));
4936 assert(!(extData.NEG & 0x1));
4937 assert(!(extData.NEG & 0x2));
4938 assert(!(extData.NEG & 0x4));
4939
// Bits of lanes outside the exec mask are never touched by this loop.
4940 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
4941 if (wf->execMask(lane)) {
4942 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
4943 }
4944 }
4945
// NOTE(review): sdst is never read here, so the bits of skipped lanes hold
// whatever the operand was constructed with -- presumably 0; confirm.
4946 wf->execMask() = sdst.rawData();
4947 sdst.write();
4948 } // execute
4949 // --- Inst_VOP3__V_CMPX_LE_I16 class methods ---
4950
4952 InFmt_VOP3A *iFmt)
4953 : Inst_VOP3A(iFmt, "v_cmpx_le_i16", true)
4954 {
// Constructor body. The base Inst_VOP3A receives iFmt, the mnemonic
// "v_cmpx_le_i16" and `true`; the flags set are ALU and WritesEXEC.
// NOTE(review): the meaning of the third base argument is not visible
// in this listing -- confirm against Inst_VOP3A.
4955 setFlag(ALU);
4956 setFlag(WritesEXEC);
4957 } // Inst_VOP3__V_CMPX_LE_I16
4958
4960 {
// Empty body: the destructor performs no work of its own.
4961 } // ~Inst_VOP3__V_CMPX_LE_I16