73 ? src1[lane] : src0[lane];
129 vdst[lane] = src0[lane] + src1[lane];
186 vdst[lane] = src0[lane] - src1[lane];
243 vdst[lane] = src1[lane] - src0[lane];
302 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
303 std::fpclassify(src0[lane]) == FP_ZERO) &&
304 !std::signbit(src0[lane])) {
307 }
else if (!std::signbit(src1[lane])) {
312 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
313 std::fpclassify(src0[lane]) == FP_ZERO) &&
314 std::signbit(src0[lane])) {
317 }
else if (std::signbit(src1[lane])) {
323 !std::signbit(src0[lane])) {
324 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
325 std::fpclassify(src1[lane]) == FP_ZERO) {
327 }
else if (!std::signbit(src1[lane])) {
328 vdst[lane] = +INFINITY;
330 vdst[lane] = -INFINITY;
333 std::signbit(src0[lane])) {
334 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
335 std::fpclassify(src1[lane]) == FP_ZERO) {
337 }
else if (std::signbit(src1[lane])) {
338 vdst[lane] = +INFINITY;
340 vdst[lane] = -INFINITY;
343 vdst[lane] = src0[lane] * src1[lane];
403 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
404 std::fpclassify(src0[lane]) == FP_ZERO) &&
405 !std::signbit(src0[lane])) {
408 }
else if (!std::signbit(src1[lane])) {
413 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
414 std::fpclassify(src0[lane]) == FP_ZERO) &&
415 std::signbit(src0[lane])) {
418 }
else if (std::signbit(src1[lane])) {
424 !std::signbit(src0[lane])) {
425 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
426 std::fpclassify(src1[lane]) == FP_ZERO) {
428 }
else if (!std::signbit(src1[lane])) {
429 vdst[lane] = +INFINITY;
431 vdst[lane] = -INFINITY;
434 std::signbit(src0[lane])) {
435 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
436 std::fpclassify(src1[lane]) == FP_ZERO) {
438 }
else if (std::signbit(src1[lane])) {
439 vdst[lane] = +INFINITY;
441 vdst[lane] = -INFINITY;
444 vdst[lane] = src0[lane] * src1[lane];
488 vdst[lane] = sext<24>(
bits(src0[lane], 23, 0))
489 * sext<24>(
bits(src1[lane], 23, 0));
537 vdst[lane] = (
VecElemI32)((tmp_src0 * tmp_src1) >> 32);
580 vdst[lane] =
bits(src0[lane], 23, 0) *
bits(src1[lane], 23, 0);
625 vdst[lane] = (
VecElemU32)((tmp_src0 * tmp_src1) >> 32);
681 vdst[lane] = std::fmin(src0[lane], src1[lane]);
737 vdst[lane] = std::fmax(src0[lane], src1[lane]);
780 vdst[lane] = std::min(src0[lane], src1[lane]);
823 vdst[lane] = std::max(src0[lane], src1[lane]);
866 vdst[lane] = std::min(src0[lane], src1[lane]);
909 vdst[lane] = std::max(src0[lane], src1[lane]);
954 vdst[lane] = src1[lane] >>
bits(src0[lane], 4, 0);
999 vdst[lane] = src1[lane] >>
bits(src0[lane], 4, 0);
1023 Wavefront *wf = gpuDynInst->wavefront();
1043 vdst[lane] = src1[lane] <<
bits(src0[lane], 4, 0);
1067 Wavefront *wf = gpuDynInst->wavefront();
1087 vdst[lane] = src0[lane] & src1[lane];
1111 Wavefront *wf = gpuDynInst->wavefront();
1131 vdst[lane] = src0[lane] | src1[lane];
1155 Wavefront *wf = gpuDynInst->wavefront();
1177 vdst[lane] = src0[lane] | src1[lane] | src2[lane];
1201 Wavefront *wf = gpuDynInst->wavefront();
1221 vdst[lane] = src0[lane] ^ src1[lane];
1247 Wavefront *wf = gpuDynInst->wavefront();
1280 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
1307 Wavefront *wf = gpuDynInst->wavefront();
1325 vdst[lane] = src0[lane] + src1[lane];
1327 + (
VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
1355 Wavefront *wf = gpuDynInst->wavefront();
1373 vdst[lane] = src0[lane] - src1[lane];
1374 vcc.
setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
1404 Wavefront *wf = gpuDynInst->wavefront();
1422 vdst[lane] = src1[lane] - src0[lane];
1423 vcc.
setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
1453 Wavefront *wf = gpuDynInst->wavefront();
1473 vdst[lane] = src0[lane] + src1[lane]
1478 >= 0x100000000 ? 1 : 0);
1508 Wavefront *wf = gpuDynInst->wavefront();
1528 vdst[lane] = src0[lane] - src1[lane]
1531 > src0[lane] ? 1 : 0);
1562 Wavefront *wf = gpuDynInst->wavefront();
1582 vdst[lane] = src1[lane] - src0[lane]
1585 > src0[lane] ? 1 : 0);
1719 Wavefront *wf = gpuDynInst->wavefront();
1739 vdst[lane] = src0[lane] + src1[lane];
1763 Wavefront *wf = gpuDynInst->wavefront();
1783 vdst[lane] = src0[lane] - src1[lane];
1808 Wavefront *wf = gpuDynInst->wavefront();
1828 vdst[lane] = src1[lane] - src0[lane];
1852 Wavefront *wf = gpuDynInst->wavefront();
1872 vdst[lane] = src0[lane] * src1[lane];
1896 Wavefront *wf = gpuDynInst->wavefront();
1916 vdst[lane] = src1[lane] <<
bits(src0[lane], 3, 0);
1941 Wavefront *wf = gpuDynInst->wavefront();
1967 vdst[lane] = src1[lane] >>
bits(src0[lane], 3, 0);
1992 Wavefront *wf = gpuDynInst->wavefront();
2012 vdst[lane] = src1[lane] >>
bits(src0[lane], 3, 0);
2079 Wavefront *wf = gpuDynInst->wavefront();
2105 vdst[lane] = std::max(src0[lane], src1[lane]);
2128 Wavefront *wf = gpuDynInst->wavefront();
2154 vdst[lane] = std::max(src0[lane], src1[lane]);
2177 Wavefront *wf = gpuDynInst->wavefront();
2203 vdst[lane] = std::min(src0[lane], src1[lane]);
2226 Wavefront *wf = gpuDynInst->wavefront();
2252 vdst[lane] = std::min(src0[lane], src1[lane]);
2295 Wavefront *wf = gpuDynInst->wavefront();
2315 vdst[lane] = src0[lane] + src1[lane];
2338 Wavefront *wf = gpuDynInst->wavefront();
2358 vdst[lane] = src0[lane] - src1[lane];
2381 Wavefront *wf = gpuDynInst->wavefront();
2401 vdst[lane] = src1[lane] - src0[lane];
2426 Wavefront *wf = gpuDynInst->wavefront();
2465 float out = std::fma(src0[lane], src1[lane], vdst[lane]);
2468 out = std::clamp(vdst[lane], 0.0f, 1.0f);
2513 Wavefront *wf = gpuDynInst->wavefront();
2521 vdst[lane] = src[lane];
2547 Wavefront *wf = gpuDynInst->wavefront();
2564 std::frexp(src[lane],&exp);
2567 }
else if (
std::isinf(src[lane]) || exp > 30) {
2568 if (std::signbit(src[lane])) {
2569 vdst[lane] = INT_MIN;
2571 vdst[lane] = INT_MAX;
2599 Wavefront *wf = gpuDynInst->wavefront();
2639 Wavefront *wf = gpuDynInst->wavefront();
2681 Wavefront *wf = gpuDynInst->wavefront();
2723 Wavefront *wf = gpuDynInst->wavefront();
2740 std::frexp(src[lane],&exp);
2744 if (std::signbit(src[lane])) {
2747 vdst[lane] = UINT_MAX;
2749 }
else if (exp > 31) {
2750 vdst[lane] = UINT_MAX;
2779 Wavefront *wf = gpuDynInst->wavefront();
2804 std::frexp(src[lane],&exp);
2807 }
else if (
std::isinf(src[lane]) || exp > 30) {
2808 if (std::signbit(src[lane])) {
2809 vdst[lane] = INT_MIN;
2811 vdst[lane] = INT_MAX;
2862 Wavefront *wf = gpuDynInst->wavefront();
2878 float tmp = src0[lane];
2880 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2881 if (neg & 1) tmp = -tmp;
2885 tmp = std::clamp(tmp, 0.0f, 1.0f);
2920 Wavefront *wf = gpuDynInst->wavefront();
2937 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2938 if (neg & 1) tmp = -tmp;
2942 out = std::clamp(out, 0.0f, 1.0f);
2955 :
Inst_VOP3A(iFmt,
"v_cvt_rpi_i32_f32", false)
2970 Wavefront *wf = gpuDynInst->wavefront();
2986 vdst[lane] = (
VecElemI32)std::floor(src[lane] + 0.5);
2996 :
Inst_VOP3A(iFmt,
"v_cvt_flr_i32_f32", false)
3011 Wavefront *wf = gpuDynInst->wavefront();
3027 vdst[lane] = (
VecElemI32)std::floor(src[lane]);
3036 :
Inst_VOP3A(iFmt,
"v_cvt_off_f32_i4", false)
3072 Wavefront *wf = gpuDynInst->wavefront();
3120 Wavefront *wf = gpuDynInst->wavefront();
3153 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte0", false)
3168 Wavefront *wf = gpuDynInst->wavefront();
3193 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte1", false)
3208 Wavefront *wf = gpuDynInst->wavefront();
3233 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte2", false)
3248 Wavefront *wf = gpuDynInst->wavefront();
3273 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte3", false)
3288 Wavefront *wf = gpuDynInst->wavefront();
3330 Wavefront *wf = gpuDynInst->wavefront();
3347 std::frexp(src[lane],&exp);
3351 if (std::signbit(src[lane])) {
3354 vdst[lane] = UINT_MAX;
3356 }
else if (exp > 31) {
3357 vdst[lane] = UINT_MAX;
3384 Wavefront *wf = gpuDynInst->wavefront();
3424 Wavefront *wf = gpuDynInst->wavefront();
3440 vdst[lane] = std::trunc(src[lane]);
3465 Wavefront *wf = gpuDynInst->wavefront();
3481 vdst[lane] = std::ceil(src[lane]);
3505 Wavefront *wf = gpuDynInst->wavefront();
3546 Wavefront *wf = gpuDynInst->wavefront();
3562 vdst[lane] = std::floor(src[lane]);
3586 Wavefront *wf = gpuDynInst->wavefront();
3603 vdst[lane] = std::modf(src[lane], &int_part);
3627 Wavefront *wf = gpuDynInst->wavefront();
3643 vdst[lane] = std::trunc(src[lane]);
3668 Wavefront *wf = gpuDynInst->wavefront();
3684 vdst[lane] = std::ceil(src[lane]);
3708 Wavefront *wf = gpuDynInst->wavefront();
3749 Wavefront *wf = gpuDynInst->wavefront();
3765 vdst[lane] = std::floor(src[lane]);
3789 Wavefront *wf = gpuDynInst->wavefront();
3805 vdst[lane] = std::pow(2.0, src[lane]);
3829 Wavefront *wf = gpuDynInst->wavefront();
3853 vdst[lane] = std::log2(src[lane]);
3877 Wavefront *wf = gpuDynInst->wavefront();
3893 vdst[lane] = 1.0 / src[lane];
3919 Wavefront *wf = gpuDynInst->wavefront();
3935 vdst[lane] = 1.0 / src[lane];
3959 Wavefront *wf = gpuDynInst->wavefront();
3975 vdst[lane] = 1.0 / std::sqrt(src[lane]);
3999 Wavefront *wf = gpuDynInst->wavefront();
4015 if (std::fpclassify(src[lane]) == FP_ZERO) {
4016 vdst[lane] = +INFINITY;
4020 if (std::signbit(src[lane])) {
4026 vdst[lane] = 1.0 / src[lane];
4051 Wavefront *wf = gpuDynInst->wavefront();
4067 if (std::fpclassify(src[lane]) == FP_ZERO) {
4068 vdst[lane] = +INFINITY;
4071 }
else if (
std::isinf(src[lane]) && !std::signbit(src[lane])) {
4073 }
else if (std::signbit(src[lane])) {
4076 vdst[lane] = 1.0 / std::sqrt(src[lane]);
4101 Wavefront *wf = gpuDynInst->wavefront();
4117 vdst[lane] = std::sqrt(src[lane]);
4141 Wavefront *wf = gpuDynInst->wavefront();
4157 vdst[lane] = std::sqrt(src[lane]);
4183 Wavefront *wf = gpuDynInst->wavefront();
4201 vdst[lane] = std::sin(src[lane] * 2 * pi.
rawData());
4227 Wavefront *wf = gpuDynInst->wavefront();
4245 vdst[lane] = std::cos(src[lane] * 2 * pi.
rawData());
4269 Wavefront *wf = gpuDynInst->wavefront();
4285 vdst[lane] = ~src[lane];
4309 Wavefront *wf = gpuDynInst->wavefront();
4341 Wavefront *wf = gpuDynInst->wavefront();
4381 Wavefront *wf = gpuDynInst->wavefront();
4421 Wavefront *wf = gpuDynInst->wavefront();
4447 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i32_f64", false)
4462 Wavefront *wf = gpuDynInst->wavefront();
4482 std::frexp(src[lane], &exp);
4493 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f64", false)
4508 Wavefront *wf = gpuDynInst->wavefront();
4525 vdst[lane] = std::frexp(src[lane], &exp);
4549 Wavefront *wf = gpuDynInst->wavefront();
4566 vdst[lane] = std::modf(src[lane], &int_part);
4576 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i32_f32", false)
4595 Wavefront *wf = gpuDynInst->wavefront();
4615 std::frexp(src[lane], &exp);
4626 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f32", false)
4646 Wavefront *wf = gpuDynInst->wavefront();
4663 vdst[lane] = src[lane];
4666 vdst[lane] = std::frexp(src[lane], &exp);
4893 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f16", false)
4921 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i16_f16", false)
5092 :
Inst_VOP3A(iFmt,
"v_exp_legacy_f32", false)
5107 Wavefront *wf = gpuDynInst->wavefront();
5131 vdst[lane] = std::pow(2.0, src[lane]);
5140 :
Inst_VOP3A(iFmt,
"v_log_legacy_f32", false)
5155 Wavefront *wf = gpuDynInst->wavefront();
5163 vdst[lane] = std::log2(src[lane]);
5172 :
Inst_VOP3A(iFmt,
"v_mad_legacy_f32", false)
5188 Wavefront *wf = gpuDynInst->wavefront();
5224 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5249 Wavefront *wf = gpuDynInst->wavefront();
5285 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5309 Wavefront *wf = gpuDynInst->wavefront();
5331 vdst[lane] = sext<24>(
bits(src0[lane], 23, 0))
5332 * sext<24>(
bits(src1[lane], 23, 0)) + src2[lane];
5356 Wavefront *wf = gpuDynInst->wavefront();
5378 vdst[lane] =
bits(src0[lane], 23, 0) *
bits(src1[lane], 23, 0)
5487 Wavefront *wf = gpuDynInst->wavefront();
5509 vdst[lane] = (src0[lane] >>
bits(src1[lane], 4, 0))
5510 & ((1 <<
bits(src2[lane], 4, 0)) - 1);
5534 Wavefront *wf = gpuDynInst->wavefront();
5556 vdst[lane] = (src0[lane] >>
bits(src1[lane], 4, 0))
5557 & ((1 <<
bits(src2[lane], 4, 0)) - 1);
5562 if (vdst[lane] >> (
bits(src2[lane], 4, 0) - 1)) {
5563 vdst[lane] |= 0xffffffff <<
bits(src2[lane], 4, 0);
5587 Wavefront *wf = gpuDynInst->wavefront();
5609 vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
5635 Wavefront *wf = gpuDynInst->wavefront();
5671 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5696 Wavefront *wf = gpuDynInst->wavefront();
5732 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5761 Wavefront *wf = gpuDynInst->wavefront();
5783 vdst[lane] = ((
bits(src0[lane], 31, 24)
5784 +
bits(src1[lane], 31, 24) +
bits(src2[lane], 24)) >> 1)
5786 vdst[lane] += ((
bits(src0[lane], 23, 16)
5787 +
bits(src1[lane], 23, 16) +
bits(src2[lane], 16)) >> 1)
5789 vdst[lane] += ((
bits(src0[lane], 15, 8)
5790 +
bits(src1[lane], 15, 8) +
bits(src2[lane], 8)) >> 1)
5792 vdst[lane] += ((
bits(src0[lane], 7, 0) +
bits(src1[lane], 7, 0)
5793 +
bits(src2[lane], 0)) >> 1);
5816 Wavefront *wf = gpuDynInst->wavefront();
5864 Wavefront *wf = gpuDynInst->wavefront();
5914 Wavefront *wf = gpuDynInst->wavefront();
5950 VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
5951 vdst[lane] = std::fmin(min_0_1, src2[lane]);
5974 Wavefront *wf = gpuDynInst->wavefront();
5996 VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
5997 vdst[lane] = std::min(min_0_1, src2[lane]);
6020 Wavefront *wf = gpuDynInst->wavefront();
6042 VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
6043 vdst[lane] = std::min(min_0_1, src2[lane]);
6067 Wavefront *wf = gpuDynInst->wavefront();
6103 VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
6104 vdst[lane] = std::fmax(max_0_1, src2[lane]);
6127 Wavefront *wf = gpuDynInst->wavefront();
6149 VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
6150 vdst[lane] = std::max(max_0_1, src2[lane]);
6173 Wavefront *wf = gpuDynInst->wavefront();
6195 VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
6196 vdst[lane] = std::max(max_0_1, src2[lane]);
6220 Wavefront *wf = gpuDynInst->wavefront();
6256 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6279 Wavefront *wf = gpuDynInst->wavefront();
6301 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6324 Wavefront *wf = gpuDynInst->wavefront();
6346 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6372 Wavefront *wf = gpuDynInst->wavefront();
6394 vdst[lane] = std::abs(
bits(src0[lane], 31, 24)
6395 -
bits(src1[lane], 31, 24))
6396 + std::abs(
bits(src0[lane], 23, 16)
6397 -
bits(src1[lane], 23, 16))
6398 + std::abs(
bits(src0[lane], 15, 8)
6399 -
bits(src1[lane], 15, 8))
6400 + std::abs(
bits(src0[lane], 7, 0)
6401 -
bits(src1[lane], 7, 0)) + src2[lane];
6425 Wavefront *wf = gpuDynInst->wavefront();
6447 vdst[lane] = (((
bits(src0[lane], 31, 24)
6448 -
bits(src1[lane], 31, 24)) + (
bits(src0[lane], 23, 16)
6449 -
bits(src1[lane], 23, 16)) + (
bits(src0[lane], 15, 8)
6450 -
bits(src1[lane], 15, 8)) + (
bits(src0[lane], 7, 0)
6451 -
bits(src1[lane], 7, 0))) << 16) + src2[lane];
6476 Wavefront *wf = gpuDynInst->wavefront();
6498 vdst[lane] = std::abs(
bits(src0[lane], 31, 16)
6499 -
bits(src1[lane], 31, 16))
6500 + std::abs(
bits(src0[lane], 15, 0)
6501 -
bits(src1[lane], 15, 0)) + src2[lane];
6525 Wavefront *wf = gpuDynInst->wavefront();
6547 vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
6574 Wavefront *wf = gpuDynInst->wavefront();
6603 vdst[lane] = (((
VecElemU8)src0[lane] & 0xff)
6604 << (8 *
bits(src1[lane], 1, 0)))
6605 | (src2[lane] & ~(0xff << (8 *
bits(src1[lane], 1, 0))));
6631 Wavefront *wf = gpuDynInst->wavefront();
6667 if (std::fpclassify(src1[lane]) == FP_ZERO) {
6668 if (std::signbit(src1[lane])) {
6669 vdst[lane] = -INFINITY;
6671 vdst[lane] = +INFINITY;
6676 if (std::signbit(src1[lane])) {
6677 vdst[lane] = -INFINITY;
6679 vdst[lane] = +INFINITY;
6682 vdst[lane] = src2[lane] / src1[lane];
6709 Wavefront *wf = gpuDynInst->wavefront();
6745 int sign_out = std::signbit(src1[lane])
6746 ^ std::signbit(src2[lane]);
6749 std::frexp(src1[lane], &exp1);
6750 std::frexp(src2[lane], &exp2);
6753 vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
6754 }
else if (std::fpclassify(src1[lane]) == FP_ZERO
6755 && std::fpclassify(src2[lane]) == FP_ZERO) {
6757 = std::numeric_limits<VecElemF64>::signaling_NaN();
6760 = std::numeric_limits<VecElemF64>::signaling_NaN();
6761 }
else if (std::fpclassify(src1[lane]) == FP_ZERO
6763 vdst[lane] = sign_out ? -INFINITY : +INFINITY;
6765 || std::fpclassify(src2[lane]) == FP_ZERO) {
6766 vdst[lane] = sign_out ? -0.0 : +0.0;
6767 }
else if (exp2 - exp1 < -1075) {
6768 vdst[lane] = src0[lane];
6769 }
else if (exp1 == 2047) {
6770 vdst[lane] = src0[lane];
6772 vdst[lane] = sign_out ? -std::fabs(src0[lane])
6773 : std::fabs(src0[lane]);
6804 Wavefront *wf = gpuDynInst->wavefront();
6829 vdst[lane] = src0[lane];
6861 Wavefront *wf = gpuDynInst->wavefront();
6888 std::frexp(src1[lane], &exp1);
6889 std::frexp(src2[lane], &exp2);
6892 if (std::fpclassify(src1[lane]) == FP_ZERO
6893 || std::fpclassify(src2[lane]) == FP_ZERO) {
6895 }
else if (exp2 - exp1 >= 768) {
6897 if (src0[lane] == src1[lane]) {
6898 vdst[lane] = std::ldexp(src0[lane], 128);
6900 }
else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
6901 vdst[lane] = std::ldexp(src0[lane], 128);
6902 }
else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
6903 && std::fpclassify(src2[lane] / src1[lane])
6906 if (src0[lane] == src1[lane]) {
6907 vdst[lane] = std::ldexp(src0[lane], 128);
6909 }
else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) {
6910 vdst[lane] = std::ldexp(src0[lane], -128);
6911 }
else if (std::fpclassify(src2[lane] / src1[lane])
6914 if (src0[lane] == src2[lane]) {
6915 vdst[lane] = std::ldexp(src0[lane], 128);
6917 }
else if (exp2 <= 53) {
6918 vdst[lane] = std::ldexp(src0[lane], 128);
6947 Wavefront *wf = gpuDynInst->wavefront();
6983 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
7010 Wavefront *wf = gpuDynInst->wavefront();
7049 vdst[lane] = std::pow(2, 64)
7050 * std::fma(src0[lane], src1[lane], src2[lane]);
7052 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
7081 :
Inst_VOP3A(iFmt,
"v_qsad_pk_u16_u8", false)
7102 :
Inst_VOP3A(iFmt,
"v_mqsad_pk_u16_u8", false)
7159 Wavefront *wf = gpuDynInst->wavefront();
7180 vcc.
setBit(lane,
muladd(vdst[lane], src0[lane], src1[lane],
7208 Wavefront *wf = gpuDynInst->wavefront();
7228 vcc.
setBit(lane,
muladd(vdst[lane], src0[lane], src1[lane],
7253 Wavefront *wf = gpuDynInst->wavefront();
7275 vdst[lane] = (src0[lane] ^ src1[lane]) + src2[lane];
7298 Wavefront *wf = gpuDynInst->wavefront();
7320 vdst[lane] = (src0[lane] <<
bits(src1[lane], 4, 0))
7344 Wavefront *wf = gpuDynInst->wavefront();
7367 (src0[lane] + src1[lane]) <<
bits(src2[lane], 4, 0);
7390 Wavefront *wf = gpuDynInst->wavefront();
7412 vdst[lane] = src0[lane] + src1[lane] + src2[lane];
7435 Wavefront *wf = gpuDynInst->wavefront();
7457 vdst[lane] = (src0[lane] <<
bits(src1[lane], 4, 0))
7482 Wavefront *wf = gpuDynInst->wavefront();
7504 vdst[lane] = (src0[lane] & src1[lane]) | src2[lane];
7551 Wavefront *wf = gpuDynInst->wavefront();
7573 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
7598 Wavefront *wf = gpuDynInst->wavefront();
7620 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
7656 Wavefront *wf = gpuDynInst->wavefront();
7672 DPRINTF(VEGA,
"Executing v_perm_b32 src_0 0x%08x, src_1 "
7673 "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
7674 src1[lane], src2[lane], vdst[lane]);
7677 for (
int i = 0;
i < 4 ; ++
i) {
7680 vdst[lane] |= (permuted_val << (8 *
i));
7683 DPRINTF(VEGA,
"v_perm result: 0x%08x\n", vdst[lane]);
7776 Wavefront *wf = gpuDynInst->wavefront();
7798 int shift_amount =
bits(src1[lane], 2, 0);
7799 shift_amount = shift_amount > 4 ? 0 : shift_amount;
7800 vdst[lane] = (src0[lane] << shift_amount)
7811 :
Inst_VOP3A(iFmt,
"v_cvt_pkaccum_u8_f32", false)
7887 :
Inst_VOP3A(iFmt,
"v_interp_mov_f32", false)
7908 :
Inst_VOP3A(iFmt,
"v_interp_p1ll_f16", false)
7937 :
Inst_VOP3A(iFmt,
"v_interp_p1lv_f16", false)
8008 Wavefront *wf = gpuDynInst->wavefront();
8045 if (std::signbit(src0[lane]) !=
8046 std::signbit(src1[lane])) {
8049 vdst[lane] = src0[lane];
8052 vdst[lane] = src0[lane];
8054 vdst[lane] = src1[lane];
8055 }
else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8056 std::fpclassify(src0[lane]) == FP_ZERO) {
8057 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8058 std::fpclassify(src1[lane]) == FP_ZERO) {
8059 if (std::signbit(src0[lane]) &&
8060 std::signbit(src1[lane])) {
8066 vdst[lane] = src1[lane];
8068 }
else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8069 std::fpclassify(src1[lane]) == FP_ZERO) {
8070 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8071 std::fpclassify(src0[lane]) == FP_ZERO) {
8072 if (std::signbit(src0[lane]) &&
8073 std::signbit(src1[lane])) {
8079 vdst[lane] = src0[lane];
8082 vdst[lane] = src0[lane] + src1[lane];
8107 Wavefront *wf = gpuDynInst->wavefront();
8142 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8143 std::fpclassify(src0[lane]) == FP_ZERO) &&
8144 !std::signbit(src0[lane])) {
8147 }
else if (!std::signbit(src1[lane])) {
8152 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8153 std::fpclassify(src0[lane]) == FP_ZERO) &&
8154 std::signbit(src0[lane])) {
8157 }
else if (std::signbit(src1[lane])) {
8163 !std::signbit(src0[lane])) {
8164 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8165 std::fpclassify(src1[lane]) == FP_ZERO) {
8167 }
else if (!std::signbit(src1[lane])) {
8168 vdst[lane] = +INFINITY;
8170 vdst[lane] = -INFINITY;
8173 std::signbit(src0[lane])) {
8174 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8175 std::fpclassify(src1[lane]) == FP_ZERO) {
8177 }
else if (std::signbit(src1[lane])) {
8178 vdst[lane] = +INFINITY;
8180 vdst[lane] = -INFINITY;
8183 vdst[lane] = src0[lane] * src1[lane];
8208 Wavefront *wf = gpuDynInst->wavefront();
8240 vdst[lane] = std::fmin(src0[lane], src1[lane]);
8264 Wavefront *wf = gpuDynInst->wavefront();
8296 vdst[lane] = std::fmax(src0[lane], src1[lane]);
8320 Wavefront *wf = gpuDynInst->wavefront();
8347 vdst[lane] = src0[lane];
8348 }
else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
8349 || std::fpclassify(src0[lane]) == FP_ZERO) {
8350 if (std::signbit(src0[lane])) {
8356 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
8380 Wavefront *wf = gpuDynInst->wavefront();
8402 vdst[lane] = (
VecElemU32)((s0 * s1) & 0xffffffffLL);
8425 Wavefront *wf = gpuDynInst->wavefront();
8448 = (
VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
8471 Wavefront *wf = gpuDynInst->wavefront();
8494 = (
VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
8518 Wavefront *wf = gpuDynInst->wavefront();
8536 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
8569 sdst = src0[src1.
rawData() & 0x3f];
8624 Wavefront *wf = gpuDynInst->wavefront();
8644 vdst[lane] =
popCount(src0[lane]) + src1[lane];
8654 :
Inst_VOP3A(iFmt,
"v_mbcnt_lo_u32_b32", false)
8671 Wavefront *wf = gpuDynInst->wavefront();
8675 uint64_t threadMask = 0;
8692 threadMask = ((1ULL << lane) - 1ULL);
8693 vdst[lane] =
popCount(src0[lane] &
bits(threadMask, 31, 0)) +
8704 :
Inst_VOP3A(iFmt,
"v_mbcnt_hi_u32_b32", false)
8721 Wavefront *wf = gpuDynInst->wavefront();
8725 uint64_t threadMask = 0;
8742 threadMask = ((1ULL << lane) - 1ULL);
8743 vdst[lane] =
popCount(src0[lane] &
bits(threadMask, 63, 32)) +
8768 Wavefront *wf = gpuDynInst->wavefront();
8788 vdst[lane] = src1[lane] <<
bits(src0[lane], 5, 0);
8813 Wavefront *wf = gpuDynInst->wavefront();
8833 vdst[lane] = src1[lane] >>
bits(src0[lane], 5, 0);
8858 Wavefront *wf = gpuDynInst->wavefront();
8879 = src1[lane] >>
bits(src0[lane], 5, 0);
8888 :
Inst_VOP3A(iFmt,
"v_trig_preop_f64", false)
8928 Wavefront *wf = gpuDynInst->wavefront();
8948 vdst[lane] = ((1 <<
bits(src0[lane], 4, 0)) - 1)
8949 <<
bits(src1[lane], 4, 0);
8959 :
Inst_VOP3A(iFmt,
"v_cvt_pknorm_i16_f32", false)
8980 :
Inst_VOP3A(iFmt,
"v_cvt_pknorm_u16_f32", false)
9001 :
Inst_VOP3A(iFmt,
"v_cvt_pkrtz_f16_f32", false)
9024 :
Inst_VOP3A(iFmt,
"v_cvt_pk_u16_u32", false)
9043 :
Inst_VOP3A(iFmt,
"v_cvt_pk_i16_i32", false)
9062 :
Inst_VOP3A(iFmt,
"v_cvt_pk_fp8_f32", false)
9074 Wavefront *wf = gpuDynInst->wavefront();
9096 if ((abs & 1) && (tmp0 < 0)) tmp0 = -tmp0;
9097 if ((abs & 2) && (tmp1 < 0)) tmp1 = -tmp1;
9098 if (neg & 1) tmp0 = -tmp0;
9099 if (neg & 2) tmp1 = -tmp1;
9101 uint16_t packed_data = (
bits(tmp0.data, 31, 24) << 8)
const std::string _opcode
T omodModifier(T val, unsigned omod)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *)
~Inst_VOP3__V_ADDC_CO_U32()
Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ADD_CO_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_F16(InFmt_VOP3A *)
Inst_VOP3__V_ADD_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_F64(InFmt_VOP3A *)
~Inst_VOP3__V_ADD_LSHL_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_LSHL_U32(InFmt_VOP3A *)
Inst_VOP3__V_ADD_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ALIGNBIT_B32()
Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3A *)
~Inst_VOP3__V_ALIGNBYTE_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_AND_B32(InFmt_VOP3A *)
Inst_VOP3__V_AND_OR_B32(InFmt_VOP3A *)
~Inst_VOP3__V_AND_OR_B32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I16()
Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I64()
Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3A *)
~Inst_VOP3__V_BCNT_U32_B32()
Inst_VOP3__V_BFE_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFE_U32(InFmt_VOP3A *)
Inst_VOP3__V_BFI_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFM_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFREV_B32(InFmt_VOP3A *)
~Inst_VOP3__V_BFREV_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CEIL_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CEIL_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CEIL_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CLREXCP(InFmt_VOP3A *)
Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3A *)
~Inst_VOP3__V_CNDMASK_B32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_COS_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_COS_F32(InFmt_VOP3A *)
Inst_VOP3__V_CUBEID_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CUBEID_F32()
~Inst_VOP3__V_CUBEMA_F32()
Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CUBESC_F32()
Inst_VOP3__V_CUBESC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CUBETC_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CUBETC_F32()
~Inst_VOP3__V_CVT_F16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F16_I16()
Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F16_U16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_F16()
~Inst_VOP3__V_CVT_F32_F64()
Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_U32()
Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_UBYTE0()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F32_UBYTE1()
Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_UBYTE2()
Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F32_UBYTE3()
Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F64_F32()
Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F64_I32()
~Inst_VOP3__V_CVT_F64_U32()
Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_FLR_I32_F32()
Inst_VOP3__V_CVT_FLR_I32_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_I16_F16()
Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_I32_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_I32_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_OFF_F32_I4()
Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3A *)
Inst_VOP3__V_CVT_PKACCUM_U8_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PKNORM_I16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PKNORM_I16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PKNORM_U16_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PKNORM_U16_F32()
~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
Inst_VOP3__V_CVT_PKRTZ_F16_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PK_FP8_F32()
Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PK_I16_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_U16_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_U8_F32()
Inst_VOP3__V_CVT_RPI_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_RPI_I32_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_U16_F16()
Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_U32_F32()
~Inst_VOP3__V_CVT_U32_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_FIXUP_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3A *)
Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_FIXUP_F32()
~Inst_VOP3__V_DIV_FIXUP_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_FMAS_F32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_FMAS_F64()
Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_SCALE_F32()
Inst_VOP3__V_DIV_SCALE_F32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_SCALE_F64()
Inst_VOP3__V_DIV_SCALE_F64(InFmt_VOP3B *)
Inst_VOP3__V_EXP_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_EXP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_EXP_LEGACY_F32()
Inst_VOP3__V_FFBH_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FFBH_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FFBL_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FLOOR_F16()
Inst_VOP3__V_FLOOR_F16(InFmt_VOP3A *)
Inst_VOP3__V_FLOOR_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FLOOR_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FLOOR_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FLOOR_F64()
Inst_VOP3__V_FMAC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FMA_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FMA_F32(InFmt_VOP3A *)
Inst_VOP3__V_FMA_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FRACT_F16()
Inst_VOP3__V_FRACT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FRACT_F32()
Inst_VOP3__V_FRACT_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FRACT_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FRACT_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_EXP_I16_F16()
Inst_VOP3__V_FREXP_EXP_I16_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FREXP_EXP_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_EXP_I32_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FREXP_EXP_I32_F64()
Inst_VOP3__V_FREXP_EXP_I32_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_MANT_F16()
Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FREXP_MANT_F32()
Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_MANT_F64()
Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3A *)
~Inst_VOP3__V_INTERP_MOV_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_P1LL_F16(InFmt_VOP3A *)
~Inst_VOP3__V_INTERP_P1LL_F16()
~Inst_VOP3__V_INTERP_P1LV_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_P1LV_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P1_F32()
Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3A *)
Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P2_F16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P2_F32()
Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F16(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F64(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LERP_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LOG_LEGACY_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B32()
Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B64()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHL_ADD_U32()
Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHL_ADD_U64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHL_ADD_U64(InFmt_VOP3A *)
Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHL_OR_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHRREV_B16()
~Inst_VOP3__V_LSHRREV_B32()
Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHRREV_B64()
Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAC_F16(InFmt_VOP3A *)
Inst_VOP3__V_MAC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAD_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MAD_I32_I24()
~Inst_VOP3__V_MAD_I64_I32()
Inst_VOP3__V_MAD_I64_I32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_MAD_LEGACY_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_U16(InFmt_VOP3A *)
Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3A *)
~Inst_VOP3__V_MAD_U32_U24()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MAD_U64_U32()
Inst_VOP3__V_MAD_U64_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX3_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAX3_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAX_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_I32(InFmt_VOP3A *)
Inst_VOP3__V_MAX_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MBCNT_HI_U32_B32()
Inst_VOP3__V_MBCNT_HI_U32_B32(InFmt_VOP3A *)
~Inst_VOP3__V_MBCNT_LO_U32_B32()
Inst_VOP3__V_MBCNT_LO_U32_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MED3_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MED3_I32(InFmt_VOP3A *)
Inst_VOP3__V_MED3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN3_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN3_I32(InFmt_VOP3A *)
Inst_VOP3__V_MIN3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_F32(InFmt_VOP3A *)
Inst_VOP3__V_MIN_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_U16(InFmt_VOP3A *)
Inst_VOP3__V_MIN_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MOV_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3A *)
~Inst_VOP3__V_MOV_FED_B32()
~Inst_VOP3__V_MQSAD_PK_U16_U8()
Inst_VOP3__V_MQSAD_PK_U16_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MQSAD_U32_U8()
Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MSAD_U8(InFmt_VOP3A *)
Inst_VOP3__V_MUL_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_F32(InFmt_VOP3A *)
Inst_VOP3__V_MUL_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_I32_I24()
Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_HI_U32_U24()
Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_U32()
Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_I32_I24()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_LEGACY_F32()
Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_LO_U16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_LO_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_U32_U24()
Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_NOP(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_NOT_B32(InFmt_VOP3A *)
Inst_VOP3__V_OR3_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_OR_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
uint8_t permute(uint64_t in_dword2x, uint32_t sel)
Inst_VOP3__V_PERM_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_QSAD_PK_U16_U8()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3A *)
Inst_VOP3__V_RCP_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RCP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RCP_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RCP_IFLAG_F32()
Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_READLANE_B32()
Inst_VOP3__V_READLANE_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RNDNE_F16()
Inst_VOP3__V_RNDNE_F16(InFmt_VOP3A *)
~Inst_VOP3__V_RNDNE_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RNDNE_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RNDNE_F64()
Inst_VOP3__V_RNDNE_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RSQ_F16(InFmt_VOP3A *)
Inst_VOP3__V_RSQ_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RSQ_F64(InFmt_VOP3A *)
Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3A *)
~Inst_VOP3__V_SAD_HI_U8()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SIN_F16(InFmt_VOP3A *)
Inst_VOP3__V_SIN_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SQRT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SQRT_F32(InFmt_VOP3A *)
Inst_VOP3__V_SQRT_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUBBREV_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBBREV_CO_U32()
Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *)
~Inst_VOP3__V_SUBB_CO_U32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_CO_U32()
Inst_VOP3__V_SUBREV_CO_U32(InFmt_VOP3B *)
Inst_VOP3__V_SUBREV_F16(InFmt_VOP3A *)
~Inst_VOP3__V_SUBREV_F16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUBREV_F32(InFmt_VOP3A *)
Inst_VOP3__V_SUBREV_U16(InFmt_VOP3A *)
~Inst_VOP3__V_SUBREV_U16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_U32()
Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUB_CO_U32()
Inst_VOP3__V_SUB_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_F32(InFmt_VOP3A *)
Inst_VOP3__V_SUB_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_TRIG_PREOP_F64()
Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3A *)
Inst_VOP3__V_TRUNC_F16(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F16()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_TRUNC_F32(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_TRUNC_F64(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F64()
Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3A *)
~Inst_VOP3__V_WRITELANE_B32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_XAD_U32(InFmt_VOP3A *)
Inst_VOP3__V_XOR_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
bit access to scalar data.
void panicUnimplemented() const
void read() override
read from the vrf.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
void write() override
write to the vrf.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr int popCount(uint64_t val)
Returns the number of set ones in the provided value.
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
std::enable_if_t< std::is_integral_v< T >, T > reverseBits(T val, size_t size=sizeof(T))
Takes a value and returns the bit reversed version.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
constexpr unsigned NumVecElemPerVecReg
ScalarRegI32 firstOppositeSignBit(ScalarRegI32 val)
ScalarRegI32 findFirstOne(T val)
T median(T val_0, T val_1, T val_2)
ScalarRegI32 findFirstOneMsb(T val)
T roundNearestEven(T val)
VecElemU32 muladd(VecElemU64 &dst, VecElemU32 val_0, VecElemU32 val_1, VecElemU64 val_2)
Bitfield< 31, 16 > selector
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)