73 ? src1[lane] : src0[lane];
129 vdst[lane] = src0[lane] + src1[lane];
186 vdst[lane] = src0[lane] - src1[lane];
243 vdst[lane] = src1[lane] - src0[lane];
302 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
303 std::fpclassify(src0[lane]) == FP_ZERO) &&
304 !std::signbit(src0[lane])) {
307 }
else if (!std::signbit(src1[lane])) {
312 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
313 std::fpclassify(src0[lane]) == FP_ZERO) &&
314 std::signbit(src0[lane])) {
317 }
else if (std::signbit(src1[lane])) {
323 !std::signbit(src0[lane])) {
324 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
325 std::fpclassify(src1[lane]) == FP_ZERO) {
327 }
else if (!std::signbit(src1[lane])) {
328 vdst[lane] = +INFINITY;
330 vdst[lane] = -INFINITY;
333 std::signbit(src0[lane])) {
334 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
335 std::fpclassify(src1[lane]) == FP_ZERO) {
337 }
else if (std::signbit(src1[lane])) {
338 vdst[lane] = +INFINITY;
340 vdst[lane] = -INFINITY;
343 vdst[lane] = src0[lane] * src1[lane];
403 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
404 std::fpclassify(src0[lane]) == FP_ZERO) &&
405 !std::signbit(src0[lane])) {
408 }
else if (!std::signbit(src1[lane])) {
413 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
414 std::fpclassify(src0[lane]) == FP_ZERO) &&
415 std::signbit(src0[lane])) {
418 }
else if (std::signbit(src1[lane])) {
424 !std::signbit(src0[lane])) {
425 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
426 std::fpclassify(src1[lane]) == FP_ZERO) {
428 }
else if (!std::signbit(src1[lane])) {
429 vdst[lane] = +INFINITY;
431 vdst[lane] = -INFINITY;
434 std::signbit(src0[lane])) {
435 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
436 std::fpclassify(src1[lane]) == FP_ZERO) {
438 }
else if (std::signbit(src1[lane])) {
439 vdst[lane] = +INFINITY;
441 vdst[lane] = -INFINITY;
444 vdst[lane] = src0[lane] * src1[lane];
537 vdst[lane] = (
VecElemI32)((tmp_src0 * tmp_src1) >> 32);
580 vdst[lane] =
bits(src0[lane], 23, 0) *
bits(src1[lane], 23, 0);
625 vdst[lane] = (
VecElemU32)((tmp_src0 * tmp_src1) >> 32);
681 vdst[lane] = std::fmin(src0[lane], src1[lane]);
737 vdst[lane] = std::fmax(src0[lane], src1[lane]);
780 vdst[lane] = std::min(src0[lane], src1[lane]);
823 vdst[lane] = std::max(src0[lane], src1[lane]);
866 vdst[lane] = std::min(src0[lane], src1[lane]);
909 vdst[lane] = std::max(src0[lane], src1[lane]);
954 vdst[lane] = src1[lane] >>
bits(src0[lane], 4, 0);
999 vdst[lane] = src1[lane] >>
bits(src0[lane], 4, 0);
1023 Wavefront *wf = gpuDynInst->wavefront();
1043 vdst[lane] = src1[lane] <<
bits(src0[lane], 4, 0);
1067 Wavefront *wf = gpuDynInst->wavefront();
1087 vdst[lane] = src0[lane] & src1[lane];
1111 Wavefront *wf = gpuDynInst->wavefront();
1131 vdst[lane] = src0[lane] | src1[lane];
1155 Wavefront *wf = gpuDynInst->wavefront();
1177 vdst[lane] = src0[lane] | src1[lane] | src2[lane];
1201 Wavefront *wf = gpuDynInst->wavefront();
1221 vdst[lane] = src0[lane] ^ src1[lane];
1247 Wavefront *wf = gpuDynInst->wavefront();
1280 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
1307 Wavefront *wf = gpuDynInst->wavefront();
1325 vdst[lane] = src0[lane] + src1[lane];
1327 + (
VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
1355 Wavefront *wf = gpuDynInst->wavefront();
1373 vdst[lane] = src0[lane] - src1[lane];
1374 vcc.
setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
1404 Wavefront *wf = gpuDynInst->wavefront();
1422 vdst[lane] = src1[lane] - src0[lane];
1423 vcc.
setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
1453 Wavefront *wf = gpuDynInst->wavefront();
1473 vdst[lane] = src0[lane] + src1[lane]
1478 >= 0x100000000 ? 1 : 0);
1508 Wavefront *wf = gpuDynInst->wavefront();
1528 vdst[lane] = src0[lane] - src1[lane]
1531 > src0[lane] ? 1 : 0);
1562 Wavefront *wf = gpuDynInst->wavefront();
1582 vdst[lane] = src1[lane] - src0[lane]
1585 > src0[lane] ? 1 : 0);
1719 Wavefront *wf = gpuDynInst->wavefront();
1739 vdst[lane] = src0[lane] + src1[lane];
1763 Wavefront *wf = gpuDynInst->wavefront();
1783 vdst[lane] = src0[lane] - src1[lane];
1808 Wavefront *wf = gpuDynInst->wavefront();
1828 vdst[lane] = src1[lane] - src0[lane];
1852 Wavefront *wf = gpuDynInst->wavefront();
1872 vdst[lane] = src0[lane] * src1[lane];
1896 Wavefront *wf = gpuDynInst->wavefront();
1916 vdst[lane] = src1[lane] <<
bits(src0[lane], 3, 0);
1941 Wavefront *wf = gpuDynInst->wavefront();
1967 vdst[lane] = src1[lane] >>
bits(src0[lane], 3, 0);
1992 Wavefront *wf = gpuDynInst->wavefront();
2012 vdst[lane] = src1[lane] >>
bits(src0[lane], 3, 0);
2079 Wavefront *wf = gpuDynInst->wavefront();
2105 vdst[lane] = std::max(src0[lane], src1[lane]);
2128 Wavefront *wf = gpuDynInst->wavefront();
2154 vdst[lane] = std::max(src0[lane], src1[lane]);
2177 Wavefront *wf = gpuDynInst->wavefront();
2203 vdst[lane] = std::min(src0[lane], src1[lane]);
2226 Wavefront *wf = gpuDynInst->wavefront();
2252 vdst[lane] = std::min(src0[lane], src1[lane]);
2295 Wavefront *wf = gpuDynInst->wavefront();
2315 vdst[lane] = src0[lane] + src1[lane];
2338 Wavefront *wf = gpuDynInst->wavefront();
2358 vdst[lane] = src0[lane] - src1[lane];
2381 Wavefront *wf = gpuDynInst->wavefront();
2401 vdst[lane] = src1[lane] - src0[lane];
2426 Wavefront *wf = gpuDynInst->wavefront();
2465 float out = std::fma(src0[lane], src1[lane], vdst[lane]);
2467 out = std::clamp(vdst[lane], 0.0f, 1.0f);
2511 Wavefront *wf = gpuDynInst->wavefront();
2519 vdst[lane] = src[lane];
2545 Wavefront *wf = gpuDynInst->wavefront();
2562 std::frexp(src[lane],&exp);
2565 }
else if (
std::isinf(src[lane]) || exp > 30) {
2566 if (std::signbit(src[lane])) {
2567 vdst[lane] = INT_MIN;
2569 vdst[lane] = INT_MAX;
2597 Wavefront *wf = gpuDynInst->wavefront();
2637 Wavefront *wf = gpuDynInst->wavefront();
2679 Wavefront *wf = gpuDynInst->wavefront();
2721 Wavefront *wf = gpuDynInst->wavefront();
2738 std::frexp(src[lane],&exp);
2742 if (std::signbit(src[lane])) {
2745 vdst[lane] = UINT_MAX;
2747 }
else if (exp > 31) {
2748 vdst[lane] = UINT_MAX;
2777 Wavefront *wf = gpuDynInst->wavefront();
2802 std::frexp(src[lane],&exp);
2805 }
else if (
std::isinf(src[lane]) || exp > 30) {
2806 if (std::signbit(src[lane])) {
2807 vdst[lane] = INT_MIN;
2809 vdst[lane] = INT_MAX;
2860 Wavefront *wf = gpuDynInst->wavefront();
2876 float tmp = src0[lane];
2878 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2879 if (neg & 1) tmp = -tmp;
2882 tmp = std::clamp(tmp, 0.0f, 1.0f);
2916 Wavefront *wf = gpuDynInst->wavefront();
2933 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2934 if (neg & 1) tmp = -tmp;
2937 out = std::clamp(out, 0.0f, 1.0f);
2949 :
Inst_VOP3A(iFmt,
"v_cvt_rpi_i32_f32", false)
2964 Wavefront *wf = gpuDynInst->wavefront();
2980 vdst[lane] = (
VecElemI32)std::floor(src[lane] + 0.5);
2990 :
Inst_VOP3A(iFmt,
"v_cvt_flr_i32_f32", false)
3005 Wavefront *wf = gpuDynInst->wavefront();
3021 vdst[lane] = (
VecElemI32)std::floor(src[lane]);
3030 :
Inst_VOP3A(iFmt,
"v_cvt_off_f32_i4", false)
3066 Wavefront *wf = gpuDynInst->wavefront();
3114 Wavefront *wf = gpuDynInst->wavefront();
3147 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte0", false)
3162 Wavefront *wf = gpuDynInst->wavefront();
3187 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte1", false)
3202 Wavefront *wf = gpuDynInst->wavefront();
3227 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte2", false)
3242 Wavefront *wf = gpuDynInst->wavefront();
3267 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte3", false)
3282 Wavefront *wf = gpuDynInst->wavefront();
3324 Wavefront *wf = gpuDynInst->wavefront();
3341 std::frexp(src[lane],&exp);
3345 if (std::signbit(src[lane])) {
3348 vdst[lane] = UINT_MAX;
3350 }
else if (exp > 31) {
3351 vdst[lane] = UINT_MAX;
3378 Wavefront *wf = gpuDynInst->wavefront();
3418 Wavefront *wf = gpuDynInst->wavefront();
3434 vdst[lane] = std::trunc(src[lane]);
3459 Wavefront *wf = gpuDynInst->wavefront();
3475 vdst[lane] = std::ceil(src[lane]);
3499 Wavefront *wf = gpuDynInst->wavefront();
3540 Wavefront *wf = gpuDynInst->wavefront();
3556 vdst[lane] = std::floor(src[lane]);
3580 Wavefront *wf = gpuDynInst->wavefront();
3597 vdst[lane] = std::modf(src[lane], &int_part);
3621 Wavefront *wf = gpuDynInst->wavefront();
3637 vdst[lane] = std::trunc(src[lane]);
3662 Wavefront *wf = gpuDynInst->wavefront();
3678 vdst[lane] = std::ceil(src[lane]);
3702 Wavefront *wf = gpuDynInst->wavefront();
3743 Wavefront *wf = gpuDynInst->wavefront();
3759 vdst[lane] = std::floor(src[lane]);
3783 Wavefront *wf = gpuDynInst->wavefront();
3799 vdst[lane] = std::pow(2.0, src[lane]);
3823 Wavefront *wf = gpuDynInst->wavefront();
3847 vdst[lane] = std::log2(src[lane]);
3871 Wavefront *wf = gpuDynInst->wavefront();
3887 vdst[lane] = 1.0 / src[lane];
3913 Wavefront *wf = gpuDynInst->wavefront();
3929 vdst[lane] = 1.0 / src[lane];
3953 Wavefront *wf = gpuDynInst->wavefront();
3969 vdst[lane] = 1.0 / std::sqrt(src[lane]);
3993 Wavefront *wf = gpuDynInst->wavefront();
4009 if (std::fpclassify(src[lane]) == FP_ZERO) {
4010 vdst[lane] = +INFINITY;
4014 if (std::signbit(src[lane])) {
4020 vdst[lane] = 1.0 / src[lane];
4045 Wavefront *wf = gpuDynInst->wavefront();
4061 if (std::fpclassify(src[lane]) == FP_ZERO) {
4062 vdst[lane] = +INFINITY;
4065 }
else if (
std::isinf(src[lane]) && !std::signbit(src[lane])) {
4067 }
else if (std::signbit(src[lane])) {
4070 vdst[lane] = 1.0 / std::sqrt(src[lane]);
4095 Wavefront *wf = gpuDynInst->wavefront();
4111 vdst[lane] = std::sqrt(src[lane]);
4135 Wavefront *wf = gpuDynInst->wavefront();
4151 vdst[lane] = std::sqrt(src[lane]);
4177 Wavefront *wf = gpuDynInst->wavefront();
4195 vdst[lane] = std::sin(src[lane] * 2 * pi.
rawData());
4221 Wavefront *wf = gpuDynInst->wavefront();
4239 vdst[lane] = std::cos(src[lane] * 2 * pi.
rawData());
4263 Wavefront *wf = gpuDynInst->wavefront();
4279 vdst[lane] = ~src[lane];
4303 Wavefront *wf = gpuDynInst->wavefront();
4335 Wavefront *wf = gpuDynInst->wavefront();
4375 Wavefront *wf = gpuDynInst->wavefront();
4415 Wavefront *wf = gpuDynInst->wavefront();
4441 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i32_f64", false)
4456 Wavefront *wf = gpuDynInst->wavefront();
4476 std::frexp(src[lane], &exp);
4487 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f64", false)
4502 Wavefront *wf = gpuDynInst->wavefront();
4519 vdst[lane] = std::frexp(src[lane], &exp);
4543 Wavefront *wf = gpuDynInst->wavefront();
4560 vdst[lane] = std::modf(src[lane], &int_part);
4570 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i32_f32", false)
4589 Wavefront *wf = gpuDynInst->wavefront();
4609 std::frexp(src[lane], &exp);
4620 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f32", false)
4640 Wavefront *wf = gpuDynInst->wavefront();
4657 vdst[lane] = src[lane];
4660 vdst[lane] = std::frexp(src[lane], &exp);
4887 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f16", false)
4915 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i16_f16", false)
5086 :
Inst_VOP3A(iFmt,
"v_exp_legacy_f32", false)
5101 Wavefront *wf = gpuDynInst->wavefront();
5125 vdst[lane] = std::pow(2.0, src[lane]);
5134 :
Inst_VOP3A(iFmt,
"v_log_legacy_f32", false)
5149 Wavefront *wf = gpuDynInst->wavefront();
5157 vdst[lane] = std::log2(src[lane]);
5166 :
Inst_VOP3A(iFmt,
"v_mad_legacy_f32", false)
5182 Wavefront *wf = gpuDynInst->wavefront();
5218 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5243 Wavefront *wf = gpuDynInst->wavefront();
5279 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5303 Wavefront *wf = gpuDynInst->wavefront();
5350 Wavefront *wf = gpuDynInst->wavefront();
5372 vdst[lane] =
bits(src0[lane], 23, 0) *
bits(src1[lane], 23, 0)
5481 Wavefront *wf = gpuDynInst->wavefront();
5503 vdst[lane] = (src0[lane] >>
bits(src1[lane], 4, 0))
5504 & ((1 <<
bits(src2[lane], 4, 0)) - 1);
5528 Wavefront *wf = gpuDynInst->wavefront();
5550 vdst[lane] = (src0[lane] >>
bits(src1[lane], 4, 0))
5551 & ((1 <<
bits(src2[lane], 4, 0)) - 1);
5556 if (vdst[lane] >> (
bits(src2[lane], 4, 0) - 1)) {
5557 vdst[lane] |= 0xffffffff <<
bits(src2[lane], 4, 0);
5581 Wavefront *wf = gpuDynInst->wavefront();
5603 vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
5629 Wavefront *wf = gpuDynInst->wavefront();
5665 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5690 Wavefront *wf = gpuDynInst->wavefront();
5726 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5755 Wavefront *wf = gpuDynInst->wavefront();
5777 vdst[lane] = ((
bits(src0[lane], 31, 24)
5778 +
bits(src1[lane], 31, 24) +
bits(src2[lane], 24)) >> 1)
5780 vdst[lane] += ((
bits(src0[lane], 23, 16)
5781 +
bits(src1[lane], 23, 16) +
bits(src2[lane], 16)) >> 1)
5783 vdst[lane] += ((
bits(src0[lane], 15, 8)
5784 +
bits(src1[lane], 15, 8) +
bits(src2[lane], 8)) >> 1)
5786 vdst[lane] += ((
bits(src0[lane], 7, 0) +
bits(src1[lane], 7, 0)
5787 +
bits(src2[lane], 0)) >> 1);
5810 Wavefront *wf = gpuDynInst->wavefront();
5858 Wavefront *wf = gpuDynInst->wavefront();
5908 Wavefront *wf = gpuDynInst->wavefront();
5944 VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
5945 vdst[lane] = std::fmin(min_0_1, src2[lane]);
5968 Wavefront *wf = gpuDynInst->wavefront();
5990 VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
5991 vdst[lane] = std::min(min_0_1, src2[lane]);
6014 Wavefront *wf = gpuDynInst->wavefront();
6036 VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
6037 vdst[lane] = std::min(min_0_1, src2[lane]);
6061 Wavefront *wf = gpuDynInst->wavefront();
6097 VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
6098 vdst[lane] = std::fmax(max_0_1, src2[lane]);
6121 Wavefront *wf = gpuDynInst->wavefront();
6143 VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
6144 vdst[lane] = std::max(max_0_1, src2[lane]);
6167 Wavefront *wf = gpuDynInst->wavefront();
6189 VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
6190 vdst[lane] = std::max(max_0_1, src2[lane]);
6214 Wavefront *wf = gpuDynInst->wavefront();
6250 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6273 Wavefront *wf = gpuDynInst->wavefront();
6295 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6318 Wavefront *wf = gpuDynInst->wavefront();
6340 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6366 Wavefront *wf = gpuDynInst->wavefront();
6388 vdst[lane] = std::abs(
bits(src0[lane], 31, 24)
6389 -
bits(src1[lane], 31, 24))
6390 + std::abs(
bits(src0[lane], 23, 16)
6391 -
bits(src1[lane], 23, 16))
6392 + std::abs(
bits(src0[lane], 15, 8)
6393 -
bits(src1[lane], 15, 8))
6394 + std::abs(
bits(src0[lane], 7, 0)
6395 -
bits(src1[lane], 7, 0)) + src2[lane];
6419 Wavefront *wf = gpuDynInst->wavefront();
6441 vdst[lane] = (((
bits(src0[lane], 31, 24)
6442 -
bits(src1[lane], 31, 24)) + (
bits(src0[lane], 23, 16)
6443 -
bits(src1[lane], 23, 16)) + (
bits(src0[lane], 15, 8)
6444 -
bits(src1[lane], 15, 8)) + (
bits(src0[lane], 7, 0)
6445 -
bits(src1[lane], 7, 0))) << 16) + src2[lane];
6470 Wavefront *wf = gpuDynInst->wavefront();
6492 vdst[lane] = std::abs(
bits(src0[lane], 31, 16)
6493 -
bits(src1[lane], 31, 16))
6494 + std::abs(
bits(src0[lane], 15, 0)
6495 -
bits(src1[lane], 15, 0)) + src2[lane];
6519 Wavefront *wf = gpuDynInst->wavefront();
6541 vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
6568 Wavefront *wf = gpuDynInst->wavefront();
6597 vdst[lane] = (((
VecElemU8)src0[lane] & 0xff)
6598 << (8 *
bits(src1[lane], 1, 0)))
6599 | (src2[lane] & ~(0xff << (8 *
bits(src1[lane], 1, 0))));
6625 Wavefront *wf = gpuDynInst->wavefront();
6661 if (std::fpclassify(src1[lane]) == FP_ZERO) {
6662 if (std::signbit(src1[lane])) {
6663 vdst[lane] = -INFINITY;
6665 vdst[lane] = +INFINITY;
6670 if (std::signbit(src1[lane])) {
6671 vdst[lane] = -INFINITY;
6673 vdst[lane] = +INFINITY;
6676 vdst[lane] = src2[lane] / src1[lane];
6703 Wavefront *wf = gpuDynInst->wavefront();
6739 int sign_out = std::signbit(src1[lane])
6740 ^ std::signbit(src2[lane]);
6743 std::frexp(src1[lane], &exp1);
6744 std::frexp(src2[lane], &exp2);
6747 vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
6748 }
else if (std::fpclassify(src1[lane]) == FP_ZERO
6749 && std::fpclassify(src2[lane]) == FP_ZERO) {
6751 = std::numeric_limits<VecElemF64>::signaling_NaN();
6754 = std::numeric_limits<VecElemF64>::signaling_NaN();
6755 }
else if (std::fpclassify(src1[lane]) == FP_ZERO
6757 vdst[lane] = sign_out ? -INFINITY : +INFINITY;
6759 || std::fpclassify(src2[lane]) == FP_ZERO) {
6760 vdst[lane] = sign_out ? -0.0 : +0.0;
6761 }
else if (exp2 - exp1 < -1075) {
6762 vdst[lane] = src0[lane];
6763 }
else if (exp1 == 2047) {
6764 vdst[lane] = src0[lane];
6766 vdst[lane] = sign_out ? -std::fabs(src0[lane])
6767 : std::fabs(src0[lane]);
6798 Wavefront *wf = gpuDynInst->wavefront();
6823 vdst[lane] = src0[lane];
6855 Wavefront *wf = gpuDynInst->wavefront();
6882 std::frexp(src1[lane], &exp1);
6883 std::frexp(src2[lane], &exp2);
6886 if (std::fpclassify(src1[lane]) == FP_ZERO
6887 || std::fpclassify(src2[lane]) == FP_ZERO) {
6889 }
else if (exp2 - exp1 >= 768) {
6891 if (src0[lane] == src1[lane]) {
6892 vdst[lane] = std::ldexp(src0[lane], 128);
6894 }
else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
6895 vdst[lane] = std::ldexp(src0[lane], 128);
6896 }
else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
6897 && std::fpclassify(src2[lane] / src1[lane])
6900 if (src0[lane] == src1[lane]) {
6901 vdst[lane] = std::ldexp(src0[lane], 128);
6903 }
else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) {
6904 vdst[lane] = std::ldexp(src0[lane], -128);
6905 }
else if (std::fpclassify(src2[lane] / src1[lane])
6908 if (src0[lane] == src2[lane]) {
6909 vdst[lane] = std::ldexp(src0[lane], 128);
6911 }
else if (exp2 <= 53) {
6912 vdst[lane] = std::ldexp(src0[lane], 128);
6941 Wavefront *wf = gpuDynInst->wavefront();
6977 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
7004 Wavefront *wf = gpuDynInst->wavefront();
7043 vdst[lane] = std::pow(2, 64)
7044 * std::fma(src0[lane], src1[lane], src2[lane]);
7046 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
7075 :
Inst_VOP3A(iFmt,
"v_qsad_pk_u16_u8", false)
7096 :
Inst_VOP3A(iFmt,
"v_mqsad_pk_u16_u8", false)
7153 Wavefront *wf = gpuDynInst->wavefront();
7174 vcc.
setBit(lane,
muladd(vdst[lane], src0[lane], src1[lane],
7202 Wavefront *wf = gpuDynInst->wavefront();
7222 vcc.
setBit(lane,
muladd(vdst[lane], src0[lane], src1[lane],
7247 Wavefront *wf = gpuDynInst->wavefront();
7269 vdst[lane] = (src0[lane] ^ src1[lane]) + src2[lane];
7292 Wavefront *wf = gpuDynInst->wavefront();
7314 vdst[lane] = (src0[lane] <<
bits(src1[lane], 4, 0))
7338 Wavefront *wf = gpuDynInst->wavefront();
7361 (src0[lane] + src1[lane]) <<
bits(src2[lane], 4, 0);
7384 Wavefront *wf = gpuDynInst->wavefront();
7406 vdst[lane] = src0[lane] + src1[lane] + src2[lane];
7429 Wavefront *wf = gpuDynInst->wavefront();
7451 vdst[lane] = (src0[lane] <<
bits(src1[lane], 4, 0))
7476 Wavefront *wf = gpuDynInst->wavefront();
7498 vdst[lane] = (src0[lane] & src1[lane]) | src2[lane];
7545 Wavefront *wf = gpuDynInst->wavefront();
7567 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
7592 Wavefront *wf = gpuDynInst->wavefront();
7614 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
7650 Wavefront *wf = gpuDynInst->wavefront();
7666 DPRINTF(VEGA,
"Executing v_perm_b32 src_0 0x%08x, src_1 "
7667 "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
7668 src1[lane], src2[lane], vdst[lane]);
7671 for (
int i = 0;
i < 4 ; ++
i) {
7674 vdst[lane] |= (permuted_val << (8 *
i));
7677 DPRINTF(VEGA,
"v_perm result: 0x%08x\n", vdst[lane]);
7770 Wavefront *wf = gpuDynInst->wavefront();
7792 int shift_amount =
bits(src1[lane], 2, 0);
7793 shift_amount = shift_amount > 4 ? 0 : shift_amount;
7794 vdst[lane] = (src0[lane] << shift_amount)
7805 :
Inst_VOP3A(iFmt,
"v_cvt_pkaccum_u8_f32", false)
7881 :
Inst_VOP3A(iFmt,
"v_interp_mov_f32", false)
7902 :
Inst_VOP3A(iFmt,
"v_interp_p1ll_f16", false)
7931 :
Inst_VOP3A(iFmt,
"v_interp_p1lv_f16", false)
8002 Wavefront *wf = gpuDynInst->wavefront();
8039 if (std::signbit(src0[lane]) !=
8040 std::signbit(src1[lane])) {
8043 vdst[lane] = src0[lane];
8046 vdst[lane] = src0[lane];
8048 vdst[lane] = src1[lane];
8049 }
else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8050 std::fpclassify(src0[lane]) == FP_ZERO) {
8051 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8052 std::fpclassify(src1[lane]) == FP_ZERO) {
8053 if (std::signbit(src0[lane]) &&
8054 std::signbit(src1[lane])) {
8060 vdst[lane] = src1[lane];
8062 }
else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8063 std::fpclassify(src1[lane]) == FP_ZERO) {
8064 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8065 std::fpclassify(src0[lane]) == FP_ZERO) {
8066 if (std::signbit(src0[lane]) &&
8067 std::signbit(src1[lane])) {
8073 vdst[lane] = src0[lane];
8076 vdst[lane] = src0[lane] + src1[lane];
8101 Wavefront *wf = gpuDynInst->wavefront();
8136 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8137 std::fpclassify(src0[lane]) == FP_ZERO) &&
8138 !std::signbit(src0[lane])) {
8141 }
else if (!std::signbit(src1[lane])) {
8146 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8147 std::fpclassify(src0[lane]) == FP_ZERO) &&
8148 std::signbit(src0[lane])) {
8151 }
else if (std::signbit(src1[lane])) {
8157 !std::signbit(src0[lane])) {
8158 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8159 std::fpclassify(src1[lane]) == FP_ZERO) {
8161 }
else if (!std::signbit(src1[lane])) {
8162 vdst[lane] = +INFINITY;
8164 vdst[lane] = -INFINITY;
8167 std::signbit(src0[lane])) {
8168 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8169 std::fpclassify(src1[lane]) == FP_ZERO) {
8171 }
else if (std::signbit(src1[lane])) {
8172 vdst[lane] = +INFINITY;
8174 vdst[lane] = -INFINITY;
8177 vdst[lane] = src0[lane] * src1[lane];
8202 Wavefront *wf = gpuDynInst->wavefront();
8234 vdst[lane] = std::fmin(src0[lane], src1[lane]);
8258 Wavefront *wf = gpuDynInst->wavefront();
8290 vdst[lane] = std::fmax(src0[lane], src1[lane]);
8314 Wavefront *wf = gpuDynInst->wavefront();
8341 vdst[lane] = src0[lane];
8342 }
else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
8343 || std::fpclassify(src0[lane]) == FP_ZERO) {
8344 if (std::signbit(src0[lane])) {
8350 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
8374 Wavefront *wf = gpuDynInst->wavefront();
8396 vdst[lane] = (
VecElemU32)((s0 * s1) & 0xffffffffLL);
8419 Wavefront *wf = gpuDynInst->wavefront();
8442 = (
VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
8465 Wavefront *wf = gpuDynInst->wavefront();
8488 = (
VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
8512 Wavefront *wf = gpuDynInst->wavefront();
8530 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
8563 sdst = src0[src1.
rawData() & 0x3f];
8618 Wavefront *wf = gpuDynInst->wavefront();
8638 vdst[lane] =
popCount(src0[lane]) + src1[lane];
8648 :
Inst_VOP3A(iFmt,
"v_mbcnt_lo_u32_b32", false)
8665 Wavefront *wf = gpuDynInst->wavefront();
8669 uint64_t threadMask = 0;
8686 threadMask = ((1ULL << lane) - 1ULL);
8687 vdst[lane] =
popCount(src0[lane] &
bits(threadMask, 31, 0)) +
8698 :
Inst_VOP3A(iFmt,
"v_mbcnt_hi_u32_b32", false)
8715 Wavefront *wf = gpuDynInst->wavefront();
8719 uint64_t threadMask = 0;
8736 threadMask = ((1ULL << lane) - 1ULL);
8737 vdst[lane] =
popCount(src0[lane] &
bits(threadMask, 63, 32)) +
8762 Wavefront *wf = gpuDynInst->wavefront();
8782 vdst[lane] = src1[lane] <<
bits(src0[lane], 5, 0);
8807 Wavefront *wf = gpuDynInst->wavefront();
8827 vdst[lane] = src1[lane] >>
bits(src0[lane], 5, 0);
8852 Wavefront *wf = gpuDynInst->wavefront();
8873 = src1[lane] >>
bits(src0[lane], 5, 0);
8882 :
Inst_VOP3A(iFmt,
"v_trig_preop_f64", false)
8922 Wavefront *wf = gpuDynInst->wavefront();
8942 vdst[lane] = ((1 <<
bits(src0[lane], 4, 0)) - 1)
8943 <<
bits(src1[lane], 4, 0);
8953 :
Inst_VOP3A(iFmt,
"v_cvt_pknorm_i16_f32", false)
8974 :
Inst_VOP3A(iFmt,
"v_cvt_pknorm_u16_f32", false)
8995 :
Inst_VOP3A(iFmt,
"v_cvt_pkrtz_f16_f32", false)
9018 :
Inst_VOP3A(iFmt,
"v_cvt_pk_u16_u32", false)
9037 :
Inst_VOP3A(iFmt,
"v_cvt_pk_i16_i32", false)
9056 :
Inst_VOP3A(iFmt,
"v_cvt_pk_fp8_f32", false)
9068 Wavefront *wf = gpuDynInst->wavefront();
9090 if ((abs & 1) && (tmp0 < 0)) tmp0 = -tmp0;
9091 if ((abs & 2) && (tmp1 < 0)) tmp1 = -tmp1;
9092 if (neg & 1) tmp0 = -tmp0;
9093 if (neg & 2) tmp1 = -tmp1;
9095 uint16_t packed_data = (
bits(tmp0.data, 31, 24) << 8)
const std::string _opcode
T omodModifier(T val, unsigned omod)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *)
~Inst_VOP3__V_ADDC_CO_U32()
Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ADD_CO_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_F16(InFmt_VOP3A *)
Inst_VOP3__V_ADD_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_F64(InFmt_VOP3A *)
~Inst_VOP3__V_ADD_LSHL_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_LSHL_U32(InFmt_VOP3A *)
Inst_VOP3__V_ADD_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ALIGNBIT_B32()
Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3A *)
~Inst_VOP3__V_ALIGNBYTE_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_AND_B32(InFmt_VOP3A *)
Inst_VOP3__V_AND_OR_B32(InFmt_VOP3A *)
~Inst_VOP3__V_AND_OR_B32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I16()
Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I64()
Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3A *)
~Inst_VOP3__V_BCNT_U32_B32()
Inst_VOP3__V_BFE_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFE_U32(InFmt_VOP3A *)
Inst_VOP3__V_BFI_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFM_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFREV_B32(InFmt_VOP3A *)
~Inst_VOP3__V_BFREV_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CEIL_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CEIL_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CEIL_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CLREXCP(InFmt_VOP3A *)
Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3A *)
~Inst_VOP3__V_CNDMASK_B32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_COS_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_COS_F32(InFmt_VOP3A *)
Inst_VOP3__V_CUBEID_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CUBEID_F32()
~Inst_VOP3__V_CUBEMA_F32()
Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CUBESC_F32()
Inst_VOP3__V_CUBESC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CUBETC_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CUBETC_F32()
~Inst_VOP3__V_CVT_F16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F16_I16()
Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F16_U16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_F16()
~Inst_VOP3__V_CVT_F32_F64()
Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_U32()
Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_UBYTE0()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F32_UBYTE1()
Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_UBYTE2()
Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F32_UBYTE3()
Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F64_F32()
Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F64_I32()
~Inst_VOP3__V_CVT_F64_U32()
Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_FLR_I32_F32()
Inst_VOP3__V_CVT_FLR_I32_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_I16_F16()
Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_I32_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_I32_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_OFF_F32_I4()
Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3A *)
Inst_VOP3__V_CVT_PKACCUM_U8_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PKNORM_I16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PKNORM_I16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PKNORM_U16_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PKNORM_U16_F32()
~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
Inst_VOP3__V_CVT_PKRTZ_F16_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PK_FP8_F32()
Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PK_I16_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_U16_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_U8_F32()
Inst_VOP3__V_CVT_RPI_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_RPI_I32_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_U16_F16()
Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_U32_F32()
~Inst_VOP3__V_CVT_U32_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_FIXUP_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3A *)
Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_FIXUP_F32()
~Inst_VOP3__V_DIV_FIXUP_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_FMAS_F32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_FMAS_F64()
Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_SCALE_F32()
Inst_VOP3__V_DIV_SCALE_F32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_SCALE_F64()
Inst_VOP3__V_DIV_SCALE_F64(InFmt_VOP3B *)
Inst_VOP3__V_EXP_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_EXP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_EXP_LEGACY_F32()
Inst_VOP3__V_FFBH_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FFBH_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FFBL_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FLOOR_F16()
Inst_VOP3__V_FLOOR_F16(InFmt_VOP3A *)
Inst_VOP3__V_FLOOR_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FLOOR_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FLOOR_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FLOOR_F64()
Inst_VOP3__V_FMAC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FMA_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FMA_F32(InFmt_VOP3A *)
Inst_VOP3__V_FMA_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FRACT_F16()
Inst_VOP3__V_FRACT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FRACT_F32()
Inst_VOP3__V_FRACT_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FRACT_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FRACT_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_EXP_I16_F16()
Inst_VOP3__V_FREXP_EXP_I16_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FREXP_EXP_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_EXP_I32_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FREXP_EXP_I32_F64()
Inst_VOP3__V_FREXP_EXP_I32_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_MANT_F16()
Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FREXP_MANT_F32()
Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_MANT_F64()
Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3A *)
~Inst_VOP3__V_INTERP_MOV_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_P1LL_F16(InFmt_VOP3A *)
~Inst_VOP3__V_INTERP_P1LL_F16()
~Inst_VOP3__V_INTERP_P1LV_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_P1LV_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P1_F32()
Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3A *)
Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P2_F16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P2_F32()
Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F16(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F64(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LERP_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LOG_LEGACY_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B32()
Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B64()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHL_ADD_U32()
Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHL_ADD_U64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHL_ADD_U64(InFmt_VOP3A *)
Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHL_OR_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHRREV_B16()
~Inst_VOP3__V_LSHRREV_B32()
Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHRREV_B64()
Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAC_F16(InFmt_VOP3A *)
Inst_VOP3__V_MAC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAD_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MAD_I32_I24()
~Inst_VOP3__V_MAD_I64_I32()
Inst_VOP3__V_MAD_I64_I32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_MAD_LEGACY_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_U16(InFmt_VOP3A *)
Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3A *)
~Inst_VOP3__V_MAD_U32_U24()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MAD_U64_U32()
Inst_VOP3__V_MAD_U64_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX3_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAX3_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAX_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_I32(InFmt_VOP3A *)
Inst_VOP3__V_MAX_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MBCNT_HI_U32_B32()
Inst_VOP3__V_MBCNT_HI_U32_B32(InFmt_VOP3A *)
~Inst_VOP3__V_MBCNT_LO_U32_B32()
Inst_VOP3__V_MBCNT_LO_U32_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MED3_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MED3_I32(InFmt_VOP3A *)
Inst_VOP3__V_MED3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN3_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN3_I32(InFmt_VOP3A *)
Inst_VOP3__V_MIN3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_F32(InFmt_VOP3A *)
Inst_VOP3__V_MIN_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_U16(InFmt_VOP3A *)
Inst_VOP3__V_MIN_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MOV_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3A *)
~Inst_VOP3__V_MOV_FED_B32()
~Inst_VOP3__V_MQSAD_PK_U16_U8()
Inst_VOP3__V_MQSAD_PK_U16_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MQSAD_U32_U8()
Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MSAD_U8(InFmt_VOP3A *)
Inst_VOP3__V_MUL_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_F32(InFmt_VOP3A *)
Inst_VOP3__V_MUL_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_I32_I24()
Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_HI_U32_U24()
Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_U32()
Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_I32_I24()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_LEGACY_F32()
Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_LO_U16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_LO_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_U32_U24()
Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_NOP(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_NOT_B32(InFmt_VOP3A *)
Inst_VOP3__V_OR3_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_OR_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
uint8_t permute(uint64_t in_dword2x, uint32_t sel)
Inst_VOP3__V_PERM_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_QSAD_PK_U16_U8()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3A *)
Inst_VOP3__V_RCP_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RCP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RCP_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RCP_IFLAG_F32()
Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_READLANE_B32()
Inst_VOP3__V_READLANE_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RNDNE_F16()
Inst_VOP3__V_RNDNE_F16(InFmt_VOP3A *)
~Inst_VOP3__V_RNDNE_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RNDNE_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RNDNE_F64()
Inst_VOP3__V_RNDNE_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RSQ_F16(InFmt_VOP3A *)
Inst_VOP3__V_RSQ_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RSQ_F64(InFmt_VOP3A *)
Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3A *)
~Inst_VOP3__V_SAD_HI_U8()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SIN_F16(InFmt_VOP3A *)
Inst_VOP3__V_SIN_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SQRT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SQRT_F32(InFmt_VOP3A *)
Inst_VOP3__V_SQRT_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUBBREV_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBBREV_CO_U32()
Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *)
~Inst_VOP3__V_SUBB_CO_U32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_CO_U32()
Inst_VOP3__V_SUBREV_CO_U32(InFmt_VOP3B *)
Inst_VOP3__V_SUBREV_F16(InFmt_VOP3A *)
~Inst_VOP3__V_SUBREV_F16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUBREV_F32(InFmt_VOP3A *)
Inst_VOP3__V_SUBREV_U16(InFmt_VOP3A *)
~Inst_VOP3__V_SUBREV_U16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_U32()
Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUB_CO_U32()
Inst_VOP3__V_SUB_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_F32(InFmt_VOP3A *)
Inst_VOP3__V_SUB_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_TRIG_PREOP_F64()
Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3A *)
Inst_VOP3__V_TRUNC_F16(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F16()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_TRUNC_F32(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_TRUNC_F64(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F64()
Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3A *)
~Inst_VOP3__V_WRITELANE_B32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_XAD_U32(InFmt_VOP3A *)
Inst_VOP3__V_XOR_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void read() override
Read from and write to the underlying register(s) that this operand refers to.
std::enable_if< Condition, DataType >::type rawData() const
We store scalar data in a std::array; however, if we need the full operand data, we use this method to ...
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
Bit access to scalar data.
void panicUnimplemented() const
void read() override
Read from the VRF (vector register file).
void readSrc()
Certain vector operands can read from the VRF/SRF (vector/scalar register file) or from constants.
void write() override
Write to the VRF (vector register file).
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr int popCount(uint64_t val)
Returns the number of set bits (ones) in the provided value.
constexpr uint64_t sext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits 'first' through 'last' of 'val' with 'bit_val', in place.
std::enable_if_t< std::is_integral_v< T >, T > reverseBits(T val, size_t size=sizeof(T))
Takes a value and returns the bit reversed version.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
constexpr unsigned NumVecElemPerVecReg
ScalarRegI32 firstOppositeSignBit(ScalarRegI32 val)
ScalarRegI32 findFirstOne(T val)
T median(T val_0, T val_1, T val_2)
ScalarRegI32 findFirstOneMsb(T val)
T roundNearestEven(T val)
VecElemU32 muladd(VecElemU64 &dst, VecElemU32 val_0, VecElemU32 val_1, VecElemU64 val_2)
Bitfield< 31, 16 > selector
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)