73 ? src1[lane] : src0[lane];
129 vdst[lane] = src0[lane] + src1[lane];
186 vdst[lane] = src0[lane] - src1[lane];
243 vdst[lane] = src1[lane] - src0[lane];
302 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
303 std::fpclassify(src0[lane]) == FP_ZERO) &&
304 !std::signbit(src0[lane])) {
307 }
else if (!std::signbit(src1[lane])) {
312 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
313 std::fpclassify(src0[lane]) == FP_ZERO) &&
314 std::signbit(src0[lane])) {
317 }
else if (std::signbit(src1[lane])) {
323 !std::signbit(src0[lane])) {
324 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
325 std::fpclassify(src1[lane]) == FP_ZERO) {
327 }
else if (!std::signbit(src1[lane])) {
328 vdst[lane] = +INFINITY;
330 vdst[lane] = -INFINITY;
333 std::signbit(src0[lane])) {
334 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
335 std::fpclassify(src1[lane]) == FP_ZERO) {
337 }
else if (std::signbit(src1[lane])) {
338 vdst[lane] = +INFINITY;
340 vdst[lane] = -INFINITY;
343 vdst[lane] = src0[lane] * src1[lane];
403 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
404 std::fpclassify(src0[lane]) == FP_ZERO) &&
405 !std::signbit(src0[lane])) {
408 }
else if (!std::signbit(src1[lane])) {
413 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
414 std::fpclassify(src0[lane]) == FP_ZERO) &&
415 std::signbit(src0[lane])) {
418 }
else if (std::signbit(src1[lane])) {
424 !std::signbit(src0[lane])) {
425 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
426 std::fpclassify(src1[lane]) == FP_ZERO) {
428 }
else if (!std::signbit(src1[lane])) {
429 vdst[lane] = +INFINITY;
431 vdst[lane] = -INFINITY;
434 std::signbit(src0[lane])) {
435 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
436 std::fpclassify(src1[lane]) == FP_ZERO) {
438 }
else if (std::signbit(src1[lane])) {
439 vdst[lane] = +INFINITY;
441 vdst[lane] = -INFINITY;
444 vdst[lane] = src0[lane] * src1[lane];
537 vdst[lane] = (
VecElemI32)((tmp_src0 * tmp_src1) >> 32);
580 vdst[lane] =
bits(src0[lane], 23, 0) *
bits(src1[lane], 23, 0);
625 vdst[lane] = (
VecElemU32)((tmp_src0 * tmp_src1) >> 32);
681 vdst[lane] = std::fmin(src0[lane], src1[lane]);
737 vdst[lane] = std::fmax(src0[lane], src1[lane]);
780 vdst[lane] = std::min(src0[lane], src1[lane]);
823 vdst[lane] = std::max(src0[lane], src1[lane]);
866 vdst[lane] = std::min(src0[lane], src1[lane]);
909 vdst[lane] = std::max(src0[lane], src1[lane]);
954 vdst[lane] = src1[lane] >>
bits(src0[lane], 4, 0);
999 vdst[lane] = src1[lane] >>
bits(src0[lane], 4, 0);
1023 Wavefront *wf = gpuDynInst->wavefront();
1043 vdst[lane] = src1[lane] <<
bits(src0[lane], 4, 0);
1067 Wavefront *wf = gpuDynInst->wavefront();
1087 vdst[lane] = src0[lane] & src1[lane];
1111 Wavefront *wf = gpuDynInst->wavefront();
1131 vdst[lane] = src0[lane] | src1[lane];
1155 Wavefront *wf = gpuDynInst->wavefront();
1177 vdst[lane] = src0[lane] | src1[lane] | src2[lane];
1201 Wavefront *wf = gpuDynInst->wavefront();
1221 vdst[lane] = src0[lane] ^ src1[lane];
1230 :
Inst_VOP3A(iFmt,
"v_dot2c_f32_bf16", false)
1242 Wavefront *wf = gpuDynInst->wavefront();
1254 a1.data = uint16_t(
bits(src0[lane], 15, 0));
1255 a2.
data = uint16_t(
bits(src0[lane], 31, 16));
1256 b1.data = uint16_t(
bits(src1[lane], 15, 0));
1257 b2.data = uint16_t(
bits(src1[lane], 31, 16));
1265 vdst[lane] += float(
a1) * float(
b1);
1266 vdst[lane] += float(a2) * float(
b2);
1292 Wavefront *wf = gpuDynInst->wavefront();
1325 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
1352 Wavefront *wf = gpuDynInst->wavefront();
1370 vdst[lane] = src0[lane] + src1[lane];
1372 + (
VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
1400 Wavefront *wf = gpuDynInst->wavefront();
1418 vdst[lane] = src0[lane] - src1[lane];
1419 vcc.
setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
1449 Wavefront *wf = gpuDynInst->wavefront();
1467 vdst[lane] = src1[lane] - src0[lane];
1468 vcc.
setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
1498 Wavefront *wf = gpuDynInst->wavefront();
1518 vdst[lane] = src0[lane] + src1[lane]
1523 >= 0x100000000 ? 1 : 0);
1553 Wavefront *wf = gpuDynInst->wavefront();
1573 vdst[lane] = src0[lane] - src1[lane]
1576 > src0[lane] ? 1 : 0);
1607 Wavefront *wf = gpuDynInst->wavefront();
1627 vdst[lane] = src1[lane] - src0[lane]
1630 > src0[lane] ? 1 : 0);
1764 Wavefront *wf = gpuDynInst->wavefront();
1784 vdst[lane] = src0[lane] + src1[lane];
1808 Wavefront *wf = gpuDynInst->wavefront();
1828 vdst[lane] = src0[lane] - src1[lane];
1853 Wavefront *wf = gpuDynInst->wavefront();
1873 vdst[lane] = src1[lane] - src0[lane];
1897 Wavefront *wf = gpuDynInst->wavefront();
1917 vdst[lane] = src0[lane] * src1[lane];
1941 Wavefront *wf = gpuDynInst->wavefront();
1961 vdst[lane] = src1[lane] <<
bits(src0[lane], 3, 0);
1986 Wavefront *wf = gpuDynInst->wavefront();
2012 vdst[lane] = src1[lane] >>
bits(src0[lane], 3, 0);
2037 Wavefront *wf = gpuDynInst->wavefront();
2057 vdst[lane] = src1[lane] >>
bits(src0[lane], 3, 0);
2124 Wavefront *wf = gpuDynInst->wavefront();
2150 vdst[lane] = std::max(src0[lane], src1[lane]);
2173 Wavefront *wf = gpuDynInst->wavefront();
2199 vdst[lane] = std::max(src0[lane], src1[lane]);
2222 Wavefront *wf = gpuDynInst->wavefront();
2248 vdst[lane] = std::min(src0[lane], src1[lane]);
2271 Wavefront *wf = gpuDynInst->wavefront();
2297 vdst[lane] = std::min(src0[lane], src1[lane]);
2340 Wavefront *wf = gpuDynInst->wavefront();
2360 vdst[lane] = src0[lane] + src1[lane];
2383 Wavefront *wf = gpuDynInst->wavefront();
2403 vdst[lane] = src0[lane] - src1[lane];
2426 Wavefront *wf = gpuDynInst->wavefront();
2446 vdst[lane] = src1[lane] - src0[lane];
2471 Wavefront *wf = gpuDynInst->wavefront();
2510 float out = std::fma(src0[lane], src1[lane], vdst[lane]);
2513 out = std::clamp(vdst[lane], 0.0f, 1.0f);
2558 Wavefront *wf = gpuDynInst->wavefront();
2566 vdst[lane] = src[lane];
2592 Wavefront *wf = gpuDynInst->wavefront();
2609 std::frexp(src[lane],&exp);
2612 }
else if (
std::isinf(src[lane]) || exp > 30) {
2613 if (std::signbit(src[lane])) {
2614 vdst[lane] = INT_MIN;
2616 vdst[lane] = INT_MAX;
2644 Wavefront *wf = gpuDynInst->wavefront();
2684 Wavefront *wf = gpuDynInst->wavefront();
2726 Wavefront *wf = gpuDynInst->wavefront();
2768 Wavefront *wf = gpuDynInst->wavefront();
2785 std::frexp(src[lane],&exp);
2789 if (std::signbit(src[lane])) {
2792 vdst[lane] = UINT_MAX;
2794 }
else if (exp > 31) {
2795 vdst[lane] = UINT_MAX;
2824 Wavefront *wf = gpuDynInst->wavefront();
2849 std::frexp(src[lane],&exp);
2852 }
else if (
std::isinf(src[lane]) || exp > 30) {
2853 if (std::signbit(src[lane])) {
2854 vdst[lane] = INT_MIN;
2856 vdst[lane] = INT_MAX;
2907 Wavefront *wf = gpuDynInst->wavefront();
2923 float tmp = src0[lane];
2925 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2926 if (neg & 1) tmp = -tmp;
2930 tmp = std::clamp(tmp, 0.0f, 1.0f);
2965 Wavefront *wf = gpuDynInst->wavefront();
2982 if ((abs & 1) && (tmp < 0)) tmp = -tmp;
2983 if (neg & 1) tmp = -tmp;
2987 out = std::clamp(out, 0.0f, 1.0f);
3000 :
Inst_VOP3A(iFmt,
"v_cvt_rpi_i32_f32", false)
3015 Wavefront *wf = gpuDynInst->wavefront();
3031 vdst[lane] = (
VecElemI32)std::floor(src[lane] + 0.5);
3041 :
Inst_VOP3A(iFmt,
"v_cvt_flr_i32_f32", false)
3056 Wavefront *wf = gpuDynInst->wavefront();
3072 vdst[lane] = (
VecElemI32)std::floor(src[lane]);
3081 :
Inst_VOP3A(iFmt,
"v_cvt_off_f32_i4", false)
3117 Wavefront *wf = gpuDynInst->wavefront();
3165 Wavefront *wf = gpuDynInst->wavefront();
3198 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte0", false)
3213 Wavefront *wf = gpuDynInst->wavefront();
3238 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte1", false)
3253 Wavefront *wf = gpuDynInst->wavefront();
3278 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte2", false)
3293 Wavefront *wf = gpuDynInst->wavefront();
3318 :
Inst_VOP3A(iFmt,
"v_cvt_f32_ubyte3", false)
3333 Wavefront *wf = gpuDynInst->wavefront();
3375 Wavefront *wf = gpuDynInst->wavefront();
3392 std::frexp(src[lane],&exp);
3396 if (std::signbit(src[lane])) {
3399 vdst[lane] = UINT_MAX;
3401 }
else if (exp > 31) {
3402 vdst[lane] = UINT_MAX;
3429 Wavefront *wf = gpuDynInst->wavefront();
3469 Wavefront *wf = gpuDynInst->wavefront();
3485 vdst[lane] = std::trunc(src[lane]);
3510 Wavefront *wf = gpuDynInst->wavefront();
3526 vdst[lane] = std::ceil(src[lane]);
3550 Wavefront *wf = gpuDynInst->wavefront();
3591 Wavefront *wf = gpuDynInst->wavefront();
3607 vdst[lane] = std::floor(src[lane]);
3631 Wavefront *wf = gpuDynInst->wavefront();
3648 vdst[lane] = std::modf(src[lane], &int_part);
3672 Wavefront *wf = gpuDynInst->wavefront();
3688 vdst[lane] = std::trunc(src[lane]);
3713 Wavefront *wf = gpuDynInst->wavefront();
3729 vdst[lane] = std::ceil(src[lane]);
3753 Wavefront *wf = gpuDynInst->wavefront();
3794 Wavefront *wf = gpuDynInst->wavefront();
3810 vdst[lane] = std::floor(src[lane]);
3834 Wavefront *wf = gpuDynInst->wavefront();
3850 vdst[lane] = std::pow(2.0, src[lane]);
3874 Wavefront *wf = gpuDynInst->wavefront();
3898 vdst[lane] = std::log2(src[lane]);
3922 Wavefront *wf = gpuDynInst->wavefront();
3938 vdst[lane] = 1.0 / src[lane];
3964 Wavefront *wf = gpuDynInst->wavefront();
3980 vdst[lane] = 1.0 / src[lane];
4004 Wavefront *wf = gpuDynInst->wavefront();
4020 vdst[lane] = 1.0 / std::sqrt(src[lane]);
4044 Wavefront *wf = gpuDynInst->wavefront();
4060 if (std::fpclassify(src[lane]) == FP_ZERO) {
4061 vdst[lane] = +INFINITY;
4065 if (std::signbit(src[lane])) {
4071 vdst[lane] = 1.0 / src[lane];
4096 Wavefront *wf = gpuDynInst->wavefront();
4112 if (std::fpclassify(src[lane]) == FP_ZERO) {
4113 vdst[lane] = +INFINITY;
4116 }
else if (
std::isinf(src[lane]) && !std::signbit(src[lane])) {
4118 }
else if (std::signbit(src[lane])) {
4121 vdst[lane] = 1.0 / std::sqrt(src[lane]);
4146 Wavefront *wf = gpuDynInst->wavefront();
4162 vdst[lane] = std::sqrt(src[lane]);
4186 Wavefront *wf = gpuDynInst->wavefront();
4202 vdst[lane] = std::sqrt(src[lane]);
4228 Wavefront *wf = gpuDynInst->wavefront();
4246 vdst[lane] = std::sin(src[lane] * 2 * pi.
rawData());
4272 Wavefront *wf = gpuDynInst->wavefront();
4290 vdst[lane] = std::cos(src[lane] * 2 * pi.
rawData());
4314 Wavefront *wf = gpuDynInst->wavefront();
4330 vdst[lane] = ~src[lane];
4354 Wavefront *wf = gpuDynInst->wavefront();
4386 Wavefront *wf = gpuDynInst->wavefront();
4426 Wavefront *wf = gpuDynInst->wavefront();
4466 Wavefront *wf = gpuDynInst->wavefront();
4492 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i32_f64", false)
4507 Wavefront *wf = gpuDynInst->wavefront();
4527 std::frexp(src[lane], &exp);
4538 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f64", false)
4553 Wavefront *wf = gpuDynInst->wavefront();
4570 vdst[lane] = std::frexp(src[lane], &exp);
4594 Wavefront *wf = gpuDynInst->wavefront();
4611 vdst[lane] = std::modf(src[lane], &int_part);
4621 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i32_f32", false)
4640 Wavefront *wf = gpuDynInst->wavefront();
4660 std::frexp(src[lane], &exp);
4671 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f32", false)
4691 Wavefront *wf = gpuDynInst->wavefront();
4708 vdst[lane] = src[lane];
4711 vdst[lane] = std::frexp(src[lane], &exp);
4938 :
Inst_VOP3A(iFmt,
"v_frexp_mant_f16", false)
4966 :
Inst_VOP3A(iFmt,
"v_frexp_exp_i16_f16", false)
5137 :
Inst_VOP3A(iFmt,
"v_exp_legacy_f32", false)
5152 Wavefront *wf = gpuDynInst->wavefront();
5176 vdst[lane] = std::pow(2.0, src[lane]);
5185 :
Inst_VOP3A(iFmt,
"v_log_legacy_f32", false)
5200 Wavefront *wf = gpuDynInst->wavefront();
5208 vdst[lane] = std::log2(src[lane]);
5238 Wavefront *wf = gpuDynInst->wavefront();
5253 return ((in << 1) ^ (((in >> 31) & 1) ? 0xc5 : 0x00));
5258 vdst[lane] = randFunc(src[lane]);
5280 Wavefront *wf = gpuDynInst->wavefront();
5294 uint16_t s0 = (opsel & 1) ?
bits(src[lane], 31, 16)
5295 :
bits(src[lane], 15, 0);
5300 float f32 = float(tmp);
5302 if (abs & 1)
f32 = std::fabs(
f32);
5305 if (clamp)
f32 = std::clamp(
f32, 0.0f, 1.0f);
5316 :
Inst_VOP3A(iFmt,
"v_mad_legacy_f32", false)
5332 Wavefront *wf = gpuDynInst->wavefront();
5368 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5393 Wavefront *wf = gpuDynInst->wavefront();
5429 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5453 Wavefront *wf = gpuDynInst->wavefront();
5500 Wavefront *wf = gpuDynInst->wavefront();
5522 vdst[lane] =
bits(src0[lane], 23, 0) *
bits(src1[lane], 23, 0)
5631 Wavefront *wf = gpuDynInst->wavefront();
5653 vdst[lane] = (src0[lane] >>
bits(src1[lane], 4, 0))
5654 & ((1 <<
bits(src2[lane], 4, 0)) - 1);
5678 Wavefront *wf = gpuDynInst->wavefront();
5700 vdst[lane] = (src0[lane] >>
bits(src1[lane], 4, 0))
5701 & ((1 <<
bits(src2[lane], 4, 0)) - 1);
5706 if (vdst[lane] >> (
bits(src2[lane], 4, 0) - 1)) {
5707 vdst[lane] |= 0xffffffff <<
bits(src2[lane], 4, 0);
5731 Wavefront *wf = gpuDynInst->wavefront();
5753 vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
5779 Wavefront *wf = gpuDynInst->wavefront();
5815 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5840 Wavefront *wf = gpuDynInst->wavefront();
5876 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
5905 Wavefront *wf = gpuDynInst->wavefront();
5927 vdst[lane] = ((
bits(src0[lane], 31, 24)
5928 +
bits(src1[lane], 31, 24) +
bits(src2[lane], 24)) >> 1)
5930 vdst[lane] += ((
bits(src0[lane], 23, 16)
5931 +
bits(src1[lane], 23, 16) +
bits(src2[lane], 16)) >> 1)
5933 vdst[lane] += ((
bits(src0[lane], 15, 8)
5934 +
bits(src1[lane], 15, 8) +
bits(src2[lane], 8)) >> 1)
5936 vdst[lane] += ((
bits(src0[lane], 7, 0) +
bits(src1[lane], 7, 0)
5937 +
bits(src2[lane], 0)) >> 1);
5960 Wavefront *wf = gpuDynInst->wavefront();
6008 Wavefront *wf = gpuDynInst->wavefront();
6058 Wavefront *wf = gpuDynInst->wavefront();
6094 VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
6095 vdst[lane] = std::fmin(min_0_1, src2[lane]);
6118 Wavefront *wf = gpuDynInst->wavefront();
6140 VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
6141 vdst[lane] = std::min(min_0_1, src2[lane]);
6164 Wavefront *wf = gpuDynInst->wavefront();
6186 VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
6187 vdst[lane] = std::min(min_0_1, src2[lane]);
6211 Wavefront *wf = gpuDynInst->wavefront();
6247 VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
6248 vdst[lane] = std::fmax(max_0_1, src2[lane]);
6271 Wavefront *wf = gpuDynInst->wavefront();
6293 VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
6294 vdst[lane] = std::max(max_0_1, src2[lane]);
6317 Wavefront *wf = gpuDynInst->wavefront();
6339 VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
6340 vdst[lane] = std::max(max_0_1, src2[lane]);
6364 Wavefront *wf = gpuDynInst->wavefront();
6400 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6423 Wavefront *wf = gpuDynInst->wavefront();
6445 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6468 Wavefront *wf = gpuDynInst->wavefront();
6490 vdst[lane] =
median(src0[lane], src1[lane], src2[lane]);
6516 Wavefront *wf = gpuDynInst->wavefront();
6538 vdst[lane] = std::abs(
bits(src0[lane], 31, 24)
6539 -
bits(src1[lane], 31, 24))
6540 + std::abs(
bits(src0[lane], 23, 16)
6541 -
bits(src1[lane], 23, 16))
6542 + std::abs(
bits(src0[lane], 15, 8)
6543 -
bits(src1[lane], 15, 8))
6544 + std::abs(
bits(src0[lane], 7, 0)
6545 -
bits(src1[lane], 7, 0)) + src2[lane];
6569 Wavefront *wf = gpuDynInst->wavefront();
6591 vdst[lane] = (((
bits(src0[lane], 31, 24)
6592 -
bits(src1[lane], 31, 24)) + (
bits(src0[lane], 23, 16)
6593 -
bits(src1[lane], 23, 16)) + (
bits(src0[lane], 15, 8)
6594 -
bits(src1[lane], 15, 8)) + (
bits(src0[lane], 7, 0)
6595 -
bits(src1[lane], 7, 0))) << 16) + src2[lane];
6620 Wavefront *wf = gpuDynInst->wavefront();
6642 vdst[lane] = std::abs(
bits(src0[lane], 31, 16)
6643 -
bits(src1[lane], 31, 16))
6644 + std::abs(
bits(src0[lane], 15, 0)
6645 -
bits(src1[lane], 15, 0)) + src2[lane];
6669 Wavefront *wf = gpuDynInst->wavefront();
6691 vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
6718 Wavefront *wf = gpuDynInst->wavefront();
6747 vdst[lane] = (((
VecElemU8)src0[lane] & 0xff)
6748 << (8 *
bits(src1[lane], 1, 0)))
6749 | (src2[lane] & ~(0xff << (8 *
bits(src1[lane], 1, 0))));
6775 Wavefront *wf = gpuDynInst->wavefront();
6811 if (std::fpclassify(src1[lane]) == FP_ZERO) {
6812 if (std::signbit(src1[lane])) {
6813 vdst[lane] = -INFINITY;
6815 vdst[lane] = +INFINITY;
6820 if (std::signbit(src1[lane])) {
6821 vdst[lane] = -INFINITY;
6823 vdst[lane] = +INFINITY;
6826 vdst[lane] = src2[lane] / src1[lane];
6853 Wavefront *wf = gpuDynInst->wavefront();
6889 int sign_out = std::signbit(src1[lane])
6890 ^ std::signbit(src2[lane]);
6893 std::frexp(src1[lane], &exp1);
6894 std::frexp(src2[lane], &exp2);
6897 vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
6898 }
else if (std::fpclassify(src1[lane]) == FP_ZERO
6899 && std::fpclassify(src2[lane]) == FP_ZERO) {
6901 = std::numeric_limits<VecElemF64>::signaling_NaN();
6904 = std::numeric_limits<VecElemF64>::signaling_NaN();
6905 }
else if (std::fpclassify(src1[lane]) == FP_ZERO
6907 vdst[lane] = sign_out ? -INFINITY : +INFINITY;
6909 || std::fpclassify(src2[lane]) == FP_ZERO) {
6910 vdst[lane] = sign_out ? -0.0 : +0.0;
6911 }
else if (exp2 - exp1 < -1075) {
6912 vdst[lane] = src0[lane];
6913 }
else if (exp1 == 2047) {
6914 vdst[lane] = src0[lane];
6916 vdst[lane] = sign_out ? -std::fabs(src0[lane])
6917 : std::fabs(src0[lane]);
6948 Wavefront *wf = gpuDynInst->wavefront();
6973 vdst[lane] = src0[lane];
7005 Wavefront *wf = gpuDynInst->wavefront();
7032 std::frexp(src1[lane], &exp1);
7033 std::frexp(src2[lane], &exp2);
7036 if (std::fpclassify(src1[lane]) == FP_ZERO
7037 || std::fpclassify(src2[lane]) == FP_ZERO) {
7039 }
else if (exp2 - exp1 >= 768) {
7041 if (src0[lane] == src1[lane]) {
7042 vdst[lane] = std::ldexp(src0[lane], 128);
7044 }
else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
7045 vdst[lane] = std::ldexp(src0[lane], 128);
7046 }
else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
7047 && std::fpclassify(src2[lane] / src1[lane])
7050 if (src0[lane] == src1[lane]) {
7051 vdst[lane] = std::ldexp(src0[lane], 128);
7053 }
else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) {
7054 vdst[lane] = std::ldexp(src0[lane], -128);
7055 }
else if (std::fpclassify(src2[lane] / src1[lane])
7058 if (src0[lane] == src2[lane]) {
7059 vdst[lane] = std::ldexp(src0[lane], 128);
7061 }
else if (exp2 <= 53) {
7062 vdst[lane] = std::ldexp(src0[lane], 128);
7091 Wavefront *wf = gpuDynInst->wavefront();
7127 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
7154 Wavefront *wf = gpuDynInst->wavefront();
7193 vdst[lane] = std::pow(2, 64)
7194 * std::fma(src0[lane], src1[lane], src2[lane]);
7196 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
7225 :
Inst_VOP3A(iFmt,
"v_qsad_pk_u16_u8", false)
7246 :
Inst_VOP3A(iFmt,
"v_mqsad_pk_u16_u8", false)
7303 Wavefront *wf = gpuDynInst->wavefront();
7324 vcc.
setBit(lane,
muladd(vdst[lane], src0[lane], src1[lane],
7352 Wavefront *wf = gpuDynInst->wavefront();
7372 vcc.
setBit(lane,
muladd(vdst[lane], src0[lane], src1[lane],
7397 Wavefront *wf = gpuDynInst->wavefront();
7419 vdst[lane] = (src0[lane] ^ src1[lane]) + src2[lane];
7442 Wavefront *wf = gpuDynInst->wavefront();
7464 vdst[lane] = (src0[lane] <<
bits(src1[lane], 4, 0))
7488 Wavefront *wf = gpuDynInst->wavefront();
7511 (src0[lane] + src1[lane]) <<
bits(src2[lane], 4, 0);
7534 Wavefront *wf = gpuDynInst->wavefront();
7556 vdst[lane] = src0[lane] + src1[lane] + src2[lane];
7579 Wavefront *wf = gpuDynInst->wavefront();
7601 vdst[lane] = (src0[lane] <<
bits(src1[lane], 4, 0))
7626 Wavefront *wf = gpuDynInst->wavefront();
7648 vdst[lane] = (src0[lane] & src1[lane]) | src2[lane];
7695 Wavefront *wf = gpuDynInst->wavefront();
7717 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
7742 Wavefront *wf = gpuDynInst->wavefront();
7764 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
7800 Wavefront *wf = gpuDynInst->wavefront();
7816 DPRINTF(VEGA,
"Executing v_perm_b32 src_0 0x%08x, src_1 "
7817 "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
7818 src1[lane], src2[lane], vdst[lane]);
7821 for (
int i = 0;
i < 4 ; ++
i) {
7824 vdst[lane] |= (permuted_val << (8 *
i));
7827 DPRINTF(VEGA,
"v_perm result: 0x%08x\n", vdst[lane]);
7920 Wavefront *wf = gpuDynInst->wavefront();
7942 int shift_amount =
bits(src1[lane], 2, 0);
7943 shift_amount = shift_amount > 4 ? 0 : shift_amount;
7944 vdst[lane] = (src0[lane] << shift_amount)
7955 :
Inst_VOP3A(iFmt,
"v_cvt_pkaccum_u8_f32", false)
7992 Wavefront *wf = gpuDynInst->wavefront();
8016 uint16_t s0 = (opsel & 1) ?
bits(src0[lane], 31, 16)
8017 :
bits(src0[lane], 15, 0);
8018 uint16_t s1 = (opsel & 2) ?
bits(src1[lane], 31, 16)
8019 :
bits(src1[lane], 15, 0);
8020 uint16_t s2 = (opsel & 4) ?
bits(src2[lane], 31, 16)
8021 :
bits(src2[lane], 15, 0);
8024 tmp |= (ttbl & 0x01) ? (~s0 & ~s1 & ~s2) : 0;
8025 tmp |= (ttbl & 0x02) ? (~s0 & ~s1 & s2) : 0;
8026 tmp |= (ttbl & 0x04) ? (~s0 & s1 & ~s2) : 0;
8027 tmp |= (ttbl & 0x08) ? (~s0 & s1 & s2) : 0;
8028 tmp |= (ttbl & 0x10) ? ( s0 & ~s1 & ~s2) : 0;
8029 tmp |= (ttbl & 0x20) ? ( s0 & ~s1 & s2) : 0;
8030 tmp |= (ttbl & 0x40) ? ( s0 & s1 & ~s2) : 0;
8031 tmp |= (ttbl & 0x80) ? ( s0 & s1 & s2) : 0;
8059 Wavefront *wf = gpuDynInst->wavefront();
8081 uint32_t s0 = src0[lane];
8082 uint32_t s1 = src1[lane];
8083 uint32_t s2 = src2[lane];
8086 tmp |= (ttbl & 0x01) ? (~s0 & ~s1 & ~s2) : 0;
8087 tmp |= (ttbl & 0x02) ? (~s0 & ~s1 & s2) : 0;
8088 tmp |= (ttbl & 0x04) ? (~s0 & s1 & ~s2) : 0;
8089 tmp |= (ttbl & 0x08) ? (~s0 & s1 & s2) : 0;
8090 tmp |= (ttbl & 0x10) ? ( s0 & ~s1 & ~s2) : 0;
8091 tmp |= (ttbl & 0x20) ? ( s0 & ~s1 & s2) : 0;
8092 tmp |= (ttbl & 0x40) ? ( s0 & s1 & ~s2) : 0;
8093 tmp |= (ttbl & 0x80) ? ( s0 & s1 & s2) : 0;
8104 :
Inst_VOP3A(iFmt,
"v_ashr_pk_i8_i32", false)
8116 Wavefront *wf = gpuDynInst->wavefront();
8127 auto sat8 = [](int32_t
n) -> uint8_t {
8128 if (
n <= -128)
return 0x80;
8129 else if (
n >= 127)
return 0x7f;
8130 else return n & 0xff;
8141 uint8_t lower = sat8(src0[lane] >>
bits(src2[lane], 4, 0));
8142 uint8_t upper = sat8(src0[lane] >>
bits(src2[lane], 4, 0));
8145 uint16_t result = uint16_t(upper) << 8 | uint16_t(lower);
8160 :
Inst_VOP3A(iFmt,
"v_ashr_pk_u8_i32", false)
8172 Wavefront *wf = gpuDynInst->wavefront();
8183 auto sat8 = [](int32_t
n) -> uint8_t {
8184 if (
n <= 0)
return 0;
8185 else if (
n >= 255)
return 0xff;
8186 else return n & 0xff;
8197 uint8_t lower = sat8(src0[lane] >>
bits(src2[lane], 4, 0));
8198 uint8_t upper = sat8(src0[lane] >>
bits(src2[lane], 4, 0));
8201 uint16_t result = uint16_t(upper) << 8 | uint16_t(lower);
8217 :
Inst_VOP3A(iFmt,
"v_cvt_pk_f16_f32", false)
8229 Wavefront *wf = gpuDynInst->wavefront();
8254 if (abs & 1) tmp0.fabs();
8255 if (abs & 2) tmp1.
fabs();
8256 if (neg & 1) tmp0.neg();
8257 if (neg & 2) tmp1.
neg();
8258 tmp0.omodModifier(omod);
8263 uint32_t lower_word = tmp0.data;
8264 uint32_t upper_word = tmp1.
data;
8266 vdst[lane] = (upper_word << 16) | lower_word;
8276 :
Inst_VOP3A(iFmt,
"v_cvt_pk_bf16_f32", false)
8288 Wavefront *wf = gpuDynInst->wavefront();
8313 if (abs & 1) tmp0.fabs();
8314 if (abs & 2) tmp1.
fabs();
8315 if (neg & 1) tmp0.neg();
8316 if (neg & 2) tmp1.
neg();
8317 tmp0.omodModifier(omod);
8322 uint32_t lower_word = tmp0.data;
8323 uint32_t upper_word = tmp1.
data;
8325 vdst[lane] = (upper_word << 16) | lower_word;
8386 :
Inst_VOP3A(iFmt,
"v_interp_mov_f32", false)
8407 :
Inst_VOP3A(iFmt,
"v_interp_p1ll_f16", false)
8436 :
Inst_VOP3A(iFmt,
"v_interp_p1lv_f16", false)
8507 Wavefront *wf = gpuDynInst->wavefront();
8544 if (std::signbit(src0[lane]) !=
8545 std::signbit(src1[lane])) {
8548 vdst[lane] = src0[lane];
8551 vdst[lane] = src0[lane];
8553 vdst[lane] = src1[lane];
8554 }
else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8555 std::fpclassify(src0[lane]) == FP_ZERO) {
8556 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8557 std::fpclassify(src1[lane]) == FP_ZERO) {
8558 if (std::signbit(src0[lane]) &&
8559 std::signbit(src1[lane])) {
8565 vdst[lane] = src1[lane];
8567 }
else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8568 std::fpclassify(src1[lane]) == FP_ZERO) {
8569 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8570 std::fpclassify(src0[lane]) == FP_ZERO) {
8571 if (std::signbit(src0[lane]) &&
8572 std::signbit(src1[lane])) {
8578 vdst[lane] = src0[lane];
8581 vdst[lane] = src0[lane] + src1[lane];
8606 Wavefront *wf = gpuDynInst->wavefront();
8641 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8642 std::fpclassify(src0[lane]) == FP_ZERO) &&
8643 !std::signbit(src0[lane])) {
8646 }
else if (!std::signbit(src1[lane])) {
8651 }
else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
8652 std::fpclassify(src0[lane]) == FP_ZERO) &&
8653 std::signbit(src0[lane])) {
8656 }
else if (std::signbit(src1[lane])) {
8662 !std::signbit(src0[lane])) {
8663 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8664 std::fpclassify(src1[lane]) == FP_ZERO) {
8666 }
else if (!std::signbit(src1[lane])) {
8667 vdst[lane] = +INFINITY;
8669 vdst[lane] = -INFINITY;
8672 std::signbit(src0[lane])) {
8673 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
8674 std::fpclassify(src1[lane]) == FP_ZERO) {
8676 }
else if (std::signbit(src1[lane])) {
8677 vdst[lane] = +INFINITY;
8679 vdst[lane] = -INFINITY;
8682 vdst[lane] = src0[lane] * src1[lane];
8707 Wavefront *wf = gpuDynInst->wavefront();
8739 vdst[lane] = std::fmin(src0[lane], src1[lane]);
8763 Wavefront *wf = gpuDynInst->wavefront();
8795 vdst[lane] = std::fmax(src0[lane], src1[lane]);
8819 Wavefront *wf = gpuDynInst->wavefront();
8846 vdst[lane] = src0[lane];
8847 }
else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
8848 || std::fpclassify(src0[lane]) == FP_ZERO) {
8849 if (std::signbit(src0[lane])) {
8855 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
8879 Wavefront *wf = gpuDynInst->wavefront();
8901 vdst[lane] = (
VecElemU32)((s0 * s1) & 0xffffffffLL);
8924 Wavefront *wf = gpuDynInst->wavefront();
8947 = (
VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
8970 Wavefront *wf = gpuDynInst->wavefront();
8993 = (
VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
9017 Wavefront *wf = gpuDynInst->wavefront();
9035 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
9068 sdst = src0[src1.
rawData() & 0x3f];
9123 Wavefront *wf = gpuDynInst->wavefront();
9143 vdst[lane] =
popCount(src0[lane]) + src1[lane];
9153 :
Inst_VOP3A(iFmt,
"v_mbcnt_lo_u32_b32", false)
9170 Wavefront *wf = gpuDynInst->wavefront();
9174 uint64_t threadMask = 0;
9191 threadMask = ((1ULL << lane) - 1ULL);
9192 vdst[lane] =
popCount(src0[lane] &
bits(threadMask, 31, 0)) +
9203 :
Inst_VOP3A(iFmt,
"v_mbcnt_hi_u32_b32", false)
9220 Wavefront *wf = gpuDynInst->wavefront();
9224 uint64_t threadMask = 0;
9241 threadMask = ((1ULL << lane) - 1ULL);
9242 vdst[lane] =
popCount(src0[lane] &
bits(threadMask, 63, 32)) +
9267 Wavefront *wf = gpuDynInst->wavefront();
9287 vdst[lane] = src1[lane] <<
bits(src0[lane], 5, 0);
9312 Wavefront *wf = gpuDynInst->wavefront();
9332 vdst[lane] = src1[lane] >>
bits(src0[lane], 5, 0);
9357 Wavefront *wf = gpuDynInst->wavefront();
9378 = src1[lane] >>
bits(src0[lane], 5, 0);
9387 :
Inst_VOP3A(iFmt,
"v_trig_preop_f64", false)
9427 Wavefront *wf = gpuDynInst->wavefront();
9447 vdst[lane] = ((1 <<
bits(src0[lane], 4, 0)) - 1)
9448 <<
bits(src1[lane], 4, 0);
9458 :
Inst_VOP3A(iFmt,
"v_cvt_pknorm_i16_f32", false)
9479 :
Inst_VOP3A(iFmt,
"v_cvt_pknorm_u16_f32", false)
9500 :
Inst_VOP3A(iFmt,
"v_cvt_pkrtz_f16_f32", false)
9523 :
Inst_VOP3A(iFmt,
"v_cvt_pk_u16_u32", false)
9542 :
Inst_VOP3A(iFmt,
"v_cvt_pk_i16_i32", false)
9561 :
Inst_VOP3A(iFmt,
"v_cvt_pk_fp8_f32", false)
9573 Wavefront *wf = gpuDynInst->wavefront();
9595 if (abs & 1) tmp0.fabs();
9596 if (abs & 2) tmp1.
fabs();
9597 if (neg & 1) tmp0.neg();
9598 if (neg & 2) tmp1.
neg();
9600 uint16_t packed_data = (
bits(tmp1.
data, 31, 24) << 8)
9601 |
bits(tmp0.data, 31, 24);
9616 :
Inst_VOP3A(iFmt,
"v_cvt_pk_bf8_f32", false)
9628 Wavefront *wf = gpuDynInst->wavefront();
9650 if (abs & 1) tmp0.fabs();
9651 if (abs & 2) tmp1.
fabs();
9652 if (neg & 1) tmp0.neg();
9653 if (neg & 2) tmp1.
neg();
9655 uint16_t packed_data = (
bits(tmp1.
data, 31, 24) << 8)
9656 |
bits(tmp0.data, 31, 24);
9672 :
Inst_VOP3A(iFmt,
"v_cvt_sr_fp8_f32", false)
9684 Wavefront *wf = gpuDynInst->wavefront();
9702 opsel =
bits(opsel, 3, 2);
9709 if (abs & 1) in = std::fabs(src0[lane]);
9710 if (neg & 1) in = -in;
9712 using sInfo =
decltype(in.
getFmt());
9713 using dInfo =
decltype(cvt.
getFmt());
9721 }
else if (opsel == 1) {
9723 }
else if (opsel == 2) {
9737 :
Inst_VOP3A(iFmt,
"v_cvt_sr_fp8_f32", false)
9749 Wavefront *wf = gpuDynInst->wavefront();
9767 opsel =
bits(opsel, 3, 2);
9774 if (abs & 1) in = std::fabs(src0[lane]);
9775 if (neg & 1) in = -in;
9777 using sInfo =
decltype(in.
getFmt());
9778 using dInfo =
decltype(cvt.
getFmt());
9786 }
else if (opsel == 1) {
9788 }
else if (opsel == 2) {
9802 :
Inst_VOP3A(iFmt,
"v_cvt_sr_f16_f32", false)
9814 Wavefront *wf = gpuDynInst->wavefront();
9838 if (abs & 1) in = std::fabs(src0[lane]);
9839 if (neg & 1) in = -in;
9841 using sInfo =
decltype(in.
getFmt());
9842 using dInfo =
decltype(cvt.
getFmt());
9862 :
Inst_VOP3A(iFmt,
"v_cvt_sr_bf16_f32", false)
9876 Wavefront *wf = gpuDynInst->wavefront();
9900 if (abs & 1) in = std::fabs(src0[lane]);
9901 if (neg & 1) in = -in;
9903 using sInfo =
decltype(in.
getFmt());
9904 using dInfo =
decltype(cvt.
getFmt());
9924 :
Inst_VOP3A(iFmt,
"v_permlane16_swap_b32", false)
9954 for (
int pass = 0; pass < 2; ++pass) {
9955 for (
int lane = 0; lane < 16; ++lane) {
9956 int dlane = pass * 32 + lane + 16;
9957 int slane = pass * 32 + lane;
9960 src0[slane] = vdst[dlane];
9972 :
Inst_VOP3A(iFmt,
"v_permlane32_swap_b32", false)
9997 for (
int lane = 0; lane < 32; ++lane) {
9999 src0[lane] = vdst[lane + 32];
10000 vdst[lane + 32] = tmp;
void clamp(bool do_clamp)
void omodModifier(unsigned omod)
const std::string _opcode
Inst_VOP3A(InFmt_VOP3A *, const std::string &opcode, bool sgpr_dst)
T omodModifier(T val, unsigned omod)
Inst_VOP3B(InFmt_VOP3B *, const std::string &opcode)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *)
~Inst_VOP3__V_ADDC_CO_U32()
Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ADD_CO_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_F16(InFmt_VOP3A *)
Inst_VOP3__V_ADD_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_F64(InFmt_VOP3A *)
~Inst_VOP3__V_ADD_LSHL_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_LSHL_U32(InFmt_VOP3A *)
Inst_VOP3__V_ADD_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ADD_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ALIGNBIT_B32()
Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3A *)
~Inst_VOP3__V_ALIGNBYTE_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_AND_B32(InFmt_VOP3A *)
Inst_VOP3__V_AND_OR_B32(InFmt_VOP3A *)
~Inst_VOP3__V_AND_OR_B32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I16()
Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_ASHRREV_I64()
Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ASHR_PK_I8_I32(InFmt_VOP3A *)
~Inst_VOP3__V_ASHR_PK_I8_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_ASHR_PK_U8_I32(InFmt_VOP3A *)
~Inst_VOP3__V_ASHR_PK_U8_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3A *)
~Inst_VOP3__V_BCNT_U32_B32()
Inst_VOP3__V_BFE_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFE_U32(InFmt_VOP3A *)
Inst_VOP3__V_BFI_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFM_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BFREV_B32(InFmt_VOP3A *)
~Inst_VOP3__V_BFREV_B32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_BITOP3_B16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BITOP3_B16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_BITOP3_B32(InFmt_VOP3A *)
~Inst_VOP3__V_BITOP3_B32()
Inst_VOP3__V_CEIL_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CEIL_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CEIL_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CLREXCP(InFmt_VOP3A *)
Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3A *)
~Inst_VOP3__V_CNDMASK_B32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_COS_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_COS_F32(InFmt_VOP3A *)
Inst_VOP3__V_CUBEID_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CUBEID_F32()
~Inst_VOP3__V_CUBEMA_F32()
Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CUBESC_F32()
Inst_VOP3__V_CUBESC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CUBETC_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CUBETC_F32()
~Inst_VOP3__V_CVT_F16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F16_I16()
Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F16_U16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F32_BF16()
Inst_VOP3__V_CVT_F32_BF16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_F16()
~Inst_VOP3__V_CVT_F32_F64()
Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_U32()
Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_UBYTE0()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F32_UBYTE1()
Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F32_UBYTE2()
Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F32_UBYTE3()
Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_F64_F32()
Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_F64_I32()
~Inst_VOP3__V_CVT_F64_U32()
Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_FLR_I32_F32()
Inst_VOP3__V_CVT_FLR_I32_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_I16_F16()
Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_I32_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_I32_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_OFF_F32_I4()
Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3A *)
Inst_VOP3__V_CVT_PKACCUM_U8_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PKNORM_I16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PKNORM_I16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PKNORM_U16_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PKNORM_U16_F32()
~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
Inst_VOP3__V_CVT_PKRTZ_F16_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_BF16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_BF16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_BF8_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_BF8_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PK_F16_F32()
Inst_VOP3__V_CVT_PK_F16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_FP8_F32()
Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_PK_I16_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_U16_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_PK_U8_F32()
Inst_VOP3__V_CVT_RPI_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_RPI_I32_F32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_SR_BF16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_SR_BF16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_SR_BF8_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_SR_BF8_F32(InFmt_VOP3A *)
Inst_VOP3__V_CVT_SR_F16_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_SR_F16_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_SR_FP8_F32(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_SR_FP8_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3A *)
~Inst_VOP3__V_CVT_U16_F16()
Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_CVT_U32_F32()
~Inst_VOP3__V_CVT_U32_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_FIXUP_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3A *)
Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_FIXUP_F32()
~Inst_VOP3__V_DIV_FIXUP_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_FMAS_F32()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_FMAS_F64()
Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3A *)
~Inst_VOP3__V_DIV_SCALE_F32()
Inst_VOP3__V_DIV_SCALE_F32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_DIV_SCALE_F64()
Inst_VOP3__V_DIV_SCALE_F64(InFmt_VOP3B *)
~Inst_VOP3__V_DOT2C_F32_BF16()
Inst_VOP3__V_DOT2C_F32_BF16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_EXP_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_EXP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_EXP_LEGACY_F32()
Inst_VOP3__V_FFBH_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FFBH_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FFBL_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FLOOR_F16()
Inst_VOP3__V_FLOOR_F16(InFmt_VOP3A *)
Inst_VOP3__V_FLOOR_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FLOOR_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FLOOR_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FLOOR_F64()
Inst_VOP3__V_FMAC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FMA_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FMA_F32(InFmt_VOP3A *)
Inst_VOP3__V_FMA_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FRACT_F16()
Inst_VOP3__V_FRACT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FRACT_F32()
Inst_VOP3__V_FRACT_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FRACT_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FRACT_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_EXP_I16_F16()
Inst_VOP3__V_FREXP_EXP_I16_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_FREXP_EXP_I32_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_EXP_I32_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FREXP_EXP_I32_F64()
Inst_VOP3__V_FREXP_EXP_I32_F64(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_MANT_F16()
Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_FREXP_MANT_F32()
Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3A *)
~Inst_VOP3__V_FREXP_MANT_F64()
Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3A *)
~Inst_VOP3__V_INTERP_MOV_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_P1LL_F16(InFmt_VOP3A *)
~Inst_VOP3__V_INTERP_P1LL_F16()
~Inst_VOP3__V_INTERP_P1LV_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_INTERP_P1LV_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P1_F32()
Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3A *)
Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P2_F16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_INTERP_P2_F32()
Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F16(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LDEXP_F64(InFmt_VOP3A *)
~Inst_VOP3__V_LDEXP_F64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LERP_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LOG_LEGACY_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B16()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B32()
Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3A *)
~Inst_VOP3__V_LSHLREV_B64()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHL_ADD_U32()
Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHL_ADD_U64()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHL_ADD_U64(InFmt_VOP3A *)
Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *)
~Inst_VOP3__V_LSHL_OR_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHRREV_B16()
~Inst_VOP3__V_LSHRREV_B32()
Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_LSHRREV_B64()
Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAC_F16(InFmt_VOP3A *)
Inst_VOP3__V_MAC_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAD_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MAD_I32_I24()
~Inst_VOP3__V_MAD_I64_I32()
Inst_VOP3__V_MAD_I64_I32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_MAD_LEGACY_F32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAD_U16(InFmt_VOP3A *)
Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3A *)
~Inst_VOP3__V_MAD_U32_U24()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MAD_U64_U32()
Inst_VOP3__V_MAD_U64_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX3_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAX3_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_F32(InFmt_VOP3A *)
Inst_VOP3__V_MAX_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_I32(InFmt_VOP3A *)
Inst_VOP3__V_MAX_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MAX_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MBCNT_HI_U32_B32()
Inst_VOP3__V_MBCNT_HI_U32_B32(InFmt_VOP3A *)
~Inst_VOP3__V_MBCNT_LO_U32_B32()
Inst_VOP3__V_MBCNT_LO_U32_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MED3_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MED3_I32(InFmt_VOP3A *)
Inst_VOP3__V_MED3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN3_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN3_I32(InFmt_VOP3A *)
Inst_VOP3__V_MIN3_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_F32(InFmt_VOP3A *)
Inst_VOP3__V_MIN_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_I16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_I32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MIN_U16(InFmt_VOP3A *)
Inst_VOP3__V_MIN_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MOV_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3A *)
~Inst_VOP3__V_MOV_FED_B32()
~Inst_VOP3__V_MQSAD_PK_U16_U8()
Inst_VOP3__V_MQSAD_PK_U16_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MQSAD_U32_U8()
Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MSAD_U8(InFmt_VOP3A *)
Inst_VOP3__V_MUL_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_F32(InFmt_VOP3A *)
Inst_VOP3__V_MUL_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_I32_I24()
Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_I32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_HI_U32_U24()
Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_HI_U32()
Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_I32_I24()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_LEGACY_F32()
Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3A *)
~Inst_VOP3__V_MUL_LO_U16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_LO_U32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_MUL_U32_U24()
Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_NOP(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_NOT_B32(InFmt_VOP3A *)
Inst_VOP3__V_OR3_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_OR_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_PERMLANE16_SWAP_B32(InFmt_VOP3A *)
~Inst_VOP3__V_PERMLANE16_SWAP_B32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_PERMLANE32_SWAP_B32(InFmt_VOP3A *)
~Inst_VOP3__V_PERMLANE32_SWAP_B32()
void execute(GPUDynInstPtr) override
uint8_t permute(uint64_t in_dword2x, uint32_t sel)
Inst_VOP3__V_PERM_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_PRNG_B32(InFmt_VOP3A *)
~Inst_VOP3__V_QSAD_PK_U16_U8()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3A *)
Inst_VOP3__V_RCP_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RCP_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RCP_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RCP_IFLAG_F32()
Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_READLANE_B32()
Inst_VOP3__V_READLANE_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RNDNE_F16()
Inst_VOP3__V_RNDNE_F16(InFmt_VOP3A *)
~Inst_VOP3__V_RNDNE_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RNDNE_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_RNDNE_F64()
Inst_VOP3__V_RNDNE_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RSQ_F16(InFmt_VOP3A *)
Inst_VOP3__V_RSQ_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_RSQ_F64(InFmt_VOP3A *)
Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3A *)
~Inst_VOP3__V_SAD_HI_U8()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SAD_U8(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SIN_F16(InFmt_VOP3A *)
Inst_VOP3__V_SIN_F32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SQRT_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SQRT_F32(InFmt_VOP3A *)
Inst_VOP3__V_SQRT_F64(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUBBREV_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBBREV_CO_U32()
Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *)
~Inst_VOP3__V_SUBB_CO_U32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_CO_U32()
Inst_VOP3__V_SUBREV_CO_U32(InFmt_VOP3B *)
Inst_VOP3__V_SUBREV_F16(InFmt_VOP3A *)
~Inst_VOP3__V_SUBREV_F16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUBREV_F32(InFmt_VOP3A *)
Inst_VOP3__V_SUBREV_U16(InFmt_VOP3A *)
~Inst_VOP3__V_SUBREV_U16()
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUBREV_U32()
Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *)
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_SUB_CO_U32()
Inst_VOP3__V_SUB_F16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_F32(InFmt_VOP3A *)
Inst_VOP3__V_SUB_U16(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
Inst_VOP3__V_SUB_U32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_VOP3__V_TRIG_PREOP_F64()
Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3A *)
Inst_VOP3__V_TRUNC_F16(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F16()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_TRUNC_F32(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F32()
void execute(GPUDynInstPtr) override
Inst_VOP3__V_TRUNC_F64(InFmt_VOP3A *)
~Inst_VOP3__V_TRUNC_F64()
Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3A *)
~Inst_VOP3__V_WRITELANE_B32()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_VOP3__V_XAD_U32(InFmt_VOP3A *)
Inst_VOP3__V_XOR_B32(InFmt_VOP3A *)
void execute(GPUDynInstPtr) override
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
bit access to scalar data.
void panicUnimplemented() const
void read() override
read from the vrf.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
void write() override
write to the vrf.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr int popCount(uint64_t val)
Returns the number of set ones in the provided value.
constexpr uint64_t sext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
std::enable_if_t< std::is_integral_v< T >, T > reverseBits(T val, size_t size=sizeof(T))
Takes a value and returns the bit reversed version.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
mxfp< fp8_e4m3_info > mxfloat8
mxfp< binary32 > mxfloat32
mxfp< fp16_e8m7_info > mxbfloat16
mxfp< fp16_e5m10_info > mxfloat16
dFMT convertMXFP(sFMT in, mxfpRoundingMode mode=roundTiesToEven, uint32_t seed=0)
mxfp< fp8_e5m2_info > mxbfloat8
static constexpr float32_t f32(uint32_t v)
classes that represent vector/scalar operands in VEGA ISA.
ScalarOperand< ScalarRegU64, false > ScalarOperandU64
VecOperand< VecElemF32, true > ConstVecOperandF32
ScalarRegI32 firstOppositeSignBit(ScalarRegI32 val)
VecOperand< VecElemU32, false > VecOperandU32
ScalarOperand< ScalarRegU32, true > ConstScalarOperandU32
VecOperand< VecElemF64, true > ConstVecOperandF64
VecOperand< VecElemI16, false, 1 > VecOperandI16
VecOperand< VecElemI32, true > ConstVecOperandI32
VecOperand< VecElemU32, true > ConstVecOperandU32
ScalarRegI32 findFirstOne(T val)
T median(T val_0, T val_1, T val_2)
ScalarRegI32 findFirstOneMsb(T val)
T roundNearestEven(T val)
VecOperand< VecElemI64, true > ConstVecOperandI64
VecOperand< VecElemU16, false, 1 > VecOperandU16
ScalarOperand< ScalarRegU64, true > ConstScalarOperandU64
VecOperand< VecElemU16, true, 1 > ConstVecOperandU16
ScalarOperand< ScalarRegF32, true > ConstScalarOperandF32
ScalarOperand< ScalarRegU32, false > ScalarOperandU32
VecOperand< VecElemI64, false > VecOperandI64
const int NumVecElemPerVecReg(64)
VecOperand< VecElemU64, false > VecOperandU64
VecOperand< VecElemI32, false > VecOperandI32
VecElemU32 muladd(VecElemU64 &dst, VecElemU32 val_0, VecElemU32 val_1, VecElemU64 val_2)
VecOperand< VecElemI16, true, 1 > ConstVecOperandI16
VecOperand< VecElemU64, true > ConstVecOperandU64
VecOperand< VecElemF64, false > VecOperandF64
VecOperand< VecElemF32, false > VecOperandF32
Bitfield< 31, 16 > selector
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)