41 Inst_MUBUF__BUFFER_LOAD_FORMAT_X
42 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(
InFmt_MUBUF *iFmt)
41 Inst_MUBUF__BUFFER_LOAD_FORMAT_X {
…}
73 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
74 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(
InFmt_MUBUF *iFmt)
73 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY {
…}
105 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
106 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(
InFmt_MUBUF *iFmt)
105 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ {
…}
137 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
138 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(
InFmt_MUBUF *iFmt)
137 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW {
…}
169 Inst_MUBUF__BUFFER_STORE_FORMAT_X
170 ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(
InFmt_MUBUF *iFmt)
169 Inst_MUBUF__BUFFER_STORE_FORMAT_X {
…}
201 Inst_MUBUF__BUFFER_STORE_FORMAT_XY
202 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(
InFmt_MUBUF *iFmt)
201 Inst_MUBUF__BUFFER_STORE_FORMAT_XY {
…}
233 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
234 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(
InFmt_MUBUF *iFmt)
233 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ {
…}
265 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
266 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(
InFmt_MUBUF *iFmt)
267 :
Inst_MUBUF(iFmt,
"buffer_store_format_xyzw")
265 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW {
…}
274 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
275 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
274 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW {
…}
298 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
299 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(
InFmt_MUBUF *iFmt)
300 :
Inst_MUBUF(iFmt,
"buffer_load_format_d16_x")
298 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X {
…}
307 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
308 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
307 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X {
…}
331 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
332 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(
InFmt_MUBUF *iFmt)
333 :
Inst_MUBUF(iFmt,
"buffer_load_format_d16_xy")
331 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY {
…}
340 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
341 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
340 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY {
…}
366 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
367 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(
InFmt_MUBUF *iFmt)
368 :
Inst_MUBUF(iFmt,
"buffer_load_format_d16_xyz")
366 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ {
…}
375 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
376 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
375 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ {
…}
401 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
402 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(
InFmt_MUBUF *iFmt)
403 :
Inst_MUBUF(iFmt,
"buffer_load_format_d16_xyzw")
401 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW {
…}
410 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
411 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
410 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW {
…}
436 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
437 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(
InFmt_MUBUF *iFmt)
438 :
Inst_MUBUF(iFmt,
"buffer_store_format_d16_x")
436 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X {
…}
443 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
444 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
443 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X {
…}
469 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
470 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(
InFmt_MUBUF *iFmt)
471 :
Inst_MUBUF(iFmt,
"buffer_store_format_d16_xy")
469 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY {
…}
478 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
479 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
478 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY {
…}
504 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
505 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(
InFmt_MUBUF *iFmt)
506 :
Inst_MUBUF(iFmt,
"buffer_store_format_d16_xyz")
504 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ {
…}
513 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
514 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
513 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ {
…}
539 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
540 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(
InFmt_MUBUF *iFmt)
541 :
Inst_MUBUF(iFmt,
"buffer_store_format_d16_xyzw")
539 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW {
…}
548 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
549 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
548 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW {
…}
574 Inst_MUBUF__BUFFER_LOAD_UBYTE
574 Inst_MUBUF__BUFFER_LOAD_UBYTE {
…}
598 if (gpuDynInst->exec_mask.none()) {
604 gpuDynInst->latency.init(gpuDynInst->computeUnit());
605 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
620 addr0, addr1, rsrcDesc,
offset, inst_offset);
625 addr0, addr1, rsrcDesc,
offset, inst_offset);
630 addr1, addr0, rsrcDesc,
offset, inst_offset);
636 addr1, addr0, rsrcDesc,
offset, inst_offset);
639 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
645 initMemRead<VecElemU8>(gpuDynInst);
654 if (gpuDynInst->exec_mask[lane]) {
657 gpuDynInst->d_data))[lane]);
669 Inst_MUBUF__BUFFER_LOAD_SBYTE
669 Inst_MUBUF__BUFFER_LOAD_SBYTE {
…}
701 Inst_MUBUF__BUFFER_LOAD_USHORT
702 ::Inst_MUBUF__BUFFER_LOAD_USHORT(
InFmt_MUBUF *iFmt)
701 Inst_MUBUF__BUFFER_LOAD_USHORT {
…}
725 if (gpuDynInst->exec_mask.none()) {
731 gpuDynInst->latency.init(gpuDynInst->computeUnit());
732 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
747 addr0, addr1, rsrcDesc,
offset, inst_offset);
752 addr0, addr1, rsrcDesc,
offset, inst_offset);
757 addr1, addr0, rsrcDesc,
offset, inst_offset);
763 addr1, addr0, rsrcDesc,
offset, inst_offset);
766 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
772 initMemRead<VecElemU16>(gpuDynInst);
781 if (gpuDynInst->exec_mask[lane]) {
784 gpuDynInst->d_data))[lane]);
796 Inst_MUBUF__BUFFER_LOAD_SSHORT
797 ::Inst_MUBUF__BUFFER_LOAD_SSHORT(
InFmt_MUBUF *iFmt)
796 Inst_MUBUF__BUFFER_LOAD_SSHORT {
…}
828 Inst_MUBUF__BUFFER_LOAD_SHORT_D16
829 ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16(
InFmt_MUBUF *iFmt)
836 warn(
"BUFFER.LDS not implemented!");
828 Inst_MUBUF__BUFFER_LOAD_SHORT_D16 {
…}
854 if (gpuDynInst->exec_mask.none()) {
860 gpuDynInst->latency.init(gpuDynInst->computeUnit());
861 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
880 addr0, addr1, rsrcDesc,
offset, inst_offset);
885 addr0, addr1, rsrcDesc,
offset, inst_offset);
890 addr1, addr0, rsrcDesc,
offset, inst_offset);
896 addr1, addr0, rsrcDesc,
offset, inst_offset);
899 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
905 initMemRead<VecElemU16>(gpuDynInst);
914 if (gpuDynInst->exec_mask[lane]) {
917 gpuDynInst->d_data))[lane];
929 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
930 ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(
InFmt_MUBUF *iFmt)
931 :
Inst_MUBUF(iFmt,
"buffer_load_short_d16_hi")
937 warn(
"BUFFER.LDS not implemented!");
929 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI {
…}
956 if (gpuDynInst->exec_mask.none()) {
962 gpuDynInst->latency.init(gpuDynInst->computeUnit());
963 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
982 addr0, addr1, rsrcDesc,
offset, inst_offset);
987 addr0, addr1, rsrcDesc,
offset, inst_offset);
992 addr1, addr0, rsrcDesc,
offset, inst_offset);
998 addr1, addr0, rsrcDesc,
offset, inst_offset);
1001 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1007 initMemRead<VecElemU16>(gpuDynInst);
1016 if (gpuDynInst->exec_mask[lane]) {
1019 gpuDynInst->d_data))[lane];
1031 Inst_MUBUF__BUFFER_LOAD_DWORD
1032 ::Inst_MUBUF__BUFFER_LOAD_DWORD(
InFmt_MUBUF *iFmt)
1031 Inst_MUBUF__BUFFER_LOAD_DWORD {
…}
1053 Wavefront *wf = gpuDynInst->wavefront();
1055 if (gpuDynInst->exec_mask.none()) {
1061 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1062 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1081 addr0, addr1, rsrcDesc,
offset, inst_offset);
1086 addr0, addr1, rsrcDesc,
offset, inst_offset);
1091 addr1, addr0, rsrcDesc,
offset, inst_offset);
1097 addr1, addr0, rsrcDesc,
offset, inst_offset);
1100 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1106 initMemRead<VecElemU32>(gpuDynInst);
1115 if (gpuDynInst->exec_mask[lane]) {
1118 gpuDynInst->d_data))[lane];
1129 Inst_MUBUF__BUFFER_LOAD_DWORDX2
1130 ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(
InFmt_MUBUF *iFmt)
1129 Inst_MUBUF__BUFFER_LOAD_DWORDX2 {
…}
1151 Wavefront *wf = gpuDynInst->wavefront();
1153 if (gpuDynInst->exec_mask.none()) {
1159 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1160 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1175 addr0, addr1, rsrcDesc,
offset, inst_offset);
1180 addr0, addr1, rsrcDesc,
offset, inst_offset);
1185 addr1, addr0, rsrcDesc,
offset, inst_offset);
1191 addr1, addr0, rsrcDesc,
offset, inst_offset);
1194 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1200 initMemRead<2>(gpuDynInst);
1210 if (gpuDynInst->exec_mask[lane]) {
1212 vdst0[lane] = (
reinterpret_cast<VecElemU32*
>(
1213 gpuDynInst->d_data))[lane * 2];
1214 vdst1[lane] = (
reinterpret_cast<VecElemU32*
>(
1215 gpuDynInst->d_data))[lane * 2 + 1];
1228 Inst_MUBUF__BUFFER_LOAD_DWORDX3
1229 ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(
InFmt_MUBUF *iFmt)
1228 Inst_MUBUF__BUFFER_LOAD_DWORDX3 {
…}
1250 Wavefront *wf = gpuDynInst->wavefront();
1252 if (gpuDynInst->exec_mask.none()) {
1258 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1259 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1274 addr0, addr1, rsrcDesc,
offset, inst_offset);
1279 addr0, addr1, rsrcDesc,
offset, inst_offset);
1284 addr1, addr0, rsrcDesc,
offset, inst_offset);
1290 addr1, addr0, rsrcDesc,
offset, inst_offset);
1293 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1299 initMemRead<3>(gpuDynInst);
1310 if (gpuDynInst->exec_mask[lane]) {
1312 vdst0[lane] = (
reinterpret_cast<VecElemU32*
>(
1313 gpuDynInst->d_data))[lane * 3];
1314 vdst1[lane] = (
reinterpret_cast<VecElemU32*
>(
1315 gpuDynInst->d_data))[lane * 3 + 1];
1316 vdst2[lane] = (
reinterpret_cast<VecElemU32*
>(
1317 gpuDynInst->d_data))[lane * 3 + 2];
1332 Inst_MUBUF__BUFFER_LOAD_DWORDX4
1333 ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(
InFmt_MUBUF *iFmt)
1332 Inst_MUBUF__BUFFER_LOAD_DWORDX4 {
…}
1354 Wavefront *wf = gpuDynInst->wavefront();
1356 if (gpuDynInst->exec_mask.none()) {
1362 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1363 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1378 addr0, addr1, rsrcDesc,
offset, inst_offset);
1383 addr0, addr1, rsrcDesc,
offset, inst_offset);
1388 addr1, addr0, rsrcDesc,
offset, inst_offset);
1394 addr1, addr0, rsrcDesc,
offset, inst_offset);
1397 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1403 initMemRead<4>(gpuDynInst);
1415 if (gpuDynInst->exec_mask[lane]) {
1417 vdst0[lane] = (
reinterpret_cast<VecElemU32*
>(
1418 gpuDynInst->d_data))[lane * 4];
1419 vdst1[lane] = (
reinterpret_cast<VecElemU32*
>(
1420 gpuDynInst->d_data))[lane * 4 + 1];
1421 vdst2[lane] = (
reinterpret_cast<VecElemU32*
>(
1422 gpuDynInst->d_data))[lane * 4 + 2];
1423 vdst3[lane] = (
reinterpret_cast<VecElemU32*
>(
1424 gpuDynInst->d_data))[lane * 4 + 3];
1441 Inst_MUBUF__BUFFER_STORE_BYTE
1442 ::Inst_MUBUF__BUFFER_STORE_BYTE(
InFmt_MUBUF *iFmt)
1441 Inst_MUBUF__BUFFER_STORE_BYTE {
…}
1463 Wavefront *wf = gpuDynInst->wavefront();
1465 if (gpuDynInst->exec_mask.none()) {
1472 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1473 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1490 addr0, addr1, rsrcDesc,
offset, inst_offset);
1495 addr0, addr1, rsrcDesc,
offset, inst_offset);
1500 addr1, addr0, rsrcDesc,
offset, inst_offset);
1506 addr1, addr0, rsrcDesc,
offset, inst_offset);
1509 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1512 if (gpuDynInst->exec_mask[lane]) {
1513 (
reinterpret_cast<VecElemI8*
>(gpuDynInst->d_data))[lane]
1522 initMemWrite<VecElemI8>(gpuDynInst);
1531 Inst_MUBUF__BUFFER_STORE_SHORT
1532 ::Inst_MUBUF__BUFFER_STORE_SHORT(
InFmt_MUBUF *iFmt)
1531 Inst_MUBUF__BUFFER_STORE_SHORT {
…}
1553 Wavefront *wf = gpuDynInst->wavefront();
1555 if (gpuDynInst->exec_mask.none()) {
1562 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1563 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1580 addr0, addr1, rsrcDesc,
offset, inst_offset);
1585 addr0, addr1, rsrcDesc,
offset, inst_offset);
1590 addr1, addr0, rsrcDesc,
offset, inst_offset);
1596 addr1, addr0, rsrcDesc,
offset, inst_offset);
1599 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1602 if (gpuDynInst->exec_mask[lane]) {
1603 (
reinterpret_cast<VecElemI16*
>(gpuDynInst->d_data))[lane]
1612 initMemWrite<VecElemI16>(gpuDynInst);
1643 Wavefront *wf = gpuDynInst->wavefront();
1645 if (gpuDynInst->exec_mask.none()) {
1652 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1653 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1670 addr0, addr1, rsrcDesc,
offset, inst_offset);
1675 addr0, addr1, rsrcDesc,
offset, inst_offset);
1680 addr1, addr0, rsrcDesc,
offset, inst_offset);
1686 addr1, addr0, rsrcDesc,
offset, inst_offset);
1689 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1692 if (gpuDynInst->exec_mask[lane]) {
1693 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane]
1702 initMemWrite<VecElemU32>(gpuDynInst);
1711 Inst_MUBUF__BUFFER_STORE_DWORDX2
1712 ::Inst_MUBUF__BUFFER_STORE_DWORDX2(
InFmt_MUBUF *iFmt)
1711 Inst_MUBUF__BUFFER_STORE_DWORDX2 {
…}
1733 Wavefront *wf = gpuDynInst->wavefront();
1735 if (gpuDynInst->exec_mask.none()) {
1742 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1743 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1762 addr0, addr1, rsrcDesc,
offset, inst_offset);
1767 addr0, addr1, rsrcDesc,
offset, inst_offset);
1772 addr1, addr0, rsrcDesc,
offset, inst_offset);
1778 addr1, addr0, rsrcDesc,
offset, inst_offset);
1781 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1784 if (gpuDynInst->exec_mask[lane]) {
1785 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane * 4]
1787 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 1]
1796 initMemWrite<2>(gpuDynInst);
1805 Inst_MUBUF__BUFFER_STORE_DWORDX3
1806 ::Inst_MUBUF__BUFFER_STORE_DWORDX3(
InFmt_MUBUF *iFmt)
1805 Inst_MUBUF__BUFFER_STORE_DWORDX3 {
…}
1827 Wavefront *wf = gpuDynInst->wavefront();
1829 if (gpuDynInst->exec_mask.none()) {
1836 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1837 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1858 addr0, addr1, rsrcDesc,
offset, inst_offset);
1863 addr0, addr1, rsrcDesc,
offset, inst_offset);
1868 addr1, addr0, rsrcDesc,
offset, inst_offset);
1874 addr1, addr0, rsrcDesc,
offset, inst_offset);
1877 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1880 if (gpuDynInst->exec_mask[lane]) {
1881 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane * 4]
1883 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 1]
1885 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 2]
1894 initMemWrite<3>(gpuDynInst);
1903 Inst_MUBUF__BUFFER_STORE_DWORDX4
1904 ::Inst_MUBUF__BUFFER_STORE_DWORDX4(
InFmt_MUBUF *iFmt)
1903 Inst_MUBUF__BUFFER_STORE_DWORDX4 {
…}
1925 Wavefront *wf = gpuDynInst->wavefront();
1927 if (gpuDynInst->exec_mask.none()) {
1934 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1935 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1958 addr0, addr1, rsrcDesc,
offset, inst_offset);
1963 addr0, addr1, rsrcDesc,
offset, inst_offset);
1968 addr1, addr0, rsrcDesc,
offset, inst_offset);
1974 addr1, addr0, rsrcDesc,
offset, inst_offset);
1977 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1980 if (gpuDynInst->exec_mask[lane]) {
1981 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane * 4]
1983 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 1]
1985 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 2]
1987 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 3]
1996 initMemWrite<4>(gpuDynInst);
2005 Inst_MUBUF__BUFFER_STORE_LDS_DWORD
2006 ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(
InFmt_MUBUF *iFmt)
2005 Inst_MUBUF__BUFFER_STORE_LDS_DWORD {
…}
2031 setFlag(GPUStaticInst::MemSync);
2046 Wavefront *wf = gpuDynInst->wavefront();
2048 if (gpuDynInst->exec_mask.none()) {
2054 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2055 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2057 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
2058 gpuDynInst->computeUnit()->globalMemoryPipe.
2059 issueRequest(gpuDynInst);
2061 fatal(
"Unsupported scope for flat instruction.\n");
2080 Inst_MUBUF__BUFFER_WBINVL1_VOL
2081 ::Inst_MUBUF__BUFFER_WBINVL1_VOL(
InFmt_MUBUF*iFmt)
2089 setFlag(GPUStaticInst::MemSync);
2080 Inst_MUBUF__BUFFER_WBINVL1_VOL {
…}
2105 Wavefront *wf = gpuDynInst->wavefront();
2107 if (gpuDynInst->exec_mask.none()) {
2113 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2114 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2116 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
2117 gpuDynInst->computeUnit()->globalMemoryPipe.
2118 issueRequest(gpuDynInst);
2120 fatal(
"Unsupported scope for flat instruction.\n");
2134 Inst_MUBUF__BUFFER_ATOMIC_SWAP
2135 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(
InFmt_MUBUF *iFmt)
2134 Inst_MUBUF__BUFFER_ATOMIC_SWAP {
…}
2164 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
2165 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(
InFmt_MUBUF *iFmt)
2164 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP {
…}
2192 Wavefront *wf = gpuDynInst->wavefront();
2194 if (gpuDynInst->exec_mask.none()) {
2200 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2201 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2220 addr0, addr1, rsrcDesc,
offset, inst_offset);
2225 addr0, addr1, rsrcDesc,
offset, inst_offset);
2230 addr1, addr0, rsrcDesc,
offset, inst_offset);
2236 addr1, addr0, rsrcDesc,
offset, inst_offset);
2240 if (gpuDynInst->exec_mask[lane]) {
2241 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->x_data))[lane]
2243 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->a_data))[lane]
2248 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
2254 initAtomicAccess<VecElemU32>(gpuDynInst);
2264 if (gpuDynInst->exec_mask[lane]) {
2266 gpuDynInst->d_data))[lane];
2275 Inst_MUBUF__BUFFER_ATOMIC_ADD
2276 ::Inst_MUBUF__BUFFER_ATOMIC_ADD(
InFmt_MUBUF *iFmt)
2275 Inst_MUBUF__BUFFER_ATOMIC_ADD {
…}
2305 Inst_MUBUF__BUFFER_ATOMIC_SUB
2306 ::Inst_MUBUF__BUFFER_ATOMIC_SUB(
InFmt_MUBUF *iFmt)
2305 Inst_MUBUF__BUFFER_ATOMIC_SUB {
…}
2335 Inst_MUBUF__BUFFER_ATOMIC_SMIN
2336 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(
InFmt_MUBUF *iFmt)
2335 Inst_MUBUF__BUFFER_ATOMIC_SMIN {
…}
2365 Inst_MUBUF__BUFFER_ATOMIC_UMIN
2366 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(
InFmt_MUBUF *iFmt)
2365 Inst_MUBUF__BUFFER_ATOMIC_UMIN {
…}
2395 Inst_MUBUF__BUFFER_ATOMIC_SMAX
2396 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(
InFmt_MUBUF *iFmt)
2395 Inst_MUBUF__BUFFER_ATOMIC_SMAX {
…}
2425 Inst_MUBUF__BUFFER_ATOMIC_UMAX
2426 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(
InFmt_MUBUF *iFmt)
2425 Inst_MUBUF__BUFFER_ATOMIC_UMAX {
…}
2455 Inst_MUBUF__BUFFER_ATOMIC_AND
2456 ::Inst_MUBUF__BUFFER_ATOMIC_AND(
InFmt_MUBUF *iFmt)
2455 Inst_MUBUF__BUFFER_ATOMIC_AND {
…}
2485 Inst_MUBUF__BUFFER_ATOMIC_OR
2485 Inst_MUBUF__BUFFER_ATOMIC_OR {
…}
2515 Inst_MUBUF__BUFFER_ATOMIC_XOR
2516 ::Inst_MUBUF__BUFFER_ATOMIC_XOR(
InFmt_MUBUF *iFmt)
2515 Inst_MUBUF__BUFFER_ATOMIC_XOR {
…}
2545 Inst_MUBUF__BUFFER_ATOMIC_INC
2546 ::Inst_MUBUF__BUFFER_ATOMIC_INC(
InFmt_MUBUF *iFmt)
2545 Inst_MUBUF__BUFFER_ATOMIC_INC {
…}
2575 Inst_MUBUF__BUFFER_ATOMIC_DEC
2576 ::Inst_MUBUF__BUFFER_ATOMIC_DEC(
InFmt_MUBUF *iFmt)
2575 Inst_MUBUF__BUFFER_ATOMIC_DEC {
…}
2605 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2606 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(
InFmt_MUBUF *iFmt)
2605 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 {
…}
2635 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2636 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(
InFmt_MUBUF *iFmt)
2637 :
Inst_MUBUF(iFmt,
"buffer_atomic_cmpswap_x2")
2635 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 {
…}
2649 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2650 ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
2649 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 {
…}
2668 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2669 ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(
InFmt_MUBUF *iFmt)
2668 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 {
…}
2698 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2699 ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(
InFmt_MUBUF *iFmt)
2698 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 {
…}
2728 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2729 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(
InFmt_MUBUF *iFmt)
2728 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 {
…}
2758 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2759 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(
InFmt_MUBUF *iFmt)
2758 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 {
…}
2788 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2789 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(
InFmt_MUBUF *iFmt)
2788 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 {
…}
2818 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2819 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(
InFmt_MUBUF *iFmt)
2818 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 {
…}
2848 Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2849 ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(
InFmt_MUBUF *iFmt)
2848 Inst_MUBUF__BUFFER_ATOMIC_AND_X2 {
…}
2878 Inst_MUBUF__BUFFER_ATOMIC_OR_X2
2879 ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(
InFmt_MUBUF *iFmt)
2878 Inst_MUBUF__BUFFER_ATOMIC_OR_X2 {
…}
2906 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
2907 ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(
InFmt_MUBUF *iFmt)
2906 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 {
…}
2936 Inst_MUBUF__BUFFER_ATOMIC_INC_X2
2937 ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(
InFmt_MUBUF *iFmt)
2936 Inst_MUBUF__BUFFER_ATOMIC_INC_X2 {
…}
2966 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
2967 ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(
InFmt_MUBUF *iFmt)
2966 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 {
…}
~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_ADD()
~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_AND()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_DEC()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_INC()
~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_OR()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SUB()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_XOR()
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_DWORD()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_SBYTE()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_SHORT_D16()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_SSHORT()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_UBYTE()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_USHORT()
~Inst_MUBUF__BUFFER_STORE_BYTE()
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_DWORDX2()
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_DWORDX3()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_DWORDX4()
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_DWORD()
void initiateAcc(GPUDynInstPtr) override
Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *)
~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_SHORT()
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_WBINVL1_VOL()
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_WBINVL1()
void execute(GPUDynInstPtr) override
Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *)
void completeAcc(GPUDynInstPtr) override
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
void calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx, SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
MUBUF instructions calculate their addresses as follows:
void read() override
read from and write to the underlying register(s) that this operand is referring to.
void panicUnimplemented() const
void read() override
read from the vrf.
void write() override
write to the vrf.
void decVMemInstsIssued()
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
#define fatal(...)
This implements a cprintf based fatal() function.
constexpr unsigned NumVecElemPerVecReg
VecOperand< VecElemU32, true > ConstVecOperandU32
ScalarOperand< ScalarRegU32, true, 4 > ConstScalarOperandU128
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr