41 Inst_MUBUF__BUFFER_LOAD_FORMAT_X
42 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(
InFmt_MUBUF *iFmt)
73 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
74 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(
InFmt_MUBUF *iFmt)
105 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
106 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(
InFmt_MUBUF *iFmt)
137 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
138 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(
InFmt_MUBUF *iFmt)
169 Inst_MUBUF__BUFFER_STORE_FORMAT_X
170 ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(
InFmt_MUBUF *iFmt)
201 Inst_MUBUF__BUFFER_STORE_FORMAT_XY
202 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(
InFmt_MUBUF *iFmt)
233 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
234 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(
InFmt_MUBUF *iFmt)
265 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
266 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(
InFmt_MUBUF *iFmt)
267 :
Inst_MUBUF(iFmt,
"buffer_store_format_xyzw")
274 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
275 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
298 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
299 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(
InFmt_MUBUF *iFmt)
300 :
Inst_MUBUF(iFmt,
"buffer_load_format_d16_x")
307 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
308 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
331 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
332 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(
InFmt_MUBUF *iFmt)
333 :
Inst_MUBUF(iFmt,
"buffer_load_format_d16_xy")
340 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
341 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
366 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
367 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(
InFmt_MUBUF *iFmt)
368 :
Inst_MUBUF(iFmt,
"buffer_load_format_d16_xyz")
375 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
376 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
401 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
402 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(
InFmt_MUBUF *iFmt)
403 :
Inst_MUBUF(iFmt,
"buffer_load_format_d16_xyzw")
410 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
411 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
436 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
437 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(
InFmt_MUBUF *iFmt)
438 :
Inst_MUBUF(iFmt,
"buffer_store_format_d16_x")
443 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
444 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
469 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
470 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(
InFmt_MUBUF *iFmt)
471 :
Inst_MUBUF(iFmt,
"buffer_store_format_d16_xy")
478 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
479 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
504 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
505 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(
InFmt_MUBUF *iFmt)
506 :
Inst_MUBUF(iFmt,
"buffer_store_format_d16_xyz")
513 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
514 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
539 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
540 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(
InFmt_MUBUF *iFmt)
541 :
Inst_MUBUF(iFmt,
"buffer_store_format_d16_xyzw")
548 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
549 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
574 Inst_MUBUF__BUFFER_LOAD_UBYTE
598 if (gpuDynInst->exec_mask.none()) {
605 gpuDynInst->latency.init(gpuDynInst->computeUnit());
606 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
621 addr0, addr1, rsrcDesc,
offset, inst_offset);
626 addr0, addr1, rsrcDesc,
offset, inst_offset);
631 addr1, addr0, rsrcDesc,
offset, inst_offset);
637 addr1, addr0, rsrcDesc,
offset, inst_offset);
640 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
661 if (gpuDynInst->exec_mask[lane]) {
664 gpuDynInst->d_data))[lane]);
676 Inst_MUBUF__BUFFER_LOAD_SBYTE
708 Inst_MUBUF__BUFFER_LOAD_USHORT
709 ::Inst_MUBUF__BUFFER_LOAD_USHORT(
InFmt_MUBUF *iFmt)
732 if (gpuDynInst->exec_mask.none()) {
739 gpuDynInst->latency.init(gpuDynInst->computeUnit());
740 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
755 addr0, addr1, rsrcDesc,
offset, inst_offset);
760 addr0, addr1, rsrcDesc,
offset, inst_offset);
765 addr1, addr0, rsrcDesc,
offset, inst_offset);
771 addr1, addr0, rsrcDesc,
offset, inst_offset);
774 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
795 if (gpuDynInst->exec_mask[lane]) {
798 gpuDynInst->d_data))[lane]);
810 Inst_MUBUF__BUFFER_LOAD_SSHORT
811 ::Inst_MUBUF__BUFFER_LOAD_SSHORT(
InFmt_MUBUF *iFmt)
842 Inst_MUBUF__BUFFER_LOAD_SHORT_D16
843 ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16(
InFmt_MUBUF *iFmt)
850 warn(
"BUFFER.LDS not implemented!");
868 if (gpuDynInst->exec_mask.none()) {
875 gpuDynInst->latency.init(gpuDynInst->computeUnit());
876 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
895 addr0, addr1, rsrcDesc,
offset, inst_offset);
900 addr0, addr1, rsrcDesc,
offset, inst_offset);
905 addr1, addr0, rsrcDesc,
offset, inst_offset);
911 addr1, addr0, rsrcDesc,
offset, inst_offset);
914 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
929 if (gpuDynInst->exec_mask[lane]) {
932 gpuDynInst->d_data))[lane];
944 Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI
945 ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(
InFmt_MUBUF *iFmt)
946 :
Inst_MUBUF(iFmt,
"buffer_load_short_d16_hi")
952 warn(
"BUFFER.LDS not implemented!");
971 if (gpuDynInst->exec_mask.none()) {
978 gpuDynInst->latency.init(gpuDynInst->computeUnit());
979 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
998 addr0, addr1, rsrcDesc,
offset, inst_offset);
1003 addr0, addr1, rsrcDesc,
offset, inst_offset);
1008 addr1, addr0, rsrcDesc,
offset, inst_offset);
1014 addr1, addr0, rsrcDesc,
offset, inst_offset);
1017 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1032 if (gpuDynInst->exec_mask[lane]) {
1035 gpuDynInst->d_data))[lane];
1047 Inst_MUBUF__BUFFER_LOAD_DWORD
1048 ::Inst_MUBUF__BUFFER_LOAD_DWORD(
InFmt_MUBUF *iFmt)
1069 Wavefront *wf = gpuDynInst->wavefront();
1071 if (gpuDynInst->exec_mask.none()) {
1078 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1079 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1098 addr0, addr1, rsrcDesc,
offset, inst_offset);
1103 addr0, addr1, rsrcDesc,
offset, inst_offset);
1108 addr1, addr0, rsrcDesc,
offset, inst_offset);
1114 addr1, addr0, rsrcDesc,
offset, inst_offset);
1117 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1138 if (gpuDynInst->exec_mask[lane]) {
1141 gpuDynInst->d_data))[lane];
1152 Inst_MUBUF__BUFFER_LOAD_DWORDX2
1153 ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(
InFmt_MUBUF *iFmt)
1172 Wavefront *wf = gpuDynInst->wavefront();
1174 if (gpuDynInst->exec_mask.none()) {
1181 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1182 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1197 addr0, addr1, rsrcDesc,
offset, inst_offset);
1202 addr0, addr1, rsrcDesc,
offset, inst_offset);
1207 addr1, addr0, rsrcDesc,
offset, inst_offset);
1213 addr1, addr0, rsrcDesc,
offset, inst_offset);
1216 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1232 if (gpuDynInst->exec_mask[lane]) {
1234 vdst0[lane] = (
reinterpret_cast<VecElemU32*
>(
1235 gpuDynInst->d_data))[lane * 2];
1236 vdst1[lane] = (
reinterpret_cast<VecElemU32*
>(
1237 gpuDynInst->d_data))[lane * 2 + 1];
1250 Inst_MUBUF__BUFFER_LOAD_DWORDX3
1251 ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(
InFmt_MUBUF *iFmt)
1272 Wavefront *wf = gpuDynInst->wavefront();
1274 if (gpuDynInst->exec_mask.none()) {
1281 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1282 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1297 addr0, addr1, rsrcDesc,
offset, inst_offset);
1302 addr0, addr1, rsrcDesc,
offset, inst_offset);
1307 addr1, addr0, rsrcDesc,
offset, inst_offset);
1313 addr1, addr0, rsrcDesc,
offset, inst_offset);
1316 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1339 if (gpuDynInst->exec_mask[lane]) {
1341 vdst0[lane] = (
reinterpret_cast<VecElemU32*
>(
1342 gpuDynInst->d_data))[lane * 3];
1343 vdst1[lane] = (
reinterpret_cast<VecElemU32*
>(
1344 gpuDynInst->d_data))[lane * 3 + 1];
1345 vdst2[lane] = (
reinterpret_cast<VecElemU32*
>(
1346 gpuDynInst->d_data))[lane * 3 + 2];
1361 Inst_MUBUF__BUFFER_LOAD_DWORDX4
1362 ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(
InFmt_MUBUF *iFmt)
1383 Wavefront *wf = gpuDynInst->wavefront();
1385 if (gpuDynInst->exec_mask.none()) {
1392 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1393 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1408 addr0, addr1, rsrcDesc,
offset, inst_offset);
1413 addr0, addr1, rsrcDesc,
offset, inst_offset);
1418 addr1, addr0, rsrcDesc,
offset, inst_offset);
1424 addr1, addr0, rsrcDesc,
offset, inst_offset);
1427 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1451 if (gpuDynInst->exec_mask[lane]) {
1453 vdst0[lane] = (
reinterpret_cast<VecElemU32*
>(
1454 gpuDynInst->d_data))[lane * 4];
1455 vdst1[lane] = (
reinterpret_cast<VecElemU32*
>(
1456 gpuDynInst->d_data))[lane * 4 + 1];
1457 vdst2[lane] = (
reinterpret_cast<VecElemU32*
>(
1458 gpuDynInst->d_data))[lane * 4 + 2];
1459 vdst3[lane] = (
reinterpret_cast<VecElemU32*
>(
1460 gpuDynInst->d_data))[lane * 4 + 3];
1477 Inst_MUBUF__BUFFER_STORE_BYTE
1478 ::Inst_MUBUF__BUFFER_STORE_BYTE(
InFmt_MUBUF *iFmt)
1499 Wavefront *wf = gpuDynInst->wavefront();
1501 if (gpuDynInst->exec_mask.none()) {
1510 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1511 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1528 addr0, addr1, rsrcDesc,
offset, inst_offset);
1533 addr0, addr1, rsrcDesc,
offset, inst_offset);
1538 addr1, addr0, rsrcDesc,
offset, inst_offset);
1544 addr1, addr0, rsrcDesc,
offset, inst_offset);
1547 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1550 if (gpuDynInst->exec_mask[lane]) {
1551 (
reinterpret_cast<VecElemI8*
>(gpuDynInst->d_data))[lane]
1569 Inst_MUBUF__BUFFER_STORE_SHORT
1570 ::Inst_MUBUF__BUFFER_STORE_SHORT(
InFmt_MUBUF *iFmt)
1591 Wavefront *wf = gpuDynInst->wavefront();
1593 if (gpuDynInst->exec_mask.none()) {
1602 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1603 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1620 addr0, addr1, rsrcDesc,
offset, inst_offset);
1625 addr0, addr1, rsrcDesc,
offset, inst_offset);
1630 addr1, addr0, rsrcDesc,
offset, inst_offset);
1636 addr1, addr0, rsrcDesc,
offset, inst_offset);
1639 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1642 if (gpuDynInst->exec_mask[lane]) {
1643 (
reinterpret_cast<VecElemI16*
>(gpuDynInst->d_data))[lane]
1683 Wavefront *wf = gpuDynInst->wavefront();
1685 if (gpuDynInst->exec_mask.none()) {
1694 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1695 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1712 addr0, addr1, rsrcDesc,
offset, inst_offset);
1717 addr0, addr1, rsrcDesc,
offset, inst_offset);
1722 addr1, addr0, rsrcDesc,
offset, inst_offset);
1728 addr1, addr0, rsrcDesc,
offset, inst_offset);
1731 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1734 if (gpuDynInst->exec_mask[lane]) {
1735 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane]
1753 Inst_MUBUF__BUFFER_STORE_DWORDX2
1754 ::Inst_MUBUF__BUFFER_STORE_DWORDX2(
InFmt_MUBUF *iFmt)
1775 Wavefront *wf = gpuDynInst->wavefront();
1777 if (gpuDynInst->exec_mask.none()) {
1786 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1787 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1806 addr0, addr1, rsrcDesc,
offset, inst_offset);
1811 addr0, addr1, rsrcDesc,
offset, inst_offset);
1816 addr1, addr0, rsrcDesc,
offset, inst_offset);
1822 addr1, addr0, rsrcDesc,
offset, inst_offset);
1825 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1828 if (gpuDynInst->exec_mask[lane]) {
1829 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane * 2]
1831 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*2 + 1]
1849 Inst_MUBUF__BUFFER_STORE_DWORDX3
1850 ::Inst_MUBUF__BUFFER_STORE_DWORDX3(
InFmt_MUBUF *iFmt)
1871 Wavefront *wf = gpuDynInst->wavefront();
1873 if (gpuDynInst->exec_mask.none()) {
1882 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1883 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
1904 addr0, addr1, rsrcDesc,
offset, inst_offset);
1909 addr0, addr1, rsrcDesc,
offset, inst_offset);
1914 addr1, addr0, rsrcDesc,
offset, inst_offset);
1920 addr1, addr0, rsrcDesc,
offset, inst_offset);
1923 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
1926 if (gpuDynInst->exec_mask[lane]) {
1927 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane * 3]
1929 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*3 + 1]
1931 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*3 + 2]
1949 Inst_MUBUF__BUFFER_STORE_DWORDX4
1950 ::Inst_MUBUF__BUFFER_STORE_DWORDX4(
InFmt_MUBUF *iFmt)
1971 Wavefront *wf = gpuDynInst->wavefront();
1973 if (gpuDynInst->exec_mask.none()) {
1982 gpuDynInst->latency.init(gpuDynInst->computeUnit());
1983 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2006 addr0, addr1, rsrcDesc,
offset, inst_offset);
2011 addr0, addr1, rsrcDesc,
offset, inst_offset);
2016 addr1, addr0, rsrcDesc,
offset, inst_offset);
2022 addr1, addr0, rsrcDesc,
offset, inst_offset);
2025 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
2028 if (gpuDynInst->exec_mask[lane]) {
2029 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane * 4]
2031 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 1]
2033 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 2]
2035 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane*4 + 3]
2053 Inst_MUBUF__BUFFER_STORE_LDS_DWORD
2054 ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(
InFmt_MUBUF *iFmt)
2079 setFlag(GPUStaticInst::MemSync);
2094 Wavefront *wf = gpuDynInst->wavefront();
2096 if (gpuDynInst->exec_mask.none()) {
2103 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2104 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2106 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
2107 gpuDynInst->computeUnit()->globalMemoryPipe.
2108 issueRequest(gpuDynInst);
2110 fatal(
"Unsupported scope for flat instruction.\n");
2129 Inst_MUBUF__BUFFER_WBINVL1_VOL
2130 ::Inst_MUBUF__BUFFER_WBINVL1_VOL(
InFmt_MUBUF*iFmt)
2138 setFlag(GPUStaticInst::MemSync);
2154 Wavefront *wf = gpuDynInst->wavefront();
2156 if (gpuDynInst->exec_mask.none()) {
2163 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2164 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2166 if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
2167 gpuDynInst->computeUnit()->globalMemoryPipe.
2168 issueRequest(gpuDynInst);
2170 fatal(
"Unsupported scope for flat instruction.\n");
2184 Inst_MUBUF__BUFFER_ATOMIC_SWAP
2185 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(
InFmt_MUBUF *iFmt)
2214 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
2215 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(
InFmt_MUBUF *iFmt)
2242 Wavefront *wf = gpuDynInst->wavefront();
2244 if (gpuDynInst->exec_mask.none()) {
2251 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2252 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2271 addr0, addr1, rsrcDesc,
offset, inst_offset);
2276 addr0, addr1, rsrcDesc,
offset, inst_offset);
2281 addr1, addr0, rsrcDesc,
offset, inst_offset);
2287 addr1, addr0, rsrcDesc,
offset, inst_offset);
2291 if (gpuDynInst->exec_mask[lane]) {
2292 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->x_data))[lane]
2294 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->a_data))[lane]
2299 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
2315 if (gpuDynInst->exec_mask[lane]) {
2317 gpuDynInst->d_data))[lane];
2326 Inst_MUBUF__BUFFER_ATOMIC_ADD
2327 ::Inst_MUBUF__BUFFER_ATOMIC_ADD(
InFmt_MUBUF *iFmt)
2356 Inst_MUBUF__BUFFER_ATOMIC_SUB
2357 ::Inst_MUBUF__BUFFER_ATOMIC_SUB(
InFmt_MUBUF *iFmt)
2386 Inst_MUBUF__BUFFER_ATOMIC_SMIN
2387 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(
InFmt_MUBUF *iFmt)
2416 Inst_MUBUF__BUFFER_ATOMIC_UMIN
2417 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(
InFmt_MUBUF *iFmt)
2446 Inst_MUBUF__BUFFER_ATOMIC_SMAX
2447 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(
InFmt_MUBUF *iFmt)
2476 Inst_MUBUF__BUFFER_ATOMIC_UMAX
2477 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(
InFmt_MUBUF *iFmt)
2506 Inst_MUBUF__BUFFER_ATOMIC_AND
2507 ::Inst_MUBUF__BUFFER_ATOMIC_AND(
InFmt_MUBUF *iFmt)
2536 Inst_MUBUF__BUFFER_ATOMIC_OR
2566 Inst_MUBUF__BUFFER_ATOMIC_XOR
2567 ::Inst_MUBUF__BUFFER_ATOMIC_XOR(
InFmt_MUBUF *iFmt)
2596 Inst_MUBUF__BUFFER_ATOMIC_INC
2597 ::Inst_MUBUF__BUFFER_ATOMIC_INC(
InFmt_MUBUF *iFmt)
2626 Inst_MUBUF__BUFFER_ATOMIC_DEC
2627 ::Inst_MUBUF__BUFFER_ATOMIC_DEC(
InFmt_MUBUF *iFmt)
2656 Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16
2657 ::Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16(
InFmt_MUBUF *iFmt)
2658 :
Inst_MUBUF(iFmt,
"buffer_atomic_pk_add_bf16")
2676 Wavefront *wf = gpuDynInst->wavefront();
2678 if (gpuDynInst->exec_mask.none()) {
2684 gpuDynInst->latency.init(gpuDynInst->computeUnit());
2685 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
2702 addr0, addr1, rsrcDesc,
offset, inst_offset);
2707 addr0, addr1, rsrcDesc,
offset, inst_offset);
2712 addr1, addr0, rsrcDesc,
offset, inst_offset);
2718 addr1, addr0, rsrcDesc,
offset, inst_offset);
2722 if (gpuDynInst->exec_mask[lane]) {
2723 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->a_data))[lane]
2728 gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
2745 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
2746 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(
InFmt_MUBUF *iFmt)
2775 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2776 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(
InFmt_MUBUF *iFmt)
2777 :
Inst_MUBUF(iFmt,
"buffer_atomic_cmpswap_x2")
2789 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
2790 ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
2808 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
2809 ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(
InFmt_MUBUF *iFmt)
2838 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
2839 ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(
InFmt_MUBUF *iFmt)
2868 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
2869 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(
InFmt_MUBUF *iFmt)
2898 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
2899 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(
InFmt_MUBUF *iFmt)
2928 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
2929 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(
InFmt_MUBUF *iFmt)
2958 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
2959 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(
InFmt_MUBUF *iFmt)
2988 Inst_MUBUF__BUFFER_ATOMIC_AND_X2
2989 ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(
InFmt_MUBUF *iFmt)
3018 Inst_MUBUF__BUFFER_ATOMIC_OR_X2
3019 ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(
InFmt_MUBUF *iFmt)
3046 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
3047 ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(
InFmt_MUBUF *iFmt)
3076 Inst_MUBUF__BUFFER_ATOMIC_INC_X2
3077 ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(
InFmt_MUBUF *iFmt)
3106 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
3107 ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(
InFmt_MUBUF *iFmt)
const std::string _opcode
~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_ADD()
~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_AND()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_DEC()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_INC()
~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_OR()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_PK_ADD_BF16()
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SUB()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_ATOMIC_XOR()
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_DWORD()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_SBYTE()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_SHORT_D16()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_SSHORT()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_UBYTE()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_LOAD_USHORT()
~Inst_MUBUF__BUFFER_STORE_BYTE()
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_DWORDX2()
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_DWORDX3()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_DWORDX4()
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_DWORD()
void initiateAcc(GPUDynInstPtr) override
Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *)
~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
void execute(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_STORE_SHORT()
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_WBINVL1_VOL()
void initiateAcc(GPUDynInstPtr) override
~Inst_MUBUF__BUFFER_WBINVL1()
void execute(GPUDynInstPtr) override
Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *)
void completeAcc(GPUDynInstPtr) override
void initMemWrite(GPUDynInstPtr gpuDynInst)
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
void calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx, SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
MUBUF instructions calculate their addresses as follows:
void ldsComplete(GPUDynInstPtr gpuDynInst)
void initMemRead(GPUDynInstPtr gpuDynInst)
void initAtomicAccess(GPUDynInstPtr gpuDynInst)
Inst_MUBUF(InFmt_MUBUF *, const std::string &opcode)
void read() override
read from and write to the underlying register(s) that this operand is referring to.
void panicUnimplemented() const
void read() override
read from the vrf.
void write() override
write to the vrf.
void untrackExpInst(GPUDynInstPtr gpu_dyn_inst)
void decVMemInstsIssued()
void untrackVMemInst(GPUDynInstPtr gpu_dyn_inst)
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
#define fatal(...)
This implements a cprintf based fatal() function.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
classes that represent vector/scalar operands in VEGA ISA.
VecOperand< VecElemU32, false > VecOperandU32
ScalarOperand< ScalarRegU32, true > ConstScalarOperandU32
VecOperand< VecElemU32, true > ConstVecOperandU32
VecOperand< VecElemI8, true, 1 > ConstVecOperandI8
ScalarOperand< ScalarRegU32, true, 4 > ConstScalarOperandU128
const int NumVecElemPerVecReg(64)
VecOperand< VecElemI16, true, 1 > ConstVecOperandI16
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr