37 #include "debug/GCN3.hh"
38 #include "debug/GPUSync.hh"
73 >= 0x100000000ULL ? 1 : 0;
617 sdst = src0.
rawData() &~ src1.rawData();
647 sdst = src0.
rawData() &~ src1.rawData();
677 sdst = src0.
rawData() |~ src1.rawData();
707 sdst = src0.
rawData() |~ src1.rawData();
1427 scc = (src.
rawData() == simm16) ? 1 : 0;
1452 scc = (src.
rawData() != simm16) ? 1 : 0;
1477 scc = (src.
rawData() > simm16) ? 1 : 0;
1502 scc = (src.
rawData() >= simm16) ? 1 : 0;
1527 scc = (src.
rawData() < simm16) ? 1 : 0;
1552 scc = (src.
rawData() <= simm16) ? 1 : 0;
1577 scc = (src.rawData() == simm16) ? 1 : 0;
1602 scc = (src.rawData() != simm16) ? 1 : 0;
1627 scc = (src.rawData() > simm16) ? 1 : 0;
1652 scc = (src.rawData() >= simm16) ? 1 : 0;
1677 scc = (src.rawData() < simm16) ? 1 : 0;
1702 scc = (src.rawData() <= simm16) ? 1 : 0;
1834 if (hwregId==1 && size==2
1836 warn_once(
"Be cautious that s_setreg_b32 has no real effect "
1837 "on FP modes: %s\n", gpuDynInst->disassemble());
1877 if (hwregId==1 && size==2
1879 warn_once(
"Be cautious that s_setreg_imm32_b32 has no real effect "
1880 "on FP modes: %s\n", gpuDynInst->disassemble());
2498 sdst = sext<std::numeric_limits<ScalarRegI8>::digits>(
2523 sdst = sext<std::numeric_limits<ScalarRegI16>::digits>(
2640 Wavefront *wf = gpuDynInst->wavefront();
2664 Wavefront *wf = gpuDynInst->wavefront();
2687 Wavefront *wf = gpuDynInst->wavefront();
2733 Wavefront *wf = gpuDynInst->wavefront();
2742 scc = wf->
execMask().any() ? 1 : 0;
2765 Wavefront *wf = gpuDynInst->wavefront();
2774 scc = wf->
execMask().any() ? 1 : 0;
2797 Wavefront *wf = gpuDynInst->wavefront();
2806 scc = wf->
execMask().any() ? 1 : 0;
2814 :
Inst_SOP1(iFmt,
"s_andn2_saveexec_b64")
2829 Wavefront *wf = gpuDynInst->wavefront();
2838 scc = wf->
execMask().any() ? 1 : 0;
2846 :
Inst_SOP1(iFmt,
"s_orn2_saveexec_b64")
2861 Wavefront *wf = gpuDynInst->wavefront();
2870 scc = wf->
execMask().any() ? 1 : 0;
2878 :
Inst_SOP1(iFmt,
"s_nand_saveexec_b64")
2893 Wavefront *wf = gpuDynInst->wavefront();
2902 scc = wf->
execMask().any() ? 1 : 0;
2925 Wavefront *wf = gpuDynInst->wavefront();
2934 scc = wf->
execMask().any() ? 1 : 0;
2942 :
Inst_SOP1(iFmt,
"s_xnor_saveexec_b64")
2957 Wavefront *wf = gpuDynInst->wavefront();
2966 scc = wf->
execMask().any() ? 1 : 0;
3176 sdst = std::abs(src.
rawData());
3761 Wavefront *wf = gpuDynInst->wavefront();
3791 DPRINTF(GPUSync,
"CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
3792 "program and decrementing max barrier count for "
3793 "barrier Id%d. New max count: %d.\n", cu->
cu_id,
3798 DPRINTF(GPUExec,
"CU%d: decrease ref ctr WG[%d] to [%d]\n",
3808 DPRINTF(GPUExec,
"Doing return for CU%d: WF[%d][%d][%d]\n",
3828 DPRINTF(GPUSync,
"CU[%d] WF[%d][%d] Wave[%d] - All waves are "
3829 "now complete. Releasing barrier Id%d.\n", cu->
cu_id,
3852 if (!kernelEnd || !relNeeded) {
3868 gpuDynInst->simdId = wf->
simdId;
3869 gpuDynInst->wfSlotId = wf->
wfSlotId;
3870 gpuDynInst->wfDynId = wf->
wfDynId;
3872 DPRINTF(GPUExec,
"inject global memory fence for CU%d: "
3900 Wavefront *wf = gpuDynInst->wavefront();
3942 Wavefront *wf = gpuDynInst->wavefront();
3971 Wavefront *wf = gpuDynInst->wavefront();
4001 Wavefront *wf = gpuDynInst->wavefront();
4031 Wavefront *wf = gpuDynInst->wavefront();
4059 Wavefront *wf = gpuDynInst->wavefront();
4084 Wavefront *wf = gpuDynInst->wavefront();
4113 Wavefront *wf = gpuDynInst->wavefront();
4119 DPRINTF(GPUSync,
"CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
4120 "barrier Id%d. %d waves now at barrier, %d waves "
4168 gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
4204 gpuDynInst->wavefront()->setSleepTime(64 * simm16);
4368 :
Inst_SOPP(iFmt,
"s_cbranch_cdbgsys_or_user")
4386 :
Inst_SOPP(iFmt,
"s_cbranch_cdbgsys_and_user")
4478 Wavefront *wf = gpuDynInst->wavefront();
4480 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4481 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4497 gpuDynInst->computeUnit()->scalarMemoryPipe
4498 .issueRequest(gpuDynInst);
4504 initMemRead<1>(gpuDynInst);
4532 Wavefront *wf = gpuDynInst->wavefront();
4534 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4535 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4551 gpuDynInst->computeUnit()->scalarMemoryPipe.
4552 issueRequest(gpuDynInst);
4558 initMemRead<2>(gpuDynInst);
4584 Wavefront *wf = gpuDynInst->wavefront();
4586 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4587 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4603 gpuDynInst->computeUnit()->scalarMemoryPipe.
4604 issueRequest(gpuDynInst);
4610 initMemRead<4>(gpuDynInst);
4636 Wavefront *wf = gpuDynInst->wavefront();
4638 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4639 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4655 gpuDynInst->computeUnit()->scalarMemoryPipe.
4656 issueRequest(gpuDynInst);
4662 initMemRead<8>(gpuDynInst);
4688 Wavefront *wf = gpuDynInst->wavefront();
4690 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4691 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4707 gpuDynInst->computeUnit()->scalarMemoryPipe.
4708 issueRequest(gpuDynInst);
4714 initMemRead<16>(gpuDynInst);
4726 :
Inst_SMEM(iFmt,
"s_buffer_load_dword")
4741 Wavefront *wf = gpuDynInst->wavefront();
4743 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4744 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4760 gpuDynInst->computeUnit()->scalarMemoryPipe
4761 .issueRequest(gpuDynInst);
4767 initMemRead<1>(gpuDynInst);
4780 :
Inst_SMEM(iFmt,
"s_buffer_load_dwordx2")
4795 Wavefront *wf = gpuDynInst->wavefront();
4797 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4798 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4814 gpuDynInst->computeUnit()->scalarMemoryPipe
4815 .issueRequest(gpuDynInst);
4821 initMemRead<2>(gpuDynInst);
4834 :
Inst_SMEM(iFmt,
"s_buffer_load_dwordx4")
4849 Wavefront *wf = gpuDynInst->wavefront();
4851 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4852 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4868 gpuDynInst->computeUnit()->scalarMemoryPipe
4869 .issueRequest(gpuDynInst);
4875 initMemRead<4>(gpuDynInst);
4888 :
Inst_SMEM(iFmt,
"s_buffer_load_dwordx8")
4903 Wavefront *wf = gpuDynInst->wavefront();
4905 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4906 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4922 gpuDynInst->computeUnit()->scalarMemoryPipe
4923 .issueRequest(gpuDynInst);
4929 initMemRead<8>(gpuDynInst);
4942 :
Inst_SMEM(iFmt,
"s_buffer_load_dwordx16")
4957 Wavefront *wf = gpuDynInst->wavefront();
4959 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4960 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4976 gpuDynInst->computeUnit()->scalarMemoryPipe
4977 .issueRequest(gpuDynInst);
4983 initMemRead<16>(gpuDynInst);
5013 Wavefront *wf = gpuDynInst->wavefront();
5015 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5016 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5024 std::memcpy((
void*)gpuDynInst->scalar_data, sdata.
rawDataPtr(),
5037 gpuDynInst->computeUnit()->scalarMemoryPipe.
5038 issueRequest(gpuDynInst);
5044 initMemWrite<1>(gpuDynInst);
5068 Wavefront *wf = gpuDynInst->wavefront();
5070 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5071 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5079 std::memcpy((
void*)gpuDynInst->scalar_data, sdata.
rawDataPtr(),
5092 gpuDynInst->computeUnit()->scalarMemoryPipe.
5093 issueRequest(gpuDynInst);
5099 initMemWrite<2>(gpuDynInst);
5123 Wavefront *wf = gpuDynInst->wavefront();
5125 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5126 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5134 std::memcpy((
void*)gpuDynInst->scalar_data, sdata.
rawDataPtr(),