59 if (gpuDynInst->exec_mask.none()) {
68 gpuDynInst->latency.init(gpuDynInst->computeUnit());
69 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
88 if (gpuDynInst->exec_mask[lane]) {
90 gpuDynInst->d_data))[lane]);
115 if (gpuDynInst->exec_mask.none()) {
124 gpuDynInst->latency.init(gpuDynInst->computeUnit());
125 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
144 if (gpuDynInst->exec_mask[lane]) {
146 gpuDynInst->d_data))[lane]);
171 if (gpuDynInst->exec_mask.none()) {
180 gpuDynInst->latency.init(gpuDynInst->computeUnit());
181 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
200 if (gpuDynInst->exec_mask[lane]) {
202 gpuDynInst->d_data))[lane]);
258 if (gpuDynInst->exec_mask.none()) {
267 gpuDynInst->latency.init(gpuDynInst->computeUnit());
268 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
287 if (gpuDynInst->exec_mask[lane]) {
289 gpuDynInst->d_data))[lane];
315 if (gpuDynInst->exec_mask.none()) {
324 gpuDynInst->latency.init(gpuDynInst->computeUnit());
325 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
344 if (gpuDynInst->exec_mask[lane]) {
346 gpuDynInst->d_data))[lane];
372 if (gpuDynInst->exec_mask.none()) {
381 gpuDynInst->latency.init(gpuDynInst->computeUnit());
382 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
403 if (gpuDynInst->exec_mask[lane]) {
405 gpuDynInst->d_data))[lane * 3];
407 gpuDynInst->d_data))[lane * 3 + 1];
409 gpuDynInst->d_data))[lane * 3 + 2];
438 if (gpuDynInst->exec_mask.none()) {
447 gpuDynInst->latency.init(gpuDynInst->computeUnit());
448 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
470 if (gpuDynInst->exec_mask[lane]) {
472 gpuDynInst->d_data))[lane * 4];
474 gpuDynInst->d_data))[lane * 4 + 1];
476 gpuDynInst->d_data))[lane * 4 + 2];
478 gpuDynInst->d_data))[lane * 4 + 3];
507 if (gpuDynInst->exec_mask.none()) {
517 gpuDynInst->latency.init(gpuDynInst->computeUnit());
518 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
527 if (gpuDynInst->exec_mask[lane]) {
528 (
reinterpret_cast<VecElemU8*
>(gpuDynInst->d_data))[lane]
566 if (gpuDynInst->exec_mask.none()) {
576 gpuDynInst->latency.init(gpuDynInst->computeUnit());
577 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
586 if (gpuDynInst->exec_mask[lane]) {
587 (
reinterpret_cast<VecElemU16*
>(gpuDynInst->d_data))[lane]
609 :
Inst_FLAT(iFmt,
"flat_store_short_d16_hi")
626 if (gpuDynInst->exec_mask.none()) {
636 gpuDynInst->latency.init(gpuDynInst->computeUnit());
637 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
646 if (gpuDynInst->exec_mask[lane]) {
647 (
reinterpret_cast<VecElemU16*
>(gpuDynInst->d_data))[lane]
648 = (
data[lane] >> 16);
685 if (gpuDynInst->exec_mask.none()) {
695 gpuDynInst->latency.init(gpuDynInst->computeUnit());
696 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
705 if (gpuDynInst->exec_mask[lane]) {
706 (
reinterpret_cast<VecElemU32*
>(gpuDynInst->d_data))[lane]
745 if (gpuDynInst->exec_mask.none()) {
755 gpuDynInst->latency.init(gpuDynInst->computeUnit());
756 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
765 if (gpuDynInst->exec_mask[lane]) {
766 (
reinterpret_cast<VecElemU64*
>(gpuDynInst->d_data))[lane]
805 if (gpuDynInst->exec_mask.none()) {
815 gpuDynInst->latency.init(gpuDynInst->computeUnit());
816 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
829 if (gpuDynInst->exec_mask[lane]) {
831 gpuDynInst->d_data))[lane * 3] = data0[lane];
833 gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
835 gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
873 if (gpuDynInst->exec_mask.none()) {
883 gpuDynInst->latency.init(gpuDynInst->computeUnit());
884 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
899 if (gpuDynInst->exec_mask[lane]) {
901 gpuDynInst->d_data))[lane * 4] = data0[lane];
903 gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
905 gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
907 gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
967 Inst_FLAT__FLAT_ATOMIC_CMPSWAP
968 ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(
InFmt_FLAT *iFmt)
1453 :
Inst_FLAT(iFmt,
"flat_atomic_swap_x2")
1494 :
Inst_FLAT(iFmt,
"flat_atomic_cmpswap_x2")
1619 :
Inst_FLAT(iFmt,
"flat_atomic_smin_x2")
1660 :
Inst_FLAT(iFmt,
"flat_atomic_umin_x2")
1701 :
Inst_FLAT(iFmt,
"flat_atomic_smax_x2")
1742 :
Inst_FLAT(iFmt,
"flat_atomic_umax_x2")
1989 :
Inst_FLAT(iFmt,
"flat_atomic_add_f32")
2025 :
Inst_FLAT(iFmt,
"flat_atomic_pk_add_f16")
2059 :
Inst_FLAT(iFmt,
"flat_atomic_add_f64")
2095 :
Inst_FLAT(iFmt,
"flat_atomic_min_f64")
2131 :
Inst_FLAT(iFmt,
"flat_atomic_max_f64")
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_ADD_F32()
Inst_FLAT__FLAT_ATOMIC_ADD_F32(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_ADD_F64()
Inst_FLAT__FLAT_ATOMIC_ADD_F64(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_ADD_X2()
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_ADD_X2(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *)
~Inst_FLAT__FLAT_ATOMIC_ADD()
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_AND_X2()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_AND_X2(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_AND()
Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_CMPSWAP()
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_DEC_X2()
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_DEC_X2(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *)
~Inst_FLAT__FLAT_ATOMIC_DEC()
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_INC_X2()
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_INC_X2(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *)
~Inst_FLAT__FLAT_ATOMIC_INC()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_MAX_F64(InFmt_FLAT *)
~Inst_FLAT__FLAT_ATOMIC_MAX_F64()
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_MIN_F64()
Inst_FLAT__FLAT_ATOMIC_MIN_F64(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_OR_X2(InFmt_FLAT *)
~Inst_FLAT__FLAT_ATOMIC_OR_X2()
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_OR()
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_SMAX_X2()
Inst_FLAT__FLAT_ATOMIC_SMAX_X2(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_SMAX()
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_SMIN_X2(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_SMIN_X2()
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_SMIN()
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_SUB_X2(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_SUB_X2()
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_SUB()
Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_SWAP_X2(InFmt_FLAT *)
~Inst_FLAT__FLAT_ATOMIC_SWAP_X2()
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_SWAP()
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_UMAX_X2()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_UMAX_X2(InFmt_FLAT *)
~Inst_FLAT__FLAT_ATOMIC_UMAX()
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_UMIN_X2(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_UMIN_X2()
~Inst_FLAT__FLAT_ATOMIC_UMIN()
Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_XOR_X2(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_XOR_X2()
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_ATOMIC_XOR()
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_LOAD_DWORDX2()
Inst_FLAT__FLAT_LOAD_DWORDX2(InFmt_FLAT *)
Inst_FLAT__FLAT_LOAD_DWORDX3(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_LOAD_DWORDX3()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_LOAD_DWORDX4()
Inst_FLAT__FLAT_LOAD_DWORDX4(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *)
~Inst_FLAT__FLAT_LOAD_DWORD()
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *)
~Inst_FLAT__FLAT_LOAD_SBYTE()
void completeAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *)
~Inst_FLAT__FLAT_LOAD_SSHORT()
Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *)
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_LOAD_UBYTE()
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_LOAD_USHORT()
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_STORE_BYTE()
void initiateAcc(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_STORE_DWORDX2()
Inst_FLAT__FLAT_STORE_DWORDX2(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_STORE_DWORDX3(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_STORE_DWORDX3()
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_STORE_DWORDX4()
Inst_FLAT__FLAT_STORE_DWORDX4(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
void completeAcc(GPUDynInstPtr) override
~Inst_FLAT__FLAT_STORE_DWORD()
~Inst_FLAT__FLAT_STORE_SHORT_D16_HI()
void completeAcc(GPUDynInstPtr) override
Inst_FLAT__FLAT_STORE_SHORT_D16_HI(InFmt_FLAT *)
void execute(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void execute(GPUDynInstPtr) override
~Inst_FLAT__FLAT_STORE_SHORT()
Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *)
void completeAcc(GPUDynInstPtr) override
void initiateAcc(GPUDynInstPtr) override
void atomicComplete(GPUDynInstPtr gpuDynInst)
void calcAddr(GPUDynInstPtr gpuDynInst, ScalarRegU32 vaddr, ScalarRegU32 saddr, ScalarRegI32 offset)
void initMemRead(GPUDynInstPtr gpuDynInst)
void atomicExecute(GPUDynInstPtr gpuDynInst)
void issueRequestHelper(GPUDynInstPtr gpuDynInst)
void initAtomicAccess(GPUDynInstPtr gpuDynInst)
void initMemWrite(GPUDynInstPtr gpuDynInst)
void panicUnimplemented() const
void read() override
read from the vrf.
void write() override
write to the vrf.
void decVMemInstsIssued()
void decLGKMInstsIssued()
constexpr unsigned NumVecElemPerVecReg
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr