This section collects the FLAT (flat-address) memory instructions of the VEGA ISA model: loads, stores, and atomics.

Every FLAT load's execute() opens with the same prologue: if the execute mask has no active lanes the instruction exits early (undoing its issue bookkeeping through the Wavefront helpers listed near the end of this section), and otherwise it stamps its latency from the compute unit's clock:

    if (gpuDynInst->exec_mask.none()) {
        // ... undo issue bookkeeping and return ...
    }

    gpuDynInst->latency.init(gpuDynInst->computeUnit());
    gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

The sub-word loads (the byte and short variants) then share one completion pattern: each active lane's element is read out of the staging buffer d_data through a cast to the element's width and widened into the 32-bit destination:

    if (gpuDynInst->exec_mask[lane]) {
        vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
            gpuDynInst->d_data))[lane]);
    }

with VecElemU8 swapped for the signed or 16-bit element type as the opcode requires.
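Pieced together under those conventions, a complete completion hook for one of the sub-word loads looks like the following; a minimal sketch assuming gem5's VEGA operand types (VecOperandU32, VecElemI8, NumVecElemPerVecReg), with the signed-byte case chosen to show the sign extension:

    void
    Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                // Sign-extend the byte fetched for this lane into the
                // 32-bit destination VGPR element.
                vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    }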
FLAT_LOAD_DWORD needs no widening; after the same prologue, its completion loop copies one dword per active lane:

    if (gpuDynInst->exec_mask[lane]) {
        vdst[lane] = (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane];
    }
FLAT_LOAD_DWORDX2 returns two dwords per lane, indexed out of d_data with a two-dword stride:

    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 2];
    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 2 + 1];

A parallel branch in the same hook indexes d_data just once per lane, treating the pair as a single packed element.
FLAT_LOAD_DWORDX3 does the same with a three-dword stride:

    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 3];
    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 3 + 1];
    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 3 + 2];

again alongside a packed once-per-lane branch.
FLAT_LOAD_DWORDX4 extends the stride to four:

    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 4];
    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 4 + 1];
    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 4 + 2];
    vdst3[lane] = (reinterpret_cast<VecElemU32*>(
        gpuDynInst->d_data))[lane * 4 + 3];

with the same packed once-per-lane branch beside it.
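Assembled the same way, the widest load's completion hook distributes the four dwords to four consecutive destination VGPRs; a sketch assuming the destination registers start at extData.VDST:

    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
        VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                // Four consecutive dwords in d_data map to four
                // consecutive VGPR elements for this lane.
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1];
                vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2];
                vdst3[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3];
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
        vdst3.write();
    }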
The stores mirror this structure. FLAT_STORE_BYTE runs the same empty-mask/latency prologue, then packs each active lane's source byte into d_data before the write request is issued:

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<VecElemU8*>(
            gpuDynInst->d_data))[lane] = data[lane];
    }
FLAT_STORE_SHORT is identical but 16 bits wide:

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<VecElemU16*>(
            gpuDynInst->d_data))[lane] = data[lane];
    }
FLAT_STORE_SHORT_D16_HI, constructed as

    Inst_FLAT(iFmt, "flat_store_short_d16_hi")

differs only in which half of the source dword it stores: after the usual prologue it takes the high 16 bits of each lane's data:

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<VecElemU16*>(
            gpuDynInst->d_data))[lane] = (data[lane] >> 16);
    }
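A standalone illustration of the D16_HI semantics, runnable anywhere; the value 0xBEEFCAFE stands in for one lane's source dword:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint32_t src = 0xBEEFCAFE;
        // D16_HI stores the high half of the 32-bit source element.
        uint16_t stored = static_cast<uint16_t>(src >> 16);
        std::printf("stored half: 0x%04X\n", stored);  // prints 0xBEEF
        return 0;
    }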
FLAT_STORE_DWORD packs one dword per lane:

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane] = data[lane];
    }
FLAT_STORE_DWORDX2 packs each lane's pair of source dwords as one 64-bit element:

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<VecElemU64*>(
            gpuDynInst->d_data))[lane] = data[lane];
    }
FLAT_STORE_DWORDX3 strides three dwords per lane, one per source register:

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * 3] = data0[lane];
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
    }
FLAT_STORE_DWORDX4 does the same with a four-dword stride:

    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * 4] = data0[lane];
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
        (reinterpret_cast<VecElemU32*>(
            gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
    }
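For context, here is a minimal execute() skeleton for one of the stores; a sketch assuming gem5's field names (extData.DATA, extData.ADDR, extData.SADDR, instData.OFFSET) and the Inst_FLAT helpers listed later in this section. The packing loops above slot in just before the request is issued:

    void
    Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (gpuDynInst->exec_mask.none()) {
            // All lanes inactive: release issue bookkeeping and return
            // (see the Wavefront helpers later in this section).
            return;
        }

        gpuDynInst->execUnitId = wf->execId();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
        data.read();

        // Resolve per-lane flat addresses from the vector address
        // register, optional scalar base, and immediate offset.
        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane] = data[lane];
            }
        }

        issueRequestHelper(gpuDynInst);
    }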
The atomics open with the same constructor idiom; compare-and-swap, for example:

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_cmpswap")
    {
        // ... instruction flags set in the body ...
    }
The remaining atomics use the same constructor idiom, each forwarding its disassembly mnemonic to Inst_FLAT:

    : Inst_FLAT(iFmt, "flat_atomic_swap_x2")
    : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
    : Inst_FLAT(iFmt, "flat_atomic_smin_x2")
    : Inst_FLAT(iFmt, "flat_atomic_umin_x2")
    : Inst_FLAT(iFmt, "flat_atomic_smax_x2")
    : Inst_FLAT(iFmt, "flat_atomic_umax_x2")
    : Inst_FLAT(iFmt, "flat_atomic_add_f32")
    : Inst_FLAT(iFmt, "flat_atomic_pk_add_f16")
    : Inst_FLAT(iFmt, "flat_atomic_add_f64")
    : Inst_FLAT(iFmt, "flat_atomic_min_f64")
    : Inst_FLAT(iFmt, "flat_atomic_max_f64")
Every FLAT instruction class in this file exposes the same five members: a constructor taking the decoded InFmt_FLAT *, a destructor, and the three pipeline hooks execute(GPUDynInstPtr), initiateAcc(GPUDynInstPtr), and completeAcc(GPUDynInstPtr), each an override. Grouped, the classes are:

Atomics (prefix Inst_FLAT__FLAT_ATOMIC_):
    ADD, ADD_X2, ADD_F32, ADD_F64, AND, AND_X2, CMPSWAP, CMPSWAP_X2,
    DEC, DEC_X2, INC, INC_X2, MAX_F64, MIN_F64, OR, OR_X2, PK_ADD_F16,
    SMAX, SMAX_X2, SMIN, SMIN_X2, SUB, SUB_X2, SWAP, SWAP_X2,
    UMAX, UMAX_X2, UMIN, UMIN_X2, XOR, XOR_X2

Loads (prefix Inst_FLAT__FLAT_LOAD_):
    UBYTE, SBYTE, USHORT, SSHORT, DWORD, DWORDX2, DWORDX3, DWORDX4

Stores (prefix Inst_FLAT__FLAT_STORE_):
    BYTE, SHORT, SHORT_D16_HI, DWORD, DWORDX2, DWORDX3, DWORDX4

All of them derive from Inst_FLAT, which also answers bool isFlatScratch() const to distinguish scratch (private) accesses; a representative declaration is sketched below.
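Reconstructed from the members listed above, each class declaration reduces to the same shape:

    class Inst_FLAT__FLAT_LOAD_DWORD : public Inst_FLAT
    {
      public:
        Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *);
        ~Inst_FLAT__FLAT_LOAD_DWORD();

        void execute(GPUDynInstPtr) override;      // decode operands, issue
        void initiateAcc(GPUDynInstPtr) override;  // start the memory access
        void completeAcc(GPUDynInstPtr) override;  // write results back
    };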
Inst_FLAT itself supplies the machinery the subclasses compose:

    Inst_FLAT(InFmt_FLAT *, const std::string &opcode)
    void calcAddr(GPUDynInstPtr gpuDynInst, ScalarRegU32 vaddr,
                  ScalarRegU32 saddr, ScalarRegI32 offset)
    void issueRequestHelper(GPUDynInstPtr gpuDynInst)
    void initMemRead(GPUDynInstPtr gpuDynInst)
    void initMemWrite(GPUDynInstPtr gpuDynInst)
    void initAtomicAccess(GPUDynInstPtr gpuDynInst)
    void atomicExecute(GPUDynInstPtr gpuDynInst)
    void atomicComplete(GPUDynInstPtr gpuDynInst)
    void panicUnimplemented() const

calcAddr resolves the flat address from the vector address register, optional scalar base, and immediate offset; the initMem*/initAtomicAccess helpers kick off the actual memory access from initiateAcc(); panicUnimplemented() backs opcodes the model does not support.
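In the load and store classes, initiateAcc() typically reduces to a single helper call; a sketch assuming templated initMemRead/initMemWrite variants whose template parameter selects the per-lane element width:

    void
    Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Read one 32-bit element per active lane into d_data.
        initMemRead<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Write the elements packed into d_data by execute().
        initMemWrite<VecElemU32>(gpuDynInst);
    }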
The vector operand wrappers expose a pair of register-file hooks:

    void read() override    // read the operand's register from the VRF
    void write() override   // write the operand back to the VRF
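A typical round trip through these hooks, assuming the operand typedefs listed at the end of this section; doLane() is a hypothetical stand-in for per-lane work:

    // addr is a read-only source, vdst a writable destination; register
    // indices extData.ADDR / extData.VDST follow this section's conventions.
    ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
    VecOperandU32 vdst(gpuDynInst, extData.VDST);

    addr.read();   // pull the operand's register out of the VRF
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vdst[lane] = doLane(addr[lane]);  // hypothetical per-lane work
        }
    }
    vdst.write();  // commit the result back to the VRF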
The Wavefront bookkeeping used by the early-exit path tracks outstanding instruction counts per category:

    void decVMemInstsIssued()
    void untrackVMemInst(GPUDynInstPtr gpu_dyn_inst)
    void decLGKMInstsIssued()
    void untrackLGKMInst(GPUDynInstPtr gpu_dyn_inst)
    void untrackExpInst(GPUDynInstPtr gpu_dyn_inst)
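These are the calls behind the early-exit branch in the execute() prologue shown at the top of this section; a sketch assuming wf points at the issuing Wavefront:

    if (gpuDynInst->exec_mask.none()) {
        wf->decVMemInstsIssued();        // undo the vector-memory issue count
        wf->untrackVMemInst(gpuDynInst);
        wf->decLGKMInstsIssued();        // FLAT may also count against LGKM
        wf->untrackLGKMInst(gpuDynInst);
        return;                          // nothing to issue to memory
    }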
Finally, the classes that represent vector/scalar operands in the VEGA ISA, with the typedefs and constants used throughout:

    typedef VecOperand<VecElemU8, true, 1> ConstVecOperandU8
    typedef VecOperand<VecElemU16, true, 1> ConstVecOperandU16
    typedef VecOperand<VecElemU32, true> ConstVecOperandU32
    typedef VecOperand<VecElemU64, true> ConstVecOperandU64
    typedef VecOperand<VecElemU32, false> VecOperandU32
    const int NumVecElemPerVecReg(64)
    typedef std::shared_ptr<GPUDynInst> GPUDynInstPtr
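Tying the typedefs together, a read-only operand feeding a 64-lane loop; process() is a hypothetical consumer of one lane's dword:

    ConstVecOperandU32 data(gpuDynInst, extData.DATA);
    data.read();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            process(data[lane]);  // hypothetical per-lane consumer
        }
    }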