/*
 * Copyright (c) 2024 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "arch/amdgpu/vega/insts/instructions.hh"
#include "arch/amdgpu/vega/insts/inst_util.hh"
#include "debug/VEGA.hh"

namespace gem5
{

namespace VegaISA
{
    // --- Inst_VOP2__V_CNDMASK_B32 class methods ---

    Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_cndmask_b32")
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_CNDMASK_B32

    Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
    {
    } // ~Inst_VOP2__V_CNDMASK_B32

    // --- description from .arch file ---
    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
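    // VCC here is a 64-bit scalar operand holding one select bit per lane,
    // so a single v_cndmask_b32 performs up to 64 independent selects. For
    // example, with VCC = 0x5, lanes 0 and 2 take S1 and every other active
    // lane takes S0.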
    void
    Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_ADD_F32 class methods ---

    Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_ADD_F32

    Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
    {
    } // ~Inst_VOP2__V_ADD_F32

    // --- description from .arch file ---
    // D.f = S0.f + S1.f.
    void
    Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);

        if (isDPPInst()) {
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
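            // processDPP permutes src0 across lanes according to DPP_CTRL
            // (row shifts, rotates, etc.), so the add below may combine
            // values that originated in different lanes of the wavefront.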

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] + src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_SUB_F32 class methods ---

    Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUB_F32

    Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
    {
    } // ~Inst_VOP2__V_SUB_F32

    // --- description from .arch file ---
    // D.f = S0.f - S1.f.
    // SQ translates to V_ADD_F32.
    void
    Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_SUBREV_F32 class methods ---

    Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUBREV_F32

    Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
    {
    } // ~Inst_VOP2__V_SUBREV_F32

    // --- description from .arch file ---
    // D.f = S1.f - S0.f.
    // SQ translates to V_ADD_F32.
    void
    Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_LEGACY_F32 class methods ---

    Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_LEGACY_F32

    Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
    {
    } // ~Inst_VOP2__V_MUL_LEGACY_F32

    // --- description from .arch file ---
    // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0).
    void
    Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_F32 class methods ---

    Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_F32

    Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
    {
    } // ~Inst_VOP2__V_MUL_F32

    // --- description from .arch file ---
    // D.f = S0.f * S1.f.
    void
    Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

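        // The chain below spells out the IEEE edge cases: NaN inputs
        // propagate, 0 * inf produces NaN, and zero or denormal inputs
        // (denormals are flushed to zero here) yield a signed zero or
        // infinity whose sign is the XOR of the operand signs.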
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_I32_I24 class methods ---

    Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_I32_I24

    Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_I32_I24

    // --- description from .arch file ---
    // D.i = S0.i[23:0] * S1.i[23:0].
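    // Only the low 24 bits of each source participate, reinterpreted as a
    // signed 24-bit value. E.g. src0 = 0x00FFFFFF sign-extends via sext<24>
    // to -1, so multiplying by 2 yields 0xFFFFFFFE (-2), not 0x01FFFFFE.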
    void
    Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_HI_I32_I24 class methods ---

    Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_I32_I24

    Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_HI_I32_I24

    // --- description from .arch file ---
    // D.i = (S0.i[23:0] * S1.i[23:0])>>32.
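    // The product of two signed 24-bit values fits in 48 bits, so the
    // multiply is performed in 64-bit temporaries and the upper half of
    // the sign-extended product is returned.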
    void
    Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 tmp_src0
                    = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
                VecElemI64 tmp_src1
                    = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));

                vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_U32_U24 class methods ---

    Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_U32_U24

    Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_U32_U24

    // --- description from .arch file ---
    // D.u = S0.u[23:0] * S1.u[23:0].
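    // Rather than open-coding the operand reads and writeback, this
    // instruction passes a per-lane lambda to the generic vop2Helper,
    // which applies it to every active lane and writes vdst back.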
    void
    Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1,
                         VecOperandU32& vdst, Wavefront* wf) {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = bits(src0[lane], 23, 0) *
                                 bits(src1[lane], 23, 0);
                }
            }
        };

        vop2Helper<ConstVecOperandU32, VecOperandU32>(gpuDynInst, opImpl);
    } // execute
    // --- Inst_VOP2__V_MUL_HI_U32_U24 class methods ---

    Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_U32_U24

    Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_HI_U32_U24

    // --- description from .arch file ---
    // D.i = (S0.u[23:0] * S1.u[23:0])>>32.
    void
    Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
                VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
                vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MIN_F32 class methods ---

    Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MIN_F32

    Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32()
    {
    } // ~Inst_VOP2__V_MIN_F32

    // --- description from .arch file ---
    // D.f = (S0.f < S1.f ? S0.f : S1.f).
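    // Note std::fmin implements IEEE minNum semantics: if exactly one
    // input is a NaN it returns the other input, which differs from a
    // literal reading of the ternary in the description above.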
    void
    Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MAX_F32 class methods ---

    Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MAX_F32

    Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32()
    {
    } // ~Inst_VOP2__V_MAX_F32

    // --- description from .arch file ---
    // D.f = (S0.f >= S1.f ? S0.f : S1.f).
    void
    Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MIN_I32 class methods ---

    Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_I32

    Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32()
    {
    } // ~Inst_VOP2__V_MIN_I32

    // --- description from .arch file ---
    // D.i = min(S0.i, S1.i).
    void
    Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MAX_I32 class methods ---

    Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_I32

    Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32()
    {
    } // ~Inst_VOP2__V_MAX_I32

    // --- description from .arch file ---
    // D.i = max(S0.i, S1.i).
    void
    Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MIN_U32 class methods ---

    Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_U32

    Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32()
    {
    } // ~Inst_VOP2__V_MIN_U32

    // --- description from .arch file ---
    // D.u = min(S0.u, S1.u).
    void
    Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MAX_U32 class methods ---

    Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_U32

    Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32()
    {
    } // ~Inst_VOP2__V_MAX_U32

    // --- description from .arch file ---
    // D.u = max(S0.u, S1.u).
    void
    Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_LSHRREV_B32 class methods ---

    Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshrrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHRREV_B32

    Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32()
    {
    } // ~Inst_VOP2__V_LSHRREV_B32

    // --- description from .arch file ---
    // D.u = S1.u >> S0.u[4:0].
    // The vacated bits are set to zero.
    // SQ translates this to an internal SP opcode.
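    // The "rev" opcodes swap the usual operand roles: the shift amount
    // comes from SRC0 (the only VOP2 operand that can encode an SGPR or
    // inline constant) and the value being shifted comes from VSRC1.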
    void
    Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_ASHRREV_I32 class methods ---

    Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ashrrev_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ASHRREV_I32

    Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32()
    {
    } // ~Inst_VOP2__V_ASHRREV_I32

    // --- description from .arch file ---
    // D.i = signext(S1.i) >> S0.i[4:0].
    // The vacated bits are set to the sign bit of the input value.
    // SQ translates this to an internal SP opcode.
    void
    Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_LSHLREV_B32 class methods ---

    Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshlrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHLREV_B32

    Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
    {
    } // ~Inst_VOP2__V_LSHLREV_B32

    // --- description from .arch file ---
    // D.u = S1.u << S0.u[4:0].
    // SQ translates this to an internal SP opcode.
    void
    Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and vdst during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_U,
                    extData.iFmt_VOP_SDWA.CLMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);
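            // processSDWA_src applies the sub-dword selects (byte/word
            // extraction, sign-extension, abs/neg modifiers) to the
            // sources before the shift is performed; processSDWA_dst
            // below merges the result into the selected part of vdst.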

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_AND_B32 class methods ---

    Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_and_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_AND_B32

    Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
    {
    } // ~Inst_VOP2__V_AND_B32

    // --- description from .arch file ---
    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);

        if (isDPPInst()) {
            VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] & src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] & src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_OR_B32 class methods ---

    Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_or_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_OR_B32

    Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
    {
    } // ~Inst_VOP2__V_OR_B32

    // --- description from .arch file ---
    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_U,
                    extData.iFmt_VOP_SDWA.CLMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] | src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] | src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_XOR_B32 class methods ---

    Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_xor_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_XOR_B32

    Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
    {
    } // ~Inst_VOP2__V_XOR_B32

    // --- description from .arch file ---
    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MAC_F32 class methods ---

    Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F32

    Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
    {
    } // ~Inst_VOP2__V_MAC_F32

    // --- description from .arch file ---
    // D.f = S0.f * S1.f + D.f.
    // SQ translates to V_MAD_F32.
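    // MAC reads the destination register as a third source operand (the
    // accumulator), which is why execute() below calls vdst.read() before
    // performing the fused multiply-add.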
    void
    Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();
        vdst.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);

        if (isDPPInst()) {
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
                                          vdst[lane]);
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MADMK_F32 class methods ---

    Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F32

    Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
    {
    } // ~Inst_VOP2__V_MADMK_F32

    // --- description from .arch file ---
    // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // --- modifiers.
    // SQ translates to V_MAD_F32.
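    // K is the 32-bit literal that follows the instruction in the
    // instruction stream; the decoder delivers it as extData.imm_f32.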
    void
    Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], k, src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MADAK_F32 class methods ---

    Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madak_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADAK_F32

    Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32()
    {
    } // ~Inst_VOP2__V_MADAK_F32

    // --- description from .arch file ---
    // D.f = S0.f * S1.f + K; K is a 32-bit inline constant.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // --- modifiers.
    // SQ translates to V_MAD_F32.
    void
    Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], k);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_ADD_CO_U32 class methods ---

    Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_ADD_CO_U32

    Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32()
    {
    } // ~Inst_VOP2__V_ADD_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED
    // --- overflow or carry-out for V_ADDC_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
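    // The carry-out is detected by widening both addends to 64 bits: the
    // 32-bit sum wraps exactly when the 64-bit sum is >= 2^32
    // (0x100000000ULL), and the per-lane result is recorded in VCC.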
    void
    Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_U,
                    extData.iFmt_VOP_SDWA.CLMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] + src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                    vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                    vcc.setBit(lane, ((VecElemU64)src0[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }
        }

        vcc.write();
        vdst.write();
    } // execute
    // --- Inst_VOP2__V_SUB_CO_U32 class methods ---

    Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUB_CO_U32

    Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32()
    {
    } // ~Inst_VOP2__V_SUB_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out for V_SUBB_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP2__V_SUBREV_CO_U32 class methods ---

    Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUBREV_CO_U32

    Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32()
    {
    } // ~Inst_VOP2__V_SUBREV_CO_U32

    // --- description from .arch file ---
    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out for V_SUBB_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP2__V_ADDC_CO_U32 class methods ---

    Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_addc_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_ADDC_CO_U32

    Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32()
    {
    } // ~Inst_VOP2__V_ADDC_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
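    // Together with V_ADD_CO_U32 this forms multi-word addition: the low
    // dword uses v_add_co_u32 to produce a carry in VCC, and each higher
    // dword uses v_addc_co_u32 to consume and regenerate that carry.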
    void
    Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane]
                    + bits(vcc.rawData(), lane);
                vcc.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]
                    + (VecElemU64)bits(vcc.rawData(), lane, lane))
                        >= 0x100000000 ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP2__V_SUBB_CO_U32 class methods ---

    Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subb_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBB_CO_U32

    Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32()
    {
    } // ~Inst_VOP2__V_SUBB_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u - S1.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // --- overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // --- source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
                vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP2__V_SUBBREV_CO_U32 class methods ---

    Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subbrev_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBBREV_CO_U32

    Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32()
    {
    } // ~Inst_VOP2__V_SUBBREV_CO_U32

    // --- description from .arch file ---
    // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
    // SQ translates this to V_SUBREV_U32 with reversed operands.
    void
    Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
                vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
                    > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP2__V_ADD_F16 class methods ---

    Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_ADD_F16

    Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16()
    {
    } // ~Inst_VOP2__V_ADD_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 + S1.f16.
    // Supports denormals, round mode, exception flags, saturation.
    void
    Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
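    // Like v_add_f16 above, the remaining 16-bit floating-point encodings
    // in this file are decoded but have no modeled datapath; executing any
    // of them terminates simulation via panicUnimplemented().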
    // --- Inst_VOP2__V_SUB_F16 class methods ---

    Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_SUB_F16

    Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16()
    {
    } // ~Inst_VOP2__V_SUB_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 - S1.f16.
    // Supports denormals, round mode, exception flags, saturation.
    // SQ translates to V_ADD_F16.
    void
    Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_SUBREV_F16 class methods ---

    Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_SUBREV_F16

    Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16()
    {
    } // ~Inst_VOP2__V_SUBREV_F16

    // --- description from .arch file ---
    // D.f16 = S1.f16 - S0.f16.
    // Supports denormals, round mode, exception flags, saturation.
    // SQ translates to V_ADD_F16.
    void
    Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_MUL_F16 class methods ---

    Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MUL_F16

    Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16()
    {
    } // ~Inst_VOP2__V_MUL_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * S1.f16.
    // Supports denormals, round mode, exception flags, saturation.
    void
    Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_MAC_F16 class methods ---

    Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F16

    Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16()
    {
    } // ~Inst_VOP2__V_MAC_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * S1.f16 + D.f16.
    // Supports round mode, exception flags, saturation.
    // SQ translates this to V_MAD_F16.
    void
    Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_MADMK_F16 class methods ---

    Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F16

    Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16()
    {
    } // ~Inst_VOP2__V_MADMK_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored
    // in the following literal DWORD.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // modifiers. Supports round mode, exception flags, saturation.
    // SQ translates this to V_MAD_F16.
    void
    Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_MADAK_F16 class methods ---

    Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madak_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP2__V_MADAK_F16

    Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16()
    {
    } // ~Inst_VOP2__V_MADAK_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored
    // in the following literal DWORD.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // modifiers. Supports round mode, exception flags, saturation.
    // SQ translates this to V_MAD_F16.
    void
    Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_ADD_U16 class methods ---

    Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ADD_U16

    Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16()
    {
    } // ~Inst_VOP2__V_ADD_U16

    // --- description from .arch file ---
    // D.u16 = S0.u16 + S1.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    void
    Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_SUB_U16 class methods ---

    Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_SUB_U16

    Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16()
    {
    } // ~Inst_VOP2__V_SUB_U16

    // --- description from .arch file ---
    // D.u16 = S0.u16 - S1.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    void
    Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_SUBREV_U16 class methods ---

    Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_SUBREV_U16

    Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16()
    {
    } // ~Inst_VOP2__V_SUBREV_U16

    // --- description from .arch file ---
    // D.u16 = S1.u16 - S0.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    // SQ translates this to V_SUB_U16 with reversed operands.
    void
    Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_LO_U16 class methods ---

    Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_lo_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_LO_U16

    Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16()
    {
    } // ~Inst_VOP2__V_MUL_LO_U16

    // --- description from .arch file ---
    // D.u16 = S0.u16 * S1.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    void
    Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_LSHLREV_B16 class methods ---

    Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshlrev_b16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHLREV_B16

    Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16()
    {
    } // ~Inst_VOP2__V_LSHLREV_B16

    // --- description from .arch file ---
    // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
    // SQ translates this to an internal SP opcode.
    void
    Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_LSHRREV_B16 class methods ---

    Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshrrev_b16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHRREV_B16

    Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16()
    {
    } // ~Inst_VOP2__V_LSHRREV_B16

    // --- description from .arch file ---
    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
    // The vacated bits are set to zero.
    // SQ translates this to an internal SP opcode.
    void
    Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> src0[lane];
            }
        }

        vdst.write();
    } // execute
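    // Note the shift amount above is not masked to S0.u[3:0] as the .arch
    // description specifies; the same applies to v_ashrrev_i16 below.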
    // --- Inst_VOP2__V_ASHRREV_I16 class methods ---

    Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ashrrev_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ASHRREV_I16

    Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16()
    {
    } // ~Inst_VOP2__V_ASHRREV_I16

    // --- description from .arch file ---
    // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
    // The vacated bits are set to the sign bit of the input value.
    // SQ translates this to an internal SP opcode.
    void
    Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> src0[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MAX_F16 class methods ---

    Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MAX_F16

    Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16()
    {
    } // ~Inst_VOP2__V_MAX_F16

    // --- description from .arch file ---
    // D.f16 = max(S0.f16, S1.f16).
    // IEEE compliant. Supports denormals, round mode, exception flags,
    // saturation.
    void
    Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_MIN_F16 class methods ---

    Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MIN_F16

    Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16()
    {
    } // ~Inst_VOP2__V_MIN_F16

    // --- description from .arch file ---
    // D.f16 = min(S0.f16, S1.f16).
    // IEEE compliant. Supports denormals, round mode, exception flags,
    // saturation.
    void
    Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_MAX_U16 class methods ---

    Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_U16

    Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16()
    {
    } // ~Inst_VOP2__V_MAX_U16

    // --- description from .arch file ---
    // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MAX_I16 class methods ---

    Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_I16

    Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16()
    {
    } // ~Inst_VOP2__V_MAX_I16

    // --- description from .arch file ---
    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MIN_U16 class methods ---

    Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_U16

    Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16()
    {
    } // ~Inst_VOP2__V_MIN_U16

    // --- description from .arch file ---
    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MIN_I16 class methods ---

    Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_I16

    Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16()
    {
    } // ~Inst_VOP2__V_MIN_I16

    // --- description from .arch file ---
    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_LDEXP_F16 class methods ---

    Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ldexp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_LDEXP_F16

    Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16()
    {
    } // ~Inst_VOP2__V_LDEXP_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * (2 ** S1.i16).
    void
    Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_ADD_U32 class methods ---

    Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ADD_U32

    Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
    {
    } // ~Inst_VOP2__V_ADD_U32

    // --- description from .arch file ---
    // D.u = S0.u + S1.u;
2145 {
2146 Wavefront *wf = gpuDynInst->wavefront();
2147 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
2148 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
2149 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2150
2151 src0.readSrc();
2152 src1.read();
2153
2154 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2155
2156 if (isSDWAInst()) {
2157 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
2158 // use copies of original src0, src1, and dest during selecting
2159 VecOperandU32 origSrc0_sdwa(gpuDynInst,
2161 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
2162 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
2163
2164 src0_sdwa.read();
2165 origSrc0_sdwa.read();
2166 origSrc1.read();
2167
2168 DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
2169 "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
2170 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
2171 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
2183
2184 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
2185 src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] + src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
2205 // --- Inst_VOP2__V_SUB_U32 class methods ---
2206
2208 : Inst_VOP2(iFmt, "v_sub_u32")
2209 {
2210 setFlag(ALU);
2211 } // Inst_VOP2__V_SUB_U32
2212
2214 {
2215 } // ~Inst_VOP2__V_SUB_U32
2216
2217 // --- description from .arch file ---
2218 // D.u = S0.u - S1.u;
2219 void
2221 {
2222 Wavefront *wf = gpuDynInst->wavefront();
2223 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
2224 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
2225 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2226
2227 src0.readSrc();
2228 src1.read();
2229
2230 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2231 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2232
2233 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2234 if (wf->execMask(lane)) {
2235 vdst[lane] = src0[lane] - src1[lane];
2236 }
2237 }
2238
2239 vdst.write();
2240 } // execute
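// The unsigned subtraction wraps modulo 2^32: e.g. 5 - 7 yields
// 0xFFFFFFFE. No borrow is recorded by this no-carry variant.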
2241 // --- Inst_VOP2__V_SUBREV_U32 class methods ---
2242
2243 Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
2244 : Inst_VOP2(iFmt, "v_subrev_u32")
2245 {
2246 setFlag(ALU);
2247 } // Inst_VOP2__V_SUBREV_U32
2248
2249 Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
2250 {
2251 } // ~Inst_VOP2__V_SUBREV_U32
2252
2253 // --- description from .arch file ---
2254 // D.u = S1.u - S0.u;
2255 void
2256 Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
2257 {
2258 Wavefront *wf = gpuDynInst->wavefront();
2259 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
2260 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
2261 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2262
2263 src0.readSrc();
2264 src1.read();
2265
2266 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2267 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2268
2269 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2270 if (wf->execMask(lane)) {
2271 vdst[lane] = src1[lane] - src0[lane];
2272 }
2273 }
2274
2275 vdst.write();
2276 } // execute
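// The reversed form exists because only SRC0 of the VOP2 encoding can
// name a scalar register, inline constant, or literal; VSRC1 must be a
// VGPR. v_subrev therefore expresses "VGPR minus constant", e.g.
// (illustrative assembly) v_subrev_u32 v0, 16, v1  =>  v0 = v1 - 16.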
2277 // --- Inst_VOP2__V_FMAC_F32 class methods ---
2278
2279 Inst_VOP2__V_FMAC_F32::Inst_VOP2__V_FMAC_F32(InFmt_VOP2 *iFmt)
2280 : Inst_VOP2(iFmt, "v_fmac_f32")
2281 {
2282 setFlag(ALU);
2283 } // Inst_VOP2__V_FMAC_F32
2284
2285 Inst_VOP2__V_FMAC_F32::~Inst_VOP2__V_FMAC_F32()
2286 {
2287 } // ~Inst_VOP2__V_FMAC_F32
2288
2289 // --- description from .arch file ---
2290 // D.f = S0.f * S1.f + D.f.
2291 void
2292 Inst_VOP2__V_FMAC_F32::execute(GPUDynInstPtr gpuDynInst)
2293 {
2294 Wavefront *wf = gpuDynInst->wavefront();
2295 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
2296 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
2297 VecOperandF32 vdst(gpuDynInst, instData.VDST);
2298
2299 src0.readSrc();
2300 src1.read();
2301 vdst.read();
2302
2303 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2304 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2305
2306 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2307 if (wf->execMask(lane)) {
2308 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
2309 }
2310 }
2311
2312 vdst.write();
2313 } // execute
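// std::fma rounds once, matching fused multiply-accumulate hardware,
// which can differ from a separately rounded multiply-then-add. For
// example, with float a = 1 + 2^-23, the exact square is
// a*a = 1 + 2^-22 + 2^-46; fma(a, a, -(1 + 2^-22)) returns 2^-46
// exactly, while (a*a) + (-(1 + 2^-22)) rounds the product first and
// returns 0.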
2314 // --- Inst_VOP2__V_XNOR_B32 class methods ---
2315
2316 Inst_VOP2__V_XNOR_B32::Inst_VOP2__V_XNOR_B32(InFmt_VOP2 *iFmt)
2317 : Inst_VOP2(iFmt, "v_xnor_b32")
2318 {
2319 setFlag(ALU);
2320 } // Inst_VOP2__V_XNOR_B32
2321
2322 Inst_VOP2__V_XNOR_B32::~Inst_VOP2__V_XNOR_B32()
2323 {
2324 } // ~Inst_VOP2__V_XNOR_B32
2325
2326 // --- description from .arch file ---
2327 // D.u = ~(S0.u ^ S1.u).
2328 void
2329 Inst_VOP2__V_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
2330 {
2331 Wavefront *wf = gpuDynInst->wavefront();
2332 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
2333 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
2334 VecOperandU32 vdst(gpuDynInst, instData.VDST);
2335
2336 src0.readSrc();
2337 src1.read();
2338 vdst.read();
2339
2340 panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
2341 panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
2342
2343 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
2344 if (wf->execMask(lane)) {
2345 vdst[lane] = ~(src0[lane] ^ src1[lane]);
2346 }
2347 }
2348
2349 vdst.write();
2350 } // execute
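// Bitwise example: src0 = 0xF0F0F0F0 and src1 = 0xFF00FF00 give
// src0 ^ src1 = 0x0FF00FF0, so the XNOR result is 0xF00FF00F.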
2351} // namespace VegaISA
2352} // namespace gem5

Generated on Mon Jan 13 2025 04:28:00 for gem5 by doxygen 1.9.8