release/current/vop1_8cc_source.html

/*

 * Copyright (c) 2024 Advanced Micro Devices, Inc.

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *

 * 1. Redistributions of source code must retain the above copyright notice,

 * this list of conditions and the following disclaimer.

 *

 * 2. Redistributions in binary form must reproduce the above copyright notice,

 * this list of conditions and the following disclaimer in the documentation

 * and/or other materials provided with the distribution.

 *

 * 3. Neither the name of the copyright holder nor the names of its

 * contributors may be used to endorse or promote products derived from this

 * software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

 * POSSIBILITY OF SUCH DAMAGE.

 */


#include "arch/amdgpu/vega/insts/inst_util.hh"

#include "arch/amdgpu/vega/insts/instructions.hh"


namespace gem5

{


namespace VegaISA

{

    // --- Inst_VOP1__V_NOP class methods ---


    // Constructor: hands iFmt and the name "v_nop" to the Inst_VOP1 base,
    // then sets the Nop and ALU flags on this instruction.
    Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_nop")
    {
        setFlag(Nop);
        setFlag(ALU);
    } // Inst_VOP1__V_NOP

    Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP() = default;

    Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP() {…}


    // --- description from .arch file ---
    // Do nothing.
    //
    // The body is intentionally empty: gpuDynInst is not used and no operand
    // is read or written here.
    void
    Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute

    Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_MOV_B32 class methods ---


    // Constructor: hands iFmt and the name "v_mov_b32" to the Inst_VOP1
    // base, then sets the ALU flag on this instruction.
    Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_B32

    Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32() = default;

    Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32() {…}


    // --- description from .arch file ---
    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    //
    // Copies the source into VDST for every lane enabled in the exec mask.
    // For a DPP-encoded instruction the register named by the DPP extension
    // word is read, run through processDPP(), and used as the source instead
    // of SRC0. SDWA encodings panic.
    void
    Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);

        if (isDPPInst()) {
            VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src_dpp.read();

            // "%#x" already emits the "0x" prefix, so no literal "0x" is
            // written in front of it.
            DPRINTF(VEGA, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: %#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            // NOTE: For VOP1, there is no SRC1, so make sure we're not trying
            // to negate it or take the absolute value of it
            assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
            assert(!extData.iFmt_VOP_DPP.SRC1_NEG);
            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);

            // Copy the DPP-processed source into the enabled lanes.
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src_dpp[lane];
                }
            }
        } else {
            // Plain encoding: straight lane-wise copy of SRC0.
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_READFIRSTLANE_B32 class methods ---


    // Constructor: hands iFmt and the name "v_readfirstlane_b32" to the
    // Inst_VOP1 base, then sets the ALU flag on this instruction.
    Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_readfirstlane_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_READFIRSTLANE_B32

    Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32( {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32() = default;

    Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32() {…}


    // --- description from .arch file ---
    // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
    // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
    // (Lane# = 0 if exec is zero). Ignores exec mask for the access. SQ
    // translates to V_READLANE_B32.
    // Input and output modifiers not supported; this is an untyped operation.
    //
    // Picks the lowest set bit of the exec mask (lane 0 when the mask is
    // empty) and writes that lane's source element to the scalar
    // destination. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ScalarRegU64 activeMask = wave->execMask().to_ullong();
        ConstVecOperandU32 input(gpuDynInst, instData.SRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        // An all-zero mask falls back to lane 0.
        const ScalarRegI32 firstLane =
            activeMask ? findLsbSet(activeMask) : 0;

        sdst = input[firstLane];
        sdst.write();
    } // execute

    Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_I32_F64 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_i32_f64" to the Inst_VOP1
    // base, then sets the ALU and F64 flags on this instruction.
    Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_I32_F64

    Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64() = default;

    Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64() {…}


    // --- description from .arch file ---
    // D.i = (int)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN is
    // converted to 0.
    //
    // For every lane enabled in the exec mask the 64-bit float source is
    // truncated toward zero into a signed 32-bit integer. Values at or above
    // 2^31 clamp to INT_MAX, values at or below -2^31 clamp to INT_MIN, and
    // NaN produces 0. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                const double val = src[lane];

                // Compare against the exact bounds rather than a
                // std::frexp() exponent: frexp yields exponent 31 for
                // magnitudes in [2^30, 2^31), which still fit in an int, so
                // an exponent threshold is easy to get off by one.
                if (std::isnan(val)) {
                    vdst[lane] = 0;
                } else if (val >= 2147483648.0) {
                    // 2^31 and above, including +infinity.
                    vdst[lane] = INT_MAX;
                } else if (val <= -2147483648.0) {
                    // -2^31 and below, including -infinity.
                    vdst[lane] = INT_MIN;
                } else {
                    // In range: the truncated value is representable.
                    vdst[lane] = (VecElemI32)val;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F64_I32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f64_i32" to the Inst_VOP1
    // base, then sets the ALU and F64 flags on this instruction.
    Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_i32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_I32

    Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32() = default;

    Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32() {…}


    // --- description from .arch file ---
    // D.d = (double)S0.i.
    //
    // Converts the signed 32-bit source element of every lane enabled in the
    // exec mask to a 64-bit float. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandI32 input(gpuDynInst, instData.SRC0);
        VecOperandF64 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }
            result[idx] = static_cast<VecElemF64>(input[idx]);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F32_I32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f32_i32" to the Inst_VOP1
    // base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_i32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_I32

    Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32() = default;

    Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32() {…}


    // --- description from .arch file ---
    // D.f = (float)S0.i.
    //
    // Converts the signed 32-bit source element of every lane enabled in the
    // exec mask to a 32-bit float. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandI32 input(gpuDynInst, instData.SRC0);
        VecOperandF32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }
            result[idx] = static_cast<VecElemF32>(input[idx]);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F32_U32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f32_u32" to the Inst_VOP1
    // base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_u32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_U32

    Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32() = default;

    Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32() {…}


    // --- description from .arch file ---
    // D.f = (float)S0.u.
    //
    // Converts the unsigned 32-bit source element of every lane enabled in
    // the exec mask to a 32-bit float. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 input(gpuDynInst, instData.SRC0);
        VecOperandF32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }
            result[idx] = static_cast<VecElemF32>(input[idx]);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_U32_F32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_u32_f32" to the Inst_VOP1
    // base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_U32_F32

    Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32() = default;

    Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32() {…}


    // --- description from .arch file ---
    // D.u = (unsigned)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN is
    // converted to 0.
    //
    // For every lane enabled in the exec mask the 32-bit float source is
    // truncated toward zero into an unsigned 32-bit integer. Negative values
    // (and NaN) produce 0 and values at or above 2^32 clamp to UINT_MAX.
    // SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                const float val = src[lane];

                // Compare against the exact bounds rather than a
                // std::frexp() exponent: frexp yields exponent 32 for
                // values in [2^31, 2^32), which still fit in 32 bits.
                if (std::isnan(val) || val <= 0.0f) {
                    // NaN, zero, any negative value and -infinity. Casting
                    // a float <= -1 to an unsigned type is undefined in
                    // C++, so it must not reach the cast below.
                    vdst[lane] = 0;
                } else if (val >= 4294967296.0f) {
                    // 2^32 and above, including +infinity.
                    vdst[lane] = UINT_MAX;
                } else {
                    // In range: the truncated value is representable.
                    vdst[lane] = (VecElemU32)val;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_I32_F32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_i32_f32" to the Inst_VOP1
    // base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_I32_F32

    Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32() = default;

    Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32() {…}


    // --- description from .arch file ---
    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN is
    // converted to 0.
    //
    // For every lane enabled in the exec mask the 32-bit float source is
    // truncated toward zero into a signed 32-bit integer. Values at or above
    // 2^31 clamp to INT_MAX, values at or below -2^31 clamp to INT_MIN, and
    // NaN produces 0. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                const float val = src[lane];

                // Compare against the exact bounds rather than a
                // std::frexp() exponent: frexp yields exponent 31 for
                // magnitudes in [2^30, 2^31), which still fit in an int, so
                // an exponent threshold is easy to get off by one.
                if (std::isnan(val)) {
                    vdst[lane] = 0;
                } else if (val >= 2147483648.0f) {
                    // 2^31 and above, including +infinity.
                    vdst[lane] = INT_MAX;
                } else if (val <= -2147483648.0f) {
                    // -2^31 and below, including -infinity.
                    vdst[lane] = INT_MIN;
                } else {
                    // In range: the truncated value is representable.
                    vdst[lane] = (VecElemI32)val;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_MOV_FED_B32 class methods ---


    // Constructor: hands iFmt and the name "v_mov_fed_b32" to the Inst_VOP1
    // base, then sets the ALU flag on this instruction.
    Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_fed_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_FED_B32

    Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32() = default;

    Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32() {…}


    // --- description from .arch file ---
    // D.u = S0.u;
    // Introduce EDC double error upon write to dest vgpr without causing an
    // exception.
    // Input and output modifiers not supported; this is an untyped operation.
    //
    // Not modeled: the only action taken is a call to panicUnimplemented().
    void
    Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F16_F32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f16_f32" to the Inst_VOP1
    // base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F16_F32

    Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32() = default;

    Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32() {…}


    // --- description from .arch file ---
    // D.f16 = flt32_to_flt16(S0.f).
    // Supports input modifiers and creates FP16 denormals when appropriate.
    //
    // For every lane enabled in the exec mask, builds an AMDGPU::mxfloat16
    // from the 32-bit float source and stores its raw `data` field, shifted
    // right by 16, into the 32-bit destination element. SDWA and DPP
    // encodings panic.
    void
    Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 input(gpuDynInst, instData.SRC0);
        VecOperandU32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }

            float single = input[idx];
            AMDGPU::mxfloat16 half(single);

            // NOTE(review): presumably the 16 encoded bits sit in the upper
            // half of `data` -- confirm against the mxfloat16 definition.
            result[idx] = (half.data >> 16);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F32_F16 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f32_f16" to the Inst_VOP1
    // base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f16")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_F16

    Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16() = default;

    Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16() {…}


    // --- description from .arch file ---
    // D.f = flt16_to_flt32(S0.f16).
    // FP16 denormal inputs are always accepted.
    //
    // For every lane enabled in the exec mask, builds an AMDGPU::mxfloat16
    // from the 32-bit source element and stores its conversion to float in
    // the destination. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 input(gpuDynInst, instData.SRC0);
        VecOperandF32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }

            AMDGPU::mxfloat16 half(input[idx]);
            result[idx] = static_cast<float>(half);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_RPI_I32_F32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_rpi_i32_f32" to the
    // Inst_VOP1 base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_RPI_I32_F32

    Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32( {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32() = default;

    Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32() {…}


    // --- description from .arch file ---
    // D.i = (int)floor(S0.f + 0.5).
    //
    // For every lane enabled in the exec mask, adds 0.5 to the source, takes
    // the floor and casts the result to a signed 32-bit integer. SDWA and
    // DPP encodings panic.
    // NOTE(review): there is no NaN or range check before the cast.
    void
    Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 input(gpuDynInst, instData.SRC0);
        VecOperandI32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }

            const auto rounded = std::floor(input[idx] + 0.5);
            result[idx] = static_cast<VecElemI32>(rounded);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_FLR_I32_F32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_flr_i32_f32" to the
    // Inst_VOP1 base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_FLR_I32_F32

    Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32( {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32() = default;

    Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32() {…}


    // --- description from .arch file ---
    // D.i = (int)floor(S0.f).
    //
    // For every lane enabled in the exec mask, takes the floor of the source
    // and casts the result to a signed 32-bit integer. SDWA and DPP
    // encodings panic.
    // NOTE(review): there is no NaN or range check before the cast.
    void
    Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 input(gpuDynInst, instData.SRC0);
        VecOperandI32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }

            const auto floored = std::floor(input[idx]);
            result[idx] = static_cast<VecElemI32>(floored);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_OFF_F32_I4 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_off_f32_i4" to the
    // Inst_VOP1 base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_OFF_F32_I4

    Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4() = default;

    Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4() {…}


    // --- description from .arch file ---
    // 4-bit signed int to 32-bit float. Used for interpolation in shader.
    //
    // Not modeled: the only action taken is a call to panicUnimplemented().
    void
    Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        // Could not parse sq_uc.arch desc field
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F32_F64 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f32_f64" to the Inst_VOP1
    // base, then sets the ALU and F64 flags on this instruction.
    Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F32_F64

    Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64() = default;

    Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64() {…}


    // --- description from .arch file ---
    // D.f = (float)S0.d.
    //
    // Narrows the 64-bit float source element of every lane enabled in the
    // exec mask to a 32-bit float. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF64 input(gpuDynInst, instData.SRC0);
        VecOperandF32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }
            result[idx] = static_cast<VecElemF32>(input[idx]);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F64_F32 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f64_f32" to the Inst_VOP1
    // base, then sets the ALU and F64 flags on this instruction.
    Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_f32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_F32

    Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32() = default;

    Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32() {…}


    // --- description from .arch file ---
    // D.d = (double)S0.f.
    //
    // Widens the 32-bit float source element of every lane enabled in the
    // exec mask to a 64-bit float. SDWA and DPP encodings panic.
    void
    Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 input(gpuDynInst, instData.SRC0);
        VecOperandF64 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }
            result[idx] = static_cast<VecElemF64>(input[idx]);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F32_UBYTE0 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f32_ubyte0" to the
    // Inst_VOP1 base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE0

    Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0() = default;

    Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0() {…}


    // --- description from .arch file ---
    // D.f = (float)(S0.u[7:0]).
    //
    // For every lane enabled in the exec mask, extracts bits 7..0 of the
    // source element and converts them to a 32-bit float. SDWA and DPP
    // encodings panic.
    void
    Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 input(gpuDynInst, instData.SRC0);
        VecOperandF32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }

            const auto byteVal = bits(input[idx], 7, 0);
            result[idx] = static_cast<VecElemF32>(byteVal);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F32_UBYTE1 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f32_ubyte1" to the
    // Inst_VOP1 base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE1

    Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1() = default;

    Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1() {…}


    // --- description from .arch file ---
    // D.f = (float)(S0.u[15:8]).
    //
    // For every lane enabled in the exec mask, extracts bits 15..8 of the
    // source element and converts them to a 32-bit float. SDWA and DPP
    // encodings panic.
    void
    Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 input(gpuDynInst, instData.SRC0);
        VecOperandF32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }

            const auto byteVal = bits(input[idx], 15, 8);
            result[idx] = static_cast<VecElemF32>(byteVal);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F32_UBYTE2 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f32_ubyte2" to the
    // Inst_VOP1 base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE2

    Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2() = default;

    Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2() {…}


    // --- description from .arch file ---
    // D.f = (float)(S0.u[23:16]).
    //
    // For every lane enabled in the exec mask, extracts bits 23..16 of the
    // source element and converts them to a 32-bit float. SDWA and DPP
    // encodings panic.
    void
    Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 input(gpuDynInst, instData.SRC0);
        VecOperandF32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }

            const auto byteVal = bits(input[idx], 23, 16);
            result[idx] = static_cast<VecElemF32>(byteVal);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F32_UBYTE3 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_f32_ubyte3" to the
    // Inst_VOP1 base, then sets the ALU and F32 flags on this instruction.
    Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE3

    Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt) {…}


    // Destructor: performs no work of its own.
    Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3() = default;

    Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3() {…}


    // --- description from .arch file ---
    // D.f = (float)(S0.u[31:24]).
    //
    // For every lane enabled in the exec mask, extracts bits 31..24 of the
    // source element and converts them to a 32-bit float. SDWA and DPP
    // encodings panic.
    void
    Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 input(gpuDynInst, instData.SRC0);
        VecOperandF32 result(gpuDynInst, instData.VDST);

        input.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            // Lanes masked off in EXEC are skipped.
            if (!wave->execMask(idx)) {
                continue;
            }

            const auto byteVal = bits(input[idx], 31, 24);
            result[idx] = static_cast<VecElemF32>(byteVal);
        }

        result.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_U32_F64 class methods ---


    // Constructor: hands iFmt and the name "v_cvt_u32_f64" to the Inst_VOP1
    // base, then sets the ALU and F64 flags on this instruction.
    Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_U32_F64

    Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64() = default;

    Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64() {…}


    // --- description from .arch file ---
    // D.u = (unsigned)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN is
    // ---  converted to 0.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then converts each active
    // lane's double to an unsigned 32-bit value and writes VDST.
    // Saturation is decided by comparing the value itself against the
    // representable range: NaN and negative inputs give 0, inputs of 2^32
    // or more (including +inf) give UINT_MAX, everything else truncates.
    void
    Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        // 2^32, the smallest value that no longer fits in 32 unsigned bits.
        const VecElemF64 upperBound = 4294967296.0;

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                const VecElemF64 val = src[lane];

                if (std::isnan(val) || val < 0.0) {
                    // Negative values (including -inf) saturate to the
                    // lower bound; casting them directly would be
                    // undefined behaviour.
                    vdst[lane] = 0;
                } else if (val >= upperBound) {
                    vdst[lane] = UINT_MAX;
                } else {
                    // In range [0, 2^32): plain truncation is well defined.
                    vdst[lane] = (VecElemU32)val;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F64_U32 class methods ---


    // Construct the v_cvt_f64_u32 instruction, flagged as ALU and F64.
    Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_u32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_U32

    Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32() = default;

    Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32() {…}


    // --- description from .arch file ---
    // D.d = (double)S0.u.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then widens each active
    // lane's unsigned 32-bit source to double and writes VDST.
    void
    Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 in(gpuDynInst, instData.SRC0);
        VecOperandF64 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = static_cast<VecElemF64>(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_TRUNC_F64 class methods ---


    // Construct the v_trunc_f64 instruction, flagged as ALU and F64.
    Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_TRUNC_F64

    Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64() = default;

    Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64() {…}


    // --- description from .arch file ---
    // D.d = trunc(S0.d), return integer part of S0.d.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::trunc of each active lane's source to VDST.
    void
    Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF64 in(gpuDynInst, instData.SRC0);
        VecOperandF64 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::trunc(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CEIL_F64 class methods ---


    // Construct the v_ceil_f64 instruction, flagged as ALU and F64.
    Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CEIL_F64

    Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64() = default;

    Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64() {…}


    // --- description from .arch file ---
    // D.d = trunc(S0.d);
    // if (S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::ceil of each active lane's source to VDST.
    void
    Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF64 in(gpuDynInst, instData.SRC0);
        VecOperandF64 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::ceil(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RNDNE_F64 class methods ---


    // Construct the v_rndne_f64 instruction, flagged as ALU and F64.
    Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RNDNE_F64

    Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64() = default;

    Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64() {…}


    // --- description from .arch file ---
    // D.d = round_nearest_even(S0.d).
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // roundNearestEven of each active lane's source to VDST.
    void
    Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF64 in(gpuDynInst, instData.SRC0);
        VecOperandF64 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = roundNearestEven(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FLOOR_F64 class methods ---


    // Construct the v_floor_f64 instruction, flagged as ALU and F64.
    Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FLOOR_F64

    Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64() = default;

    Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64() {…}


    // --- description from .arch file ---
    // D.d = trunc(S0.d);
    // if (S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::floor of each active lane's source to VDST.
    void
    Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF64 in(gpuDynInst, instData.SRC0);
        VecOperandF64 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::floor(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FRACT_F32 class methods ---


    // Construct the v_fract_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FRACT_F32

    Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32() = default;

    Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32() {…}


    // --- description from .arch file ---
    // D.f = S0.f - floor(S0.f).
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes the
    // floor-based fractional part of each active lane's source to VDST.
    // The formula above is evaluated directly: std::modf would return a
    // fraction carrying the sign of the input (-1.25 -> -0.25), whereas
    // S0 - floor(S0) gives 0.75.
    // NOTE(review): for +/-inf the formula evaluates to NaN (inf - inf);
    // confirm this matches the intended hardware result.
    void
    Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src[lane] - std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_TRUNC_F32 class methods ---


    // Construct the v_trunc_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_TRUNC_F32

    Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32() = default;

    Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32() {…}


    // --- description from .arch file ---
    // D.f = trunc(S0.f), return integer part of S0.f.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::trunc of each active lane's source to VDST.
    void
    Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::trunc(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CEIL_F32 class methods ---


    // Construct the v_ceil_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CEIL_F32

    Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32() = default;

    Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32() {…}


    // --- description from .arch file ---
    // D.f = trunc(S0.f);
    // if (S0.f > 0.0 && S0.f != D.f) then D.f += 1.0.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::ceil of each active lane's source to VDST.
    void
    Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::ceil(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RNDNE_F32 class methods ---


    // Construct the v_rndne_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RNDNE_F32

    Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32() = default;

    Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32() {…}


    // --- description from .arch file ---
    // D.f = round_nearest_even(S0.f).
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // roundNearestEven of each active lane's source to VDST.
    void
    Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = roundNearestEven(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FLOOR_F32 class methods ---


    // Construct the v_floor_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FLOOR_F32

    Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32() = default;

    Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32() {…}


    // --- description from .arch file ---
    // D.f = trunc(S0.f);
    // if (S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::floor of each active lane's source to VDST.
    void
    Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::floor(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_EXP_F32 class methods ---


    // Construct the v_exp_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_EXP_F32

    Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32() = default;

    Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32() {…}


    // --- description from .arch file ---
    // D.f = pow(2.0, S0.f).
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::pow(2.0, src) for each active lane to VDST.
    void
    Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::pow(2.0, in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_LOG_F32 class methods ---


    // Construct the v_log_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_F32

    Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32() = default;

    Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32() {…}


    // --- description from .arch file ---
    // D.f = log2(S0.f). Base 2 logarithm.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::log2 of each active lane's source to VDST.
    void
    Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::log2(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RCP_F32 class methods ---


    // Construct the v_rcp_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_F32

    Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32() = default;

    Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32() {…}


    // --- description from .arch file ---
    // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // 1.0 / src for each active lane to VDST.
    void
    Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = 1.0 / in[ln];
        }

        out.write();
    } // execute

    Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RCP_IFLAG_F32 class methods ---


    // Construct the v_rcp_iflag_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_IFLAG_F32

    Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32() = default;

    Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32() {…}


    // --- description from .arch file ---
    // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
    // ---  integer DIV_BY_ZERO exception but cannot raise floating-point
    // ---  exceptions.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // 1.0 / src for each active lane to VDST. No exception state is
    // modelled in this method.
    void
    Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = 1.0 / in[ln];
        }

        out.write();
    } // execute

    Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RSQ_F32 class methods ---


    // Construct the v_rsq_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RSQ_F32

    Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32() = default;

    Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32() {…}


    // --- description from .arch file ---
    // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // 1.0 / std::sqrt(src) for each active lane to VDST.
    void
    Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = 1.0 / std::sqrt(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RCP_F64 class methods ---


    // Construct the v_rcp_f64 instruction, flagged as ALU and F64.
    Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RCP_F64

    Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64() = default;

    Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64() {…}


    // --- description from .arch file ---
    // D.d = 1.0 / S0.d.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes the
    // reciprocal of each active lane's source to VDST. Special inputs
    // are handled explicitly: zero gives an infinity carrying the sign
    // of the zero, NaN gives NaN, and +/-inf gives a zero of matching
    // sign.
    void
    Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    // 1.0 / -0.0 is -inf under IEEE division, so the
                    // result takes the sign of the zero input.
                    vdst[lane] = std::signbit(src[lane])
                        ? -INFINITY : +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = 0.0;
                    }
                } else {
                    vdst[lane] = 1.0 / src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RSQ_F64 class methods ---


    // Construct the v_rsq_f64 instruction, flagged as ALU and F64.
    Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RSQ_F64

    Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64() = default;

    Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64() {…}


    // --- description from .arch file ---
    // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes the
    // reciprocal square root of each active lane's source to VDST.
    // Special inputs are handled explicitly: zero gives an infinity
    // carrying the sign of the zero, NaN gives NaN, +inf gives 0.0 and
    // any other negative input gives NaN.
    void
    Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    // sqrt(-0.0) is -0.0, so 1.0 / sqrt(-0.0) is -inf;
                    // this is also what the plain expression used by
                    // V_RSQ_F32 evaluates to.
                    vdst[lane] = std::signbit(src[lane])
                        ? -INFINITY : +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])
                           && !std::signbit(src[lane])) {
                    vdst[lane] = 0.0;
                } else if (std::signbit(src[lane])) {
                    // Negative non-zero inputs, including -inf.
                    vdst[lane] = NAN;
                } else {
                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_SQRT_F32 class methods ---


    // Construct the v_sqrt_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_SQRT_F32

    Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32() = default;

    Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32() {…}


    // --- description from .arch file ---
    // D.f = sqrt(S0.f).
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::sqrt of each active lane's source to VDST.
    void
    Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::sqrt(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_SQRT_F64 class methods ---


    // Construct the v_sqrt_f64 instruction, flagged as ALU and F64.
    Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_SQRT_F64

    Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64() = default;

    Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64() {…}


    // --- description from .arch file ---
    // D.d = sqrt(S0.d).
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes
    // std::sqrt of each active lane's source to VDST.
    void
    Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF64 in(gpuDynInst, instData.SRC0);
        VecOperandF64 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = std::sqrt(in[ln]);
        }

        out.write();
    } // execute

    Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_SIN_F32 class methods ---


    // Construct the v_sin_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sin_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_SIN_F32

    Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32() = default;

    Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32() {…}


    // --- description from .arch file ---
    // D.f = sin(S0.f * 2 * PI).
    // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
    // float 0.0.
    //
    // Reads SRC0 and the REG_PI scalar operand, panics on SDWA/DPP
    // encodings, then writes either 0.0 (input outside the valid range)
    // or std::sin(src * 2.0 * pi) for each active lane to VDST.
    // NOTE(review): assumes the REG_PI operand reads back as PI -- confirm.
    void
    Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 in(gpuDynInst, instData.SRC0);
        ConstScalarOperandF32 piReg(gpuDynInst, REG_PI);
        VecOperandF32 out(gpuDynInst, instData.VDST);

        in.readSrc();
        piReg.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            // Written as two strict comparisons so that a NaN input is
            // not classified as out of range.
            const bool outOfRange = in[ln] < -256.0 || in[ln] > 256.0;
            out[ln] = outOfRange
                ? 0.0 : std::sin(in[ln] * 2.0 * piReg.rawData());
        }

        out.write();
    } // execute

    Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_COS_F32 class methods ---


    // Construct the v_cos_f32 instruction, flagged as ALU and F32.
    Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cos_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_COS_F32

    Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32() = default;

    Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32() {…}


    // --- description from .arch file ---
    // D.f = cos(S0.f * 2 * PI).
    // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
    // float 1.0.
    //
    // Reads SRC0 and the REG_PI scalar operand, panics on SDWA/DPP
    // encodings, then writes either 1.0 (input outside the valid range)
    // or std::cos(src * 2.0 * pi) for each active lane to VDST.
    // NOTE(review): assumes the REG_PI operand reads back as PI -- confirm.
    void
    Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (src[lane] < -256.0 || src[lane] > 256.0) {
                    // The description above specifies 1.0 (not 0.0, which
                    // is the sine variant's value) for out-of-range input.
                    vdst[lane] = 1.0;
                } else {
                    vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_NOT_B32 class methods ---


    // Construct the v_not_b32 instruction, flagged as ALU.
    Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_not_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_NOT_B32

    Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32() = default;

    Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32() {…}


    // --- description from .arch file ---
    // D.u = ~S0.u.
    // Input and output modifiers not supported.
    //
    // Reads SRC0, panics on SDWA/DPP encodings, then writes the
    // bitwise complement of each active lane's source to VDST.
    void
    Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 in(gpuDynInst, instData.SRC0);
        VecOperandU32 out(gpuDynInst, instData.VDST);

        in.readSrc();

        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int ln = 0; ln < NumVecElemPerVecReg; ++ln) {
            // Only lanes enabled in the exec mask are updated.
            if (!wave->execMask(ln)) {
                continue;
            }

            out[ln] = ~in[ln];
        }

        out.write();
    } // execute

    Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_BFREV_B32 class methods ---


    // Construct the v_bfrev_b32 instruction, flagged as ALU.
    Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_bfrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_BFREV_B32

    Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt) {…}


    // No resources to release; the destructor is defaulted.
    Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32() = default;

    Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32() {…}


    // --- description from .arch file ---

    // D.u[31:0] = S0.u[0:31], bitfield reverse.

    // Input and output modifiers not supported.

    void
    Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives
        // reverseBits() of the corresponding SRC0 element.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            dst[idx] = reverseBits(src0[idx]);
        }

        dst.write();
    } // execute

    Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FFBH_U32 class methods ---


    // Forwards iFmt and the name "v_ffbh_u32" to the Inst_VOP1 base, then
    // sets the ALU flag.
    Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_ffbh_u32")

    {

        setFlag(ALU);

    } // Inst_VOP1__V_FFBH_U32

    Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32() = default;

    Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32() {…}


    // --- description from .arch file ---

    // D.u = position of first 1 in S0.u from MSB;

    // D.u = 0xffffffff if S0.u == 0.

    void
    Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives
        // findFirstOneMsb() of the corresponding SRC0 element.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            dst[idx] = findFirstOneMsb(src0[idx]);
        }

        dst.write();
    } // execute

    Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FFBL_B32 class methods ---


    // Forwards iFmt and the name "v_ffbl_b32" to the Inst_VOP1 base, then
    // sets the ALU flag.
    Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_ffbl_b32")

    {

        setFlag(ALU);

    } // Inst_VOP1__V_FFBL_B32

    Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32() = default;

    Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32() {…}


    // --- description from .arch file ---

    // D.u = position of first 1 in S0.u from LSB;

    // D.u = 0xffffffff if S0.u == 0.

    void
    Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives
        // findFirstOne() of the corresponding SRC0 element.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            dst[idx] = findFirstOne(src0[idx]);
        }

        dst.write();
    } // execute

    Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FFBH_I32 class methods ---


    // Forwards iFmt and the name "v_ffbh_i32" to the Inst_VOP1 base, then
    // sets the ALU flag.
    Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_ffbh_i32")

    {

        setFlag(ALU);

    } // Inst_VOP1__V_FFBH_I32

    Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32() = default;

    Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32() {…}


    // --- description from .arch file ---

    // D.u = position of first bit different from sign bit in S0.i from MSB;

    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.

    void
    Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Signed source, unsigned destination: for each lane enabled in the
        // exec mask, VDST receives firstOppositeSignBit() of the SRC0
        // element.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            dst[idx] = firstOppositeSignBit(src0[idx]);
        }

        dst.write();
    } // execute

    Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FREXP_EXP_I32_F64 class methods ---


    // Forwards iFmt and the name "v_frexp_exp_i32_f64" to the Inst_VOP1
    // base, then sets the ALU and F64 flags.
    Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(

          InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")

    {

        setFlag(ALU);

        setFlag(F64);

    } // Inst_VOP1__V_FREXP_EXP_I32_F64

    Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64( {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64() = default;

    Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64() {…}


    // --- description from .arch file ---

    // See V_FREXP_EXP_I32_F32.

    void
    Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives the
        // exponent reported by std::frexp() for the F64 source element, or
        // 0 when that element is infinite or NaN.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        VecOperandI32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            if (!std::isinf(src0[idx]) && !std::isnan(src0[idx])) {
                // Only the exponent out-parameter is kept; the returned
                // significand is discarded.
                VecElemI32 exponent = 0;
                std::frexp(src0[idx], &exponent);
                dst[idx] = exponent;
            } else {
                dst[idx] = 0;
            }
        }

        dst.write();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FREXP_MANT_F64 class methods ---


    // Forwards iFmt and the name "v_frexp_mant_f64" to the Inst_VOP1 base,
    // then sets the ALU and F64 flags.
    Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_frexp_mant_f64")

    {

        setFlag(ALU);

        setFlag(F64);

    } // Inst_VOP1__V_FREXP_MANT_F64

    Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64() = default;

    Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64() {…}


    // --- description from .arch file ---

    // See V_FREXP_MANT_F32.

    void
    Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives the value
        // returned by std::frexp() for the F64 source element; infinite and
        // NaN inputs are copied through unchanged.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        VecOperandF64 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            if (!std::isinf(src0[idx]) && !std::isnan(src0[idx])) {
                // frexp requires an exponent out-parameter; it is unused.
                VecElemI32 ignored_exp(0);
                dst[idx] = std::frexp(src0[idx], &ignored_exp);
            } else {
                dst[idx] = src0[idx];
            }
        }

        dst.write();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FRACT_F64 class methods ---


    // Forwards iFmt and the name "v_fract_f64" to the Inst_VOP1 base, then
    // sets the ALU and F64 flags.
    Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_fract_f64")

    {

        setFlag(ALU);

        setFlag(F64);

    } // Inst_VOP1__V_FRACT_F64

    Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64() = default;

    Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64() {…}


    // --- description from .arch file ---

    // See V_FRACT_F32.

    void
    Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, computes the fractional
        // part of the F64 source as S0 - floor(S0).
        //
        // std::modf() alone is not equivalent: its fractional result keeps
        // the sign of the input (modf(-1.25) yields -0.25), while the
        // floor-based definition yields 0.75 for the same input.
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                const VecElemF64 val = src[lane];

                if (std::isfinite(val)) {
                    vdst[lane] = val - std::floor(val);
                } else {
                    // Inf - floor(Inf) would be NaN, so NaN/Inf inputs keep
                    // the std::modf() fractional result instead.
                    // NOTE(review): hardware results for non-finite inputs
                    // and any clamp of the result below 1.0 are not
                    // confirmed here.
                    VecElemF64 int_part(0.0);
                    vdst[lane] = std::modf(val, &int_part);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FREXP_EXP_I32_F32 class methods ---


    // Forwards iFmt and the name "v_frexp_exp_i32_f32" to the Inst_VOP1
    // base, then sets the ALU and F32 flags.
    Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(

          InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")

    {

        setFlag(ALU);

        setFlag(F32);

    } // Inst_VOP1__V_FREXP_EXP_I32_F32

    Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32( {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32() = default;

    Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32() {…}


    // --- description from .arch file ---

    // if (S0.f == INF || S0.f == NAN) then D.i = 0;

    // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).

    // Returns exponent of single precision float input, such that S0.f =

    // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns

    // the significand.

    void
    Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives the
        // exponent reported by std::frexp() for the F32 source element, or
        // 0 when that element is infinite or NaN.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandI32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            if (!std::isinf(src0[idx]) && !std::isnan(src0[idx])) {
                // Only the exponent out-parameter is kept; the returned
                // significand is discarded.
                VecElemI32 exponent(0);
                std::frexp(src0[idx], &exponent);
                dst[idx] = exponent;
            } else {
                dst[idx] = 0;
            }
        }

        dst.write();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FREXP_MANT_F32 class methods ---


    // Forwards iFmt and the name "v_frexp_mant_f32" to the Inst_VOP1 base,
    // then sets the ALU and F32 flags.
    Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_frexp_mant_f32")

    {

        setFlag(ALU);

        setFlag(F32);

    } // Inst_VOP1__V_FREXP_MANT_F32

    Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32() = default;

    Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32() {…}


    // --- description from .arch file ---

    // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;

    // else D.f = Mantissa(S0.f).

    // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary

    // significand of single precision float input, such that S0.f =

    // significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which

    // returns integer exponent.

    void
    Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives the value
        // returned by std::frexp() for the F32 source element; infinite and
        // NaN inputs are copied through unchanged.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            if (!std::isinf(src0[idx]) && !std::isnan(src0[idx])) {
                // frexp requires an exponent out-parameter; it is unused.
                VecElemI32 ignored_exp(0);
                dst[idx] = std::frexp(src0[idx], &ignored_exp);
            } else {
                dst[idx] = src0[idx];
            }
        }

        dst.write();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CLREXCP class methods ---


    // Forwards iFmt and the name "v_clrexcp" to the Inst_VOP1 base, then
    // sets the ALU flag.
    Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_clrexcp")

    {

        setFlag(ALU);

    } // Inst_VOP1__V_CLREXCP

    Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP() = default;

    Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP() {…}


    // --- description from .arch file ---

    // Clear wave's exception state in SIMD (SP).

    void


    Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_MOV_B64 class methods ---


    // Forwards iFmt and the name "v_mov_b64" to the Inst_VOP1 base, then
    // sets the ALU flag.
    Inst_VOP1__V_MOV_B64::Inst_VOP1__V_MOV_B64(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_mov_b64")

    {

        setFlag(ALU);

    } // Inst_VOP1__V_MOV_B64

    Inst_VOP1__V_MOV_B64::Inst_VOP1__V_MOV_B64(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_MOV_B64::~Inst_VOP1__V_MOV_B64() = default;

    Inst_VOP1__V_MOV_B64::~Inst_VOP1__V_MOV_B64() {…}


    // --- description from .arch file ---

    // D.u = S0.u.

    // Input and output modifiers not supported; this is an untyped operation.

    void
    Inst_VOP1__V_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Copies the 64-bit SRC0 element to VDST for every lane whose
        // exec-mask bit is set; other lanes are not assigned here.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        VecOperandU64 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            dst[idx] = src0[idx];
        }

        dst.write();
    } // execute

    Inst_VOP1__V_MOV_B64::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F16_U16 class methods ---


    // Forwards iFmt and the name "v_cvt_f16_u16" to the Inst_VOP1 base,
    // then sets the ALU and F16 flags.
    Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_cvt_f16_u16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_CVT_F16_U16

    Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16() = default;

    Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16() {…}


    // --- description from .arch file ---

    // D.f16 = uint16_to_flt16(S.u16).

    // Supports denormals, rounding, exception flags and saturation.

    void


    Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_F16_I16 class methods ---


    // Forwards iFmt and the name "v_cvt_f16_i16" to the Inst_VOP1 base,
    // then sets the ALU and F16 flags.
    Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_cvt_f16_i16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_CVT_F16_I16

    Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16() = default;

    Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16() {…}


    // --- description from .arch file ---

    // D.f16 = int16_to_flt16(S.i16).

    // Supports denormals, rounding, exception flags and saturation.

    void


    Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_U16_F16 class methods ---


    // Forwards iFmt and the name "v_cvt_u16_f16" to the Inst_VOP1 base,
    // then sets the ALU and F16 flags.
    Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_cvt_u16_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_CVT_U16_F16

    Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16() = default;

    Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16() {…}


    // --- description from .arch file ---

    // D.u16 = flt16_to_uint16(S.f16).

    // Supports rounding, exception flags and saturation.

    void


    Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CVT_I16_F16 class methods ---


    // Forwards iFmt and the name "v_cvt_i16_f16" to the Inst_VOP1 base,
    // then sets the ALU and F16 flags.
    Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_cvt_i16_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_CVT_I16_F16

    Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16() = default;

    Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16() {…}


    // --- description from .arch file ---

    // D.i16 = flt16_to_int16(S.f16).

    // Supports rounding, exception flags and saturation.

    void


    Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RCP_F16 class methods ---


    // Forwards iFmt and the name "v_rcp_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_rcp_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_RCP_F16

    Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16() = default;

    Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16() {…}


    // --- description from .arch file ---

    // if (S0.f16 == 1.0f)

    //     D.f16 = 1.0f;

    // else

    //     D.f16 = ApproximateRecip(S0.f16).

    void


    Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_SQRT_F16 class methods ---


    // Forwards iFmt and the name "v_sqrt_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_sqrt_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_SQRT_F16

    Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16() = default;

    Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16() {…}


    // --- description from .arch file ---

    // if (S0.f16 == 1.0f)

    //     D.f16 = 1.0f;

    // else

    //     D.f16 = ApproximateSqrt(S0.f16).

    void


    Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RSQ_F16 class methods ---


    // Forwards iFmt and the name "v_rsq_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_rsq_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_RSQ_F16

    Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16() = default;

    Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16() {…}


    // --- description from .arch file ---

    // if (S0.f16 == 1.0f)

    //     D.f16 = 1.0f;

    // else

    //     D.f16 = ApproximateRecipSqrt(S0.f16).

    void


    Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_LOG_F16 class methods ---


    // Forwards iFmt and the name "v_log_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_log_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_LOG_F16

    Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16() = default;

    Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16() {…}


    // --- description from .arch file ---

    // if (S0.f16 == 1.0f)

    //     D.f16 = 0.0f;

    // else

    //     D.f16 = ApproximateLog2(S0.f16).

    void


    Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_EXP_F16 class methods ---


    // Forwards iFmt and the name "v_exp_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_exp_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_EXP_F16

    Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16() = default;

    Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16() {…}


    // --- description from .arch file ---

    // if (S0.f16 == 0.0f)

    //     D.f16 = 1.0f;

    // else

    //     D.f16 = Approximate2ToX(S0.f16).

    void


    Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FREXP_MANT_F16 class methods ---


    // Forwards iFmt and the name "v_frexp_mant_f16" to the Inst_VOP1 base,
    // then sets the ALU and F16 flags.
    Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_frexp_mant_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_FREXP_MANT_F16

    Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16() = default;

    Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16() {…}


    // --- description from .arch file ---

    // if (S0.f16 == +-INF || S0.f16 == NAN)

    //     D.f16 = S0.f16;

    // else

    //     D.f16 = mantissa(S0.f16).

    // Result range is (-1.0,-0.5][0.5,1.0).

    // C math library frexp function.

    // Returns binary significand of half precision float input, such that the

    // original single float = significand * (2 ** exponent).

    void


    Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FREXP_EXP_I16_F16 class methods ---


    // Forwards iFmt and the name "v_frexp_exp_i16_f16" to the Inst_VOP1
    // base, then sets the ALU and F16 flags.
    Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16(

          InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_FREXP_EXP_I16_F16

    Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16( {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16() = default;

    Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16() {…}


    // --- description from .arch file ---

    // if (S0.f16 == +-INF || S0.f16 == NAN)

    //     D.i16 = 0;

    // else

    //     D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1).

    // C math library frexp function.

    // Returns exponent of half precision float input, such that the

    // original single float = significand * (2 ** exponent).

    void


    Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FLOOR_F16 class methods ---


    // Forwards iFmt and the name "v_floor_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_floor_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_FLOOR_F16

    Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16() = default;

    Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16() {…}


    // --- description from .arch file ---

    // D.f16 = trunc(S0.f16);

    // if (S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f.

    void


    Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_CEIL_F16 class methods ---


    // Forwards iFmt and the name "v_ceil_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_ceil_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_CEIL_F16

    Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16() = default;

    Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16() {…}


    // --- description from .arch file ---

    // D.f16 = trunc(S0.f16);

    // if (S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f.

    void


    Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_TRUNC_F16 class methods ---


    // Forwards iFmt and the name "v_trunc_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_trunc_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_TRUNC_F16

    Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16() = default;

    Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16() {…}


    // --- description from .arch file ---

    // D.f16 = trunc(S0.f16).

    // Round-to-zero semantics.

    void


    Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_RNDNE_F16 class methods ---


    // Forwards iFmt and the name "v_rndne_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_rndne_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_RNDNE_F16

    Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16() = default;

    Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16() {…}


    // --- description from .arch file ---

    // D.f16 = FLOOR(S0.f16 + 0.5f);

    // if (floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f.

    // Round-to-nearest-even semantics.

    void


    Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_FRACT_F16 class methods ---


    // Forwards iFmt and the name "v_fract_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_fract_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_FRACT_F16

    Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16() = default;

    Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16() {…}


    // --- description from .arch file ---

    // D.f16 = S0.f16 + -floor(S0.f16).

    void


    Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_SIN_F16 class methods ---


    // Forwards iFmt and the name "v_sin_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_sin_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_SIN_F16

    Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16() = default;

    Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16() {…}


    // --- description from .arch file ---

    // D.f16 = sin(S0.f16 * 2 * PI).

    void


    Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_COS_F16 class methods ---


    // Forwards iFmt and the name "v_cos_f16" to the Inst_VOP1 base, then
    // sets the ALU and F16 flags.
    Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_cos_f16")

    {

        setFlag(ALU);

        setFlag(F16);

    } // Inst_VOP1__V_COS_F16

    Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16() = default;

    Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16() {…}


    // --- description from .arch file ---

    // D.f16 = cos(S0.f16 * 2 * PI).

    void


    Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)

    {

        // No functional model yet: every call goes to panicUnimplemented()
        // and gpuDynInst is not used.
        panicUnimplemented();

    } // execute

    Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_EXP_LEGACY_F32 class methods ---


    // Forwards iFmt and the name "v_exp_legacy_f32" to the Inst_VOP1 base,
    // then sets the ALU and F32 flags.
    Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_exp_legacy_f32")

    {

        setFlag(ALU);

        setFlag(F32);

    } // Inst_VOP1__V_EXP_LEGACY_F32

    Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32() = default;

    Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32() {…}


    // --- description from .arch file ---

    // D.f = pow(2.0, S0.f) with legacy semantics.

    void
    Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives
        // std::pow(2.0, SRC0). No legacy-specific special cases are applied
        // in this body.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            dst[idx] = std::pow(2.0, src0[idx]);
        }

        dst.write();
    } // execute

    Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_LOG_LEGACY_F32 class methods ---


    // Forwards iFmt and the name "v_log_legacy_f32" to the Inst_VOP1 base,
    // then sets the ALU and F32 flags.
    Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_log_legacy_f32")

    {

        setFlag(ALU);

        setFlag(F32);

    } // Inst_VOP1__V_LOG_LEGACY_F32

    Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt) {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32() = default;

    Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32() {…}


    // --- description from .arch file ---

    // D.f = log2(S0.f). Base 2 logarithm with legacy semantics.

    void
    Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        // For each lane enabled in the exec mask, VDST receives
        // std::log2(SRC0). No legacy-specific special cases are applied in
        // this body.
        Wavefront *wave = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 dst(gpuDynInst, instData.VDST);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            dst[idx] = std::log2(src0[idx]);
        }

        dst.write();
    } // execute

    Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) {…}

    // --- Inst_VOP1__V_ACCVGPR_MOV_B32 class methods ---


    // Forwards iFmt and the name "v_accvgpr_mov_b32" to the Inst_VOP1 base,
    // then sets the ALU flag.
    Inst_VOP1__V_ACCVGPR_MOV_B32::

        Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1 *iFmt)

        : Inst_VOP1(iFmt, "v_accvgpr_mov_b32")

    {

        setFlag(ALU);

    } // Inst_VOP1__V_ACCVGPR_MOV_B32

    Inst_VOP1__V_ACCVGPR_MOV_B32:: {…}


    // The destructor body is empty, so the defaulted form is used.
    Inst_VOP1__V_ACCVGPR_MOV_B32::~Inst_VOP1__V_ACCVGPR_MOV_B32() = default;

    Inst_VOP1__V_ACCVGPR_MOV_B32::~Inst_VOP1__V_ACCVGPR_MOV_B32() {…}


    void
    Inst_VOP1__V_ACCVGPR_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // 32-bit per-lane move in which both register indices are shifted
        // by the wavefront's accumOffset before the operands are built.
        Wavefront *wave = gpuDynInst->wavefront();
        unsigned acc_base = wave->accumOffset;

        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0+acc_base);
        VecOperandU32 dst(gpuDynInst, instData.VDST+acc_base);

        src0.readSrc();

        // SDWA and DPP encodings are rejected with a panic.
        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);

        for (int idx = 0; idx < NumVecElemPerVecReg; ++idx) {
            if (!wave->execMask(idx)) {
                continue;
            }

            dst[idx] = src0[idx];
        }

        dst.write();
    } // execute

    Inst_VOP1__V_ACCVGPR_MOV_B32::execute(GPUDynInstPtr gpuDynInst) {…}

} // namespace VegaISA

} // namespace gem5

DPRINTF
#define DPRINTF(x,...)
Definition trace.hh:209

gem5::AMDGPU::mxfp
Definition mxfp.hh:53

gem5::AMDGPU::mxfp::data
uint32_t data
Definition mxfp.hh:112

gem5::GPUStaticInst::isDPPInst
bool isDPPInst() const
Definition gpu_static_inst.hh:116

gem5::GPUStaticInst::setFlag
void setFlag(Flags flag)
Definition gpu_static_inst.hh:250

gem5::GPUStaticInst::isSDWAInst
bool isSDWAInst() const
Definition gpu_static_inst.hh:115

gem5::GPUStaticInst::_opcode
const std::string _opcode
Definition gpu_static_inst.hh:304

gem5::SparcISA::Nop
Nop class.
Definition nop.hh:49

gem5::VegaISA::Inst_VOP1__V_ACCVGPR_MOV_B32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2571

gem5::VegaISA::Inst_VOP1__V_ACCVGPR_MOV_B32::~Inst_VOP1__V_ACCVGPR_MOV_B32
~Inst_VOP1__V_ACCVGPR_MOV_B32()
Definition vop1.cc:2566

gem5::VegaISA::Inst_VOP1__V_ACCVGPR_MOV_B32::Inst_VOP1__V_ACCVGPR_MOV_B32
Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1 *)
Definition vop1.cc:2560

gem5::VegaISA::Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32
~Inst_VOP1__V_BFREV_B32()
Definition vop1.cc:1694

gem5::VegaISA::Inst_VOP1__V_BFREV_B32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1702

gem5::VegaISA::Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32
Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *)
Definition vop1.cc:1688

gem5::VegaISA::Inst_VOP1__V_CEIL_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2380

gem5::VegaISA::Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16
~Inst_VOP1__V_CEIL_F16()
Definition vop1.cc:2372

gem5::VegaISA::Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16
Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *)
Definition vop1.cc:2365

gem5::VegaISA::Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32
~Inst_VOP1__V_CEIL_F32()
Definition vop1.cc:1127

gem5::VegaISA::Inst_VOP1__V_CEIL_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1135

gem5::VegaISA::Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32
Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *)
Definition vop1.cc:1120

gem5::VegaISA::Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64
~Inst_VOP1__V_CEIL_F64()
Definition vop1.cc:949

gem5::VegaISA::Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64
Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *)
Definition vop1.cc:942

gem5::VegaISA::Inst_VOP1__V_CEIL_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:957

gem5::VegaISA::Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP
Inst_VOP1__V_CLREXCP(InFmt_VOP1 *)
Definition vop1.cc:2037

gem5::VegaISA::Inst_VOP1__V_CLREXCP::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2050

gem5::VegaISA::Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP
~Inst_VOP1__V_CLREXCP()
Definition vop1.cc:2043

gem5::VegaISA::Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16
Inst_VOP1__V_COS_F16(InFmt_VOP1 *)
Definition vop1.cc:2469

gem5::VegaISA::Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16
~Inst_VOP1__V_COS_F16()
Definition vop1.cc:2476

gem5::VegaISA::Inst_VOP1__V_COS_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2483

gem5::VegaISA::Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32
~Inst_VOP1__V_COS_F32()
Definition vop1.cc:1617

gem5::VegaISA::Inst_VOP1__V_COS_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1626

gem5::VegaISA::Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32
Inst_VOP1__V_COS_F32(InFmt_VOP1 *)
Definition vop1.cc:1610

gem5::VegaISA::Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32
Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *)
Definition vop1.cc:442

gem5::VegaISA::Inst_VOP1__V_CVT_F16_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:457

gem5::VegaISA::Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32
~Inst_VOP1__V_CVT_F16_F32()
Definition vop1.cc:449

gem5::VegaISA::Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16
Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *)
Definition vop1.cc:2112

gem5::VegaISA::Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16
~Inst_VOP1__V_CVT_F16_I16()
Definition vop1.cc:2119

gem5::VegaISA::Inst_VOP1__V_CVT_F16_I16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2127

gem5::VegaISA::Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16
~Inst_VOP1__V_CVT_F16_U16()
Definition vop1.cc:2098

gem5::VegaISA::Inst_VOP1__V_CVT_F16_U16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2106

gem5::VegaISA::Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16
Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *)
Definition vop1.cc:2091

gem5::VegaISA::Inst_VOP1__V_CVT_F32_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:496

gem5::VegaISA::Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16
~Inst_VOP1__V_CVT_F32_F16()
Definition vop1.cc:488

gem5::VegaISA::Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16
Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *)
Definition vop1.cc:481

gem5::VegaISA::Inst_VOP1__V_CVT_F32_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:625

gem5::VegaISA::Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64
~Inst_VOP1__V_CVT_F32_F64()
Definition vop1.cc:618

gem5::VegaISA::Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64
Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *)
Definition vop1.cc:611

gem5::VegaISA::Inst_VOP1__V_CVT_F32_I32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:264

gem5::VegaISA::Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32
Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *)
Definition vop1.cc:250

gem5::VegaISA::Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32
~Inst_VOP1__V_CVT_F32_I32()
Definition vop1.cc:257

gem5::VegaISA::Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32
Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *)
Definition vop1.cc:285

gem5::VegaISA::Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32
~Inst_VOP1__V_CVT_F32_U32()
Definition vop1.cc:292

gem5::VegaISA::Inst_VOP1__V_CVT_F32_U32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:299

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0
Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *)
Definition vop1.cc:681

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0
~Inst_VOP1__V_CVT_F32_UBYTE0()
Definition vop1.cc:688

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE0::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:695

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1
~Inst_VOP1__V_CVT_F32_UBYTE1()
Definition vop1.cc:723

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE1::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:730

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1
Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *)
Definition vop1.cc:716

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2
Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *)
Definition vop1.cc:751

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE2::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:765

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2
~Inst_VOP1__V_CVT_F32_UBYTE2()
Definition vop1.cc:758

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3
~Inst_VOP1__V_CVT_F32_UBYTE3()
Definition vop1.cc:793

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3
Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *)
Definition vop1.cc:786

gem5::VegaISA::Inst_VOP1__V_CVT_F32_UBYTE3::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:800

gem5::VegaISA::Inst_VOP1__V_CVT_F64_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:660

gem5::VegaISA::Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32
Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *)
Definition vop1.cc:646

gem5::VegaISA::Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32
~Inst_VOP1__V_CVT_F64_F32()
Definition vop1.cc:653

gem5::VegaISA::Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32
Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *)
Definition vop1.cc:215

gem5::VegaISA::Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32
~Inst_VOP1__V_CVT_F64_I32()
Definition vop1.cc:222

gem5::VegaISA::Inst_VOP1__V_CVT_F64_I32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:229

gem5::VegaISA::Inst_VOP1__V_CVT_F64_U32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:886

gem5::VegaISA::Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32
Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *)
Definition vop1.cc:872

gem5::VegaISA::Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32
~Inst_VOP1__V_CVT_F64_U32()
Definition vop1.cc:879

gem5::VegaISA::Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32
Inst_VOP1__V_CVT_FLR_I32_F32(InFmt_VOP1 *)
Definition vop1.cc:554

gem5::VegaISA::Inst_VOP1__V_CVT_FLR_I32_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:569

gem5::VegaISA::Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32
~Inst_VOP1__V_CVT_FLR_I32_F32()
Definition vop1.cc:562

gem5::VegaISA::Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16
~Inst_VOP1__V_CVT_I16_F16()
Definition vop1.cc:2161

gem5::VegaISA::Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16
Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *)
Definition vop1.cc:2154

gem5::VegaISA::Inst_VOP1__V_CVT_I16_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2169

gem5::VegaISA::Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32
~Inst_VOP1__V_CVT_I32_F32()
Definition vop1.cc:378

gem5::VegaISA::Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32
Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *)
Definition vop1.cc:371

gem5::VegaISA::Inst_VOP1__V_CVT_I32_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:387

gem5::VegaISA::Inst_VOP1__V_CVT_I32_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:182

gem5::VegaISA::Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64
~Inst_VOP1__V_CVT_I32_F64()
Definition vop1.cc:173

gem5::VegaISA::Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64
Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *)
Definition vop1.cc:166

gem5::VegaISA::Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4
Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *)
Definition vop1.cc:590

gem5::VegaISA::Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4
~Inst_VOP1__V_CVT_OFF_F32_I4()
Definition vop1.cc:597

gem5::VegaISA::Inst_VOP1__V_CVT_OFF_F32_I4::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:604

gem5::VegaISA::Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32
Inst_VOP1__V_CVT_RPI_I32_F32(InFmt_VOP1 *)
Definition vop1.cc:518

gem5::VegaISA::Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32
~Inst_VOP1__V_CVT_RPI_I32_F32()
Definition vop1.cc:526

gem5::VegaISA::Inst_VOP1__V_CVT_RPI_I32_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:533

gem5::VegaISA::Inst_VOP1__V_CVT_U16_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2148

gem5::VegaISA::Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16
~Inst_VOP1__V_CVT_U16_F16()
Definition vop1.cc:2140

gem5::VegaISA::Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16
Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *)
Definition vop1.cc:2133

gem5::VegaISA::Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32
Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *)
Definition vop1.cc:320

gem5::VegaISA::Inst_VOP1__V_CVT_U32_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:336

gem5::VegaISA::Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32
~Inst_VOP1__V_CVT_U32_F32()
Definition vop1.cc:327

gem5::VegaISA::Inst_VOP1__V_CVT_U32_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:837

gem5::VegaISA::Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64
Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *)
Definition vop1.cc:821

gem5::VegaISA::Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64
~Inst_VOP1__V_CVT_U32_F64()
Definition vop1.cc:828

gem5::VegaISA::Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16
Inst_VOP1__V_EXP_F16(InFmt_VOP1 *)
Definition vop1.cc:2267

gem5::VegaISA::Inst_VOP1__V_EXP_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2284

gem5::VegaISA::Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16
~Inst_VOP1__V_EXP_F16()
Definition vop1.cc:2274

gem5::VegaISA::Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32
Inst_VOP1__V_EXP_F32(InFmt_VOP1 *)
Definition vop1.cc:1227

gem5::VegaISA::Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32
~Inst_VOP1__V_EXP_F32()
Definition vop1.cc:1234

gem5::VegaISA::Inst_VOP1__V_EXP_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1241

gem5::VegaISA::Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32
Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *)
Definition vop1.cc:2489

gem5::VegaISA::Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32
~Inst_VOP1__V_EXP_LEGACY_F32()
Definition vop1.cc:2496

gem5::VegaISA::Inst_VOP1__V_EXP_LEGACY_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2503

gem5::VegaISA::Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32
~Inst_VOP1__V_FFBH_I32()
Definition vop1.cc:1799

gem5::VegaISA::Inst_VOP1__V_FFBH_I32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1807

gem5::VegaISA::Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32
Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *)
Definition vop1.cc:1793

gem5::VegaISA::Inst_VOP1__V_FFBH_U32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1737

gem5::VegaISA::Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32
Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *)
Definition vop1.cc:1723

gem5::VegaISA::Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32
~Inst_VOP1__V_FFBH_U32()
Definition vop1.cc:1729

gem5::VegaISA::Inst_VOP1__V_FFBL_B32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1772

gem5::VegaISA::Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32
Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *)
Definition vop1.cc:1758

gem5::VegaISA::Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32
~Inst_VOP1__V_FFBL_B32()
Definition vop1.cc:1764

gem5::VegaISA::Inst_VOP1__V_FLOOR_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2359

gem5::VegaISA::Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16
~Inst_VOP1__V_FLOOR_F16()
Definition vop1.cc:2351

gem5::VegaISA::Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16
Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *)
Definition vop1.cc:2344

gem5::VegaISA::Inst_VOP1__V_FLOOR_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1206

gem5::VegaISA::Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32
~Inst_VOP1__V_FLOOR_F32()
Definition vop1.cc:1198

gem5::VegaISA::Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32
Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *)
Definition vop1.cc:1191

gem5::VegaISA::Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64
Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *)
Definition vop1.cc:1013

gem5::VegaISA::Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64
~Inst_VOP1__V_FLOOR_F64()
Definition vop1.cc:1020

gem5::VegaISA::Inst_VOP1__V_FLOOR_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1028

gem5::VegaISA::Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16
Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *)
Definition vop1.cc:2429

gem5::VegaISA::Inst_VOP1__V_FRACT_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2443

gem5::VegaISA::Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16
~Inst_VOP1__V_FRACT_F16()
Definition vop1.cc:2436

gem5::VegaISA::Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32
Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *)
Definition vop1.cc:1049

gem5::VegaISA::Inst_VOP1__V_FRACT_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1063

gem5::VegaISA::Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32
~Inst_VOP1__V_FRACT_F32()
Definition vop1.cc:1056

gem5::VegaISA::Inst_VOP1__V_FRACT_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1924

gem5::VegaISA::Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64
Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *)
Definition vop1.cc:1910

gem5::VegaISA::Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64
~Inst_VOP1__V_FRACT_F64()
Definition vop1.cc:1917

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16
Inst_VOP1__V_FREXP_EXP_I16_F16(InFmt_VOP1 *)
Definition vop1.cc:2317

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16
~Inst_VOP1__V_FREXP_EXP_I16_F16()
Definition vop1.cc:2325

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I16_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2338

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32
~Inst_VOP1__V_FREXP_EXP_I32_F32()
Definition vop1.cc:1954

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I32_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1965

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32
Inst_VOP1__V_FREXP_EXP_I32_F32(InFmt_VOP1 *)
Definition vop1.cc:1946

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64
~Inst_VOP1__V_FREXP_EXP_I32_F64()
Definition vop1.cc:1836

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I32_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1843

gem5::VegaISA::Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64
Inst_VOP1__V_FREXP_EXP_I32_F64(InFmt_VOP1 *)
Definition vop1.cc:1828

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16
~Inst_VOP1__V_FREXP_MANT_F16()
Definition vop1.cc:2297

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16
Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *)
Definition vop1.cc:2290

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2311

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32
Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *)
Definition vop1.cc:1992

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32
~Inst_VOP1__V_FREXP_MANT_F32()
Definition vop1.cc:1999

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2011

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1884

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64
Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *)
Definition vop1.cc:1870

gem5::VegaISA::Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64
~Inst_VOP1__V_FREXP_MANT_F64()
Definition vop1.cc:1877

gem5::VegaISA::Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16
Inst_VOP1__V_LOG_F16(InFmt_VOP1 *)
Definition vop1.cc:2244

gem5::VegaISA::Inst_VOP1__V_LOG_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2261

gem5::VegaISA::Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16
~Inst_VOP1__V_LOG_F16()
Definition vop1.cc:2251

gem5::VegaISA::Inst_VOP1__V_LOG_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1276

gem5::VegaISA::Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32
Inst_VOP1__V_LOG_F32(InFmt_VOP1 *)
Definition vop1.cc:1262

gem5::VegaISA::Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32
~Inst_VOP1__V_LOG_F32()
Definition vop1.cc:1269

gem5::VegaISA::Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32
Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *)
Definition vop1.cc:2524

gem5::VegaISA::Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32
~Inst_VOP1__V_LOG_LEGACY_F32()
Definition vop1.cc:2531

gem5::VegaISA::Inst_VOP1__V_LOG_LEGACY_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2538

gem5::VegaISA::Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32
Inst_VOP1__V_MOV_B32(InFmt_VOP1 *)
Definition vop1.cc:61

gem5::VegaISA::Inst_VOP1__V_MOV_B32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:75

gem5::VegaISA::Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32
~Inst_VOP1__V_MOV_B32()
Definition vop1.cc:67

gem5::VegaISA::Inst_VOP1__V_MOV_B64::~Inst_VOP1__V_MOV_B64
~Inst_VOP1__V_MOV_B64()
Definition vop1.cc:2062

gem5::VegaISA::Inst_VOP1__V_MOV_B64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2070

gem5::VegaISA::Inst_VOP1__V_MOV_B64::Inst_VOP1__V_MOV_B64
Inst_VOP1__V_MOV_B64(InFmt_VOP1 *)
Definition vop1.cc:2056

gem5::VegaISA::Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32
~Inst_VOP1__V_MOV_FED_B32()
Definition vop1.cc:426

gem5::VegaISA::Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32
Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *)
Definition vop1.cc:420

gem5::VegaISA::Inst_VOP1__V_MOV_FED_B32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:436

gem5::VegaISA::Inst_VOP1__V_NOP::Inst_VOP1__V_NOP
Inst_VOP1__V_NOP(InFmt_VOP1 *)
Definition vop1.cc:42

gem5::VegaISA::Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP
~Inst_VOP1__V_NOP()
Definition vop1.cc:49

gem5::VegaISA::Inst_VOP1__V_NOP::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:56

gem5::VegaISA::Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32
~Inst_VOP1__V_NOT_B32()
Definition vop1.cc:1659

gem5::VegaISA::Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32
Inst_VOP1__V_NOT_B32(InFmt_VOP1 *)
Definition vop1.cc:1653

gem5::VegaISA::Inst_VOP1__V_NOT_B32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1667

gem5::VegaISA::Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16
~Inst_VOP1__V_RCP_F16()
Definition vop1.cc:2182

gem5::VegaISA::Inst_VOP1__V_RCP_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2192

gem5::VegaISA::Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16
Inst_VOP1__V_RCP_F16(InFmt_VOP1 *)
Definition vop1.cc:2175

gem5::VegaISA::Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32
~Inst_VOP1__V_RCP_F32()
Definition vop1.cc:1304

gem5::VegaISA::Inst_VOP1__V_RCP_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1311

gem5::VegaISA::Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32
Inst_VOP1__V_RCP_F32(InFmt_VOP1 *)
Definition vop1.cc:1297

gem5::VegaISA::Inst_VOP1__V_RCP_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1418

gem5::VegaISA::Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64
Inst_VOP1__V_RCP_F64(InFmt_VOP1 *)
Definition vop1.cc:1404

gem5::VegaISA::Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64
~Inst_VOP1__V_RCP_F64()
Definition vop1.cc:1411

gem5::VegaISA::Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32
~Inst_VOP1__V_RCP_IFLAG_F32()
Definition vop1.cc:1339

gem5::VegaISA::Inst_VOP1__V_RCP_IFLAG_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1348

gem5::VegaISA::Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32
Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *)
Definition vop1.cc:1332

gem5::VegaISA::Inst_VOP1__V_READFIRSTLANE_B32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:143

gem5::VegaISA::Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32
Inst_VOP1__V_READFIRSTLANE_B32(InFmt_VOP1 *)
Definition vop1.cc:125

gem5::VegaISA::Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32
~Inst_VOP1__V_READFIRSTLANE_B32()
Definition vop1.cc:132

gem5::VegaISA::Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16
~Inst_VOP1__V_RNDNE_F16()
Definition vop1.cc:2414

gem5::VegaISA::Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16
Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *)
Definition vop1.cc:2407

gem5::VegaISA::Inst_VOP1__V_RNDNE_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2423

gem5::VegaISA::Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32
~Inst_VOP1__V_RNDNE_F32()
Definition vop1.cc:1163

gem5::VegaISA::Inst_VOP1__V_RNDNE_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1170

gem5::VegaISA::Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32
Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *)
Definition vop1.cc:1156

gem5::VegaISA::Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64
Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *)
Definition vop1.cc:978

gem5::VegaISA::Inst_VOP1__V_RNDNE_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:992

gem5::VegaISA::Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64
~Inst_VOP1__V_RNDNE_F64()
Definition vop1.cc:985

gem5::VegaISA::Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16
Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *)
Definition vop1.cc:2221

gem5::VegaISA::Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16
~Inst_VOP1__V_RSQ_F16()
Definition vop1.cc:2228

gem5::VegaISA::Inst_VOP1__V_RSQ_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2238

gem5::VegaISA::Inst_VOP1__V_RSQ_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1383

gem5::VegaISA::Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32
Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *)
Definition vop1.cc:1369

gem5::VegaISA::Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32
~Inst_VOP1__V_RSQ_F32()
Definition vop1.cc:1376

gem5::VegaISA::Inst_VOP1__V_RSQ_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1465

gem5::VegaISA::Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64
Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *)
Definition vop1.cc:1451

gem5::VegaISA::Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64
~Inst_VOP1__V_RSQ_F64()
Definition vop1.cc:1458

gem5::VegaISA::Inst_VOP1__V_SIN_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2463

gem5::VegaISA::Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16
~Inst_VOP1__V_SIN_F16()
Definition vop1.cc:2456

gem5::VegaISA::Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16
Inst_VOP1__V_SIN_F16(InFmt_VOP1 *)
Definition vop1.cc:2449

gem5::VegaISA::Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32
Inst_VOP1__V_SIN_F32(InFmt_VOP1 *)
Definition vop1.cc:1567

gem5::VegaISA::Inst_VOP1__V_SIN_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1583

gem5::VegaISA::Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32
~Inst_VOP1__V_SIN_F32()
Definition vop1.cc:1574

gem5::VegaISA::Inst_VOP1__V_SQRT_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2215

gem5::VegaISA::Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16
~Inst_VOP1__V_SQRT_F16()
Definition vop1.cc:2205

gem5::VegaISA::Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16
Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *)
Definition vop1.cc:2198

gem5::VegaISA::Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32
~Inst_VOP1__V_SQRT_F32()
Definition vop1.cc:1504

gem5::VegaISA::Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32
Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *)
Definition vop1.cc:1497

gem5::VegaISA::Inst_VOP1__V_SQRT_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1511

gem5::VegaISA::Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64
~Inst_VOP1__V_SQRT_F64()
Definition vop1.cc:1539

gem5::VegaISA::Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64
Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *)
Definition vop1.cc:1532

gem5::VegaISA::Inst_VOP1__V_SQRT_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1546

gem5::VegaISA::Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16
Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *)
Definition vop1.cc:2386

gem5::VegaISA::Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16
~Inst_VOP1__V_TRUNC_F16()
Definition vop1.cc:2393

gem5::VegaISA::Inst_VOP1__V_TRUNC_F16::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:2401

gem5::VegaISA::Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32
Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *)
Definition vop1.cc:1085

gem5::VegaISA::Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32
~Inst_VOP1__V_TRUNC_F32()
Definition vop1.cc:1092

gem5::VegaISA::Inst_VOP1__V_TRUNC_F32::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:1099

gem5::VegaISA::Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64
Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *)
Definition vop1.cc:907

gem5::VegaISA::Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64
~Inst_VOP1__V_TRUNC_F64()
Definition vop1.cc:914

gem5::VegaISA::Inst_VOP1__V_TRUNC_F64::execute
void execute(GPUDynInstPtr) override
Definition vop1.cc:921

gem5::VegaISA::Inst_VOP1
Definition op_encodings.hh:386

gem5::VegaISA::Inst_VOP1::instData
InFmt_VOP1 instData
Definition op_encodings.hh:398

gem5::VegaISA::Inst_VOP1::extData
InstFormat extData
Definition op_encodings.hh:400

gem5::VegaISA::ScalarOperand
Definition operand.hh:367

gem5::VegaISA::ScalarOperand::write
void write() override
Definition operand.hh:427

gem5::VegaISA::ScalarOperand::read
void read() override
read from and write to the underlying register(s) that this operand is referring to.
Definition operand.hh:409

gem5::VegaISA::ScalarOperand::rawData
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
Definition operand.hh:392

gem5::VegaISA::VEGAGPUStaticInst::panicUnimplemented
void panicUnimplemented() const
Definition gpu_static_inst.cc:54

gem5::VegaISA::VecOperand
Definition operand.hh:103

gem5::VegaISA::VecOperand::read
void read() override
read from the vrf.
Definition operand.hh:147

gem5::VegaISA::VecOperand::readSrc
void readSrc()
certain vector operands can read from the vrf/srf or constants.
Definition operand.hh:131

gem5::VegaISA::VecOperand::write
void write() override
write to the vrf.
Definition operand.hh:199

gem5::Wavefront
Definition wavefront.hh:61

gem5::Wavefront::accumOffset
uint32_t accumOffset
Definition wavefront.hh:137

gem5::Wavefront::execMask
VectorMask & execMask()
Definition wavefront.cc:1451

gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79

gem5::findLsbSet
constexpr int findLsbSet(uint64_t val)
Returns the bit position of the LSB that is set in the input. That function will either use a builtin ...
Definition bitfield.hh:369

gem5::reverseBits
std::enable_if_t< std::is_integral_v< T >, T > reverseBits(T val, size_t size=sizeof(T))
Takes a value and returns the bit reversed version.
Definition bitfield.hh:255

panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition logging.hh:214

inst_util.hh

instructions.hh

gem5::ArmISA::NumVecElemPerVecReg
constexpr unsigned NumVecElemPerVecReg
Definition vec.hh:61

gem5::VegaISA::firstOppositeSignBit
ScalarRegI32 firstOppositeSignBit(ScalarRegI32 val)
Definition inst_util.hh:174

gem5::VegaISA::VecElemI32
int32_t VecElemI32
Definition gpu_registers.hh:166

gem5::VegaISA::ScalarRegU64
uint64_t ScalarRegU64
Definition gpu_registers.hh:156

gem5::VegaISA::findFirstOne
ScalarRegI32 findFirstOne(T val)
Definition inst_util.hh:142

gem5::VegaISA::findFirstOneMsb
ScalarRegI32 findFirstOneMsb(T val)
Definition inst_util.hh:153

gem5::VegaISA::roundNearestEven
T roundNearestEven(T val)
Definition inst_util.hh:259

gem5::VegaISA::ScalarRegI32
int32_t ScalarRegI32
Definition gpu_registers.hh:154

gem5::VegaISA::VecElemU32
uint32_t VecElemU32
Definition gpu_registers.hh:165

gem5::VegaISA::VecElemF32
float VecElemF32
Definition gpu_registers.hh:167

gem5::VegaISA::REG_PI
@ REG_PI
Definition gpu_registers.hh:122

gem5::VegaISA::VecElemF64
double VecElemF64
Definition gpu_registers.hh:170

gem5::VegaISA::processDPP
void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, T &src0)
processDPP is a helper function for implementing Data Parallel Primitive instructions.
Definition inst_util.hh:424

gem5
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36

gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49

std::isinf
constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a)
Definition fp16_e5m10.hh:78

std::isnan
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)
Definition fp16_e5m10.hh:83

gem5::VegaISA::InFmt_VOP1
Definition gpu_decoder.hh:1867

gem5::VegaISA::InFmt_VOP1::SRC0
unsigned int SRC0
Definition gpu_decoder.hh:1868

gem5::VegaISA::InFmt_VOP1::VDST
unsigned int VDST
Definition gpu_decoder.hh:1870

gem5::VegaISA::InFmt_VOP_DPP::DPP_CTRL
unsigned int DPP_CTRL
Definition gpu_decoder.hh:1916

gem5::VegaISA::InFmt_VOP_DPP::BC
unsigned int BC
Definition gpu_decoder.hh:1918

gem5::VegaISA::InFmt_VOP_DPP::SRC0
unsigned int SRC0
Definition gpu_decoder.hh:1915

gem5::VegaISA::InFmt_VOP_DPP::SRC1_NEG
unsigned int SRC1_NEG
Definition gpu_decoder.hh:1921

gem5::VegaISA::InFmt_VOP_DPP::SRC0_NEG
unsigned int SRC0_NEG
Definition gpu_decoder.hh:1919

gem5::VegaISA::InFmt_VOP_DPP::BANK_MASK
unsigned int BANK_MASK
Definition gpu_decoder.hh:1923

gem5::VegaISA::InFmt_VOP_DPP::SRC1_ABS
unsigned int SRC1_ABS
Definition gpu_decoder.hh:1922

gem5::VegaISA::InFmt_VOP_DPP::SRC0_ABS
unsigned int SRC0_ABS
Definition gpu_decoder.hh:1920

gem5::VegaISA::InFmt_VOP_DPP::ROW_MASK
unsigned int ROW_MASK
Definition gpu_decoder.hh:1924

gem5::VegaISA::InstFormat::iFmt_VOP_DPP
InFmt_VOP_DPP iFmt_VOP_DPP
Definition gpu_decoder.hh:2031