release/current/operand_8hh_source.html

/*

 * Copyright (c) 2017-2021 Advanced Micro Devices, Inc.

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *

 * 1. Redistributions of source code must retain the above copyright notice,

 * this list of conditions and the following disclaimer.

 *

 * 2. Redistributions in binary form must reproduce the above copyright notice,

 * this list of conditions and the following disclaimer in the documentation

 * and/or other materials provided with the distribution.

 *

 * 3. Neither the name of the copyright holder nor the names of its

 * contributors may be used to endorse or promote products derived from this

 * software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

 * POSSIBILITY OF SUCH DAMAGE.

 */


#ifndef __ARCH_VEGA_OPERAND_HH__

#define __ARCH_VEGA_OPERAND_HH__


#include <array>


#include "arch/amdgpu/vega/gpu_registers.hh"

#include "arch/generic/vec_reg.hh"

#include "gpu-compute/scalar_register_file.hh"

#include "gpu-compute/shader.hh"

#include "gpu-compute/vector_register_file.hh"

#include "gpu-compute/wavefront.hh"


namespace gem5

{


namespace VegaISA

{

    /**
     * Type trait selecting the floating-point type associated with an
     * operand data type: float unless a specialisation says otherwise.
     */
    template<typename T>
    struct OpTraits
    {
        using FloatT = float;
    };

    /** ScalarRegF64 operands use double. */
    template<>
    struct OpTraits<ScalarRegF64>
    {
        using FloatT = double;
    };

    /** ScalarRegU64 operands use double. */
    template<>
    struct OpTraits<ScalarRegU64>
    {
        using FloatT = double;
    };


    /**
     * Abstract base class for instruction operands.
     *
     * An operand is bound at construction to the dynamic instruction that
     * owns it and to an operand index. Derived classes implement read() and
     * write() to move data between the operand and the register state.
     */
    class Operand
    {
      public:
        Operand() = delete;

        /**
         * @param gpuDynInst Dynamic instruction owning this operand; must be
         *                   non-null.
         * @param opIdx Operand index; must be non-negative.
         */
        Operand(GPUDynInstPtr gpuDynInst, int opIdx)
            : _gpuDynInst(gpuDynInst), _opIdx(opIdx)
        {
            assert(_gpuDynInst);
            assert(_opIdx >= 0);
        }

        // This class is a polymorphic base (it declares virtual functions),
        // so give it a virtual destructor: destroying a derived operand
        // through an Operand pointer is then well defined.
        virtual ~Operand() = default;

        /** Load the operand's value from its backing storage. */
        virtual void read() = 0;

        /** Store the operand's value to its backing storage. */
        virtual void write() = 0;

      protected:
        /** Instruction this operand belongs to. */
        GPUDynInstPtr _gpuDynInst;

        /** Operand index supplied at construction. */
        int _opIdx;
    };

    class Operand {…};


    // Forward declaration: VecOperand (defined next) holds a ScalarOperand
    // member, while the ScalarOperand definition appears later in this file.
    template<typename DataType, bool Const, size_t NumDwords>

    class ScalarOperand;


    template<typename DataType, bool Const,

        size_t NumDwords = sizeof(DataType) / sizeof(VecElemU32)>


    class VecOperand final : public Operand

    {

      static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,

            "Incorrect number of DWORDS for VEGA operand.");


      public:

        VecOperand() = delete;


        /**
         * Bind the operand to an instruction and operand index.
         *
         * The operand starts out as a vector operand with no modifiers, a
         * zeroed local value and no register-file pointers. The embedded
         * scalar operand is constructed with the same instruction and index.
         */
        VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
            : Operand(gpuDynInst, opIdx),
              scalar(false),
              absMod(false),
              negMod(false),
              scRegData(gpuDynInst, _opIdx),
              vrfData{}   // value-initialised: every entry is nullptr
        {
            vecReg.zero();
        }

        VecOperand(GPUDynInstPtr gpuDynInst, int opIdx) {…}


        // Empty destructor body: the pointers stored in vrfData are not
        // deleted here.
        ~VecOperand()

        {

        }

        ~VecOperand() {…}


        void


        readSrc()

        {

            if (isVectorReg(_opIdx)) {

                _opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront()

                    ->reservedScalarRegs);

                read();

            } else {

                readScalar();

            }

        }

        readSrc() {…}


        void


        read() override

        {

            assert(_gpuDynInst);

            assert(_gpuDynInst->wavefront());

            assert(_gpuDynInst->computeUnit());

            Wavefront *wf = _gpuDynInst->wavefront();

            ComputeUnit *cu = _gpuDynInst->computeUnit();


            for (auto i = 0; i < NumDwords; ++i) {

                int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx + i);

                vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);


                DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx);

                cu->vrf[wf->simdId]->printReg(wf, vgprIdx);

            }


            if (NumDwords == 1) {

                assert(vrfData[0]);

                auto vgpr = vecReg.template as<DataType>();

                auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();

                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {

                    std::memcpy((void*)&vgpr[lane],

                        (void*)&reg_file_vgpr[lane], sizeof(DataType));

                }

            } else if (NumDwords == 2) {

                assert(vrfData[0]);

                assert(vrfData[1]);

                auto vgpr = vecReg.template as<VecElemU64>();

                auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();

                auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();


                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {

                    VecElemU64 tmp_val(0);

                    ((VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];

                    ((VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];

                    vgpr[lane] = tmp_val;

                }

            }

        }

        read() override {…}


        void


        write() override

        {

            assert(_gpuDynInst);

            assert(_gpuDynInst->wavefront());

            assert(_gpuDynInst->computeUnit());

            Wavefront *wf = _gpuDynInst->wavefront();

            ComputeUnit *cu = _gpuDynInst->computeUnit();

            VectorMask &exec_mask = _gpuDynInst->isLoad()

                ? _gpuDynInst->exec_mask : wf->execMask();


            if (NumDwords == 1) {

                int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx);

                vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);

                assert(vrfData[0]);

                auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();

                auto vgpr = vecReg.template as<DataType>();


                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {

                    if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {

                        std::memcpy((void*)&reg_file_vgpr[lane],

                            (void*)&vgpr[lane], sizeof(DataType));

                    }

                }


                DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx);

                cu->vrf[wf->simdId]->printReg(wf, vgprIdx);

            } else if (NumDwords == 2) {

                int vgprIdx0 = cu->registerManager->mapVgpr(wf, _opIdx);

                int vgprIdx1 = cu->registerManager->mapVgpr(wf, _opIdx + 1);

                vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0);

                vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1);

                assert(vrfData[0]);

                assert(vrfData[1]);

                auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();

                auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();

                auto vgpr = vecReg.template as<VecElemU64>();


                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {

                    if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {

                        reg_file_vgpr0[lane] = ((VecElemU32*)&vgpr[lane])[0];

                        reg_file_vgpr1[lane] = ((VecElemU32*)&vgpr[lane])[1];

                    }

                }


                DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1);

                cu->vrf[wf->simdId]->printReg(wf, vgprIdx0);

                cu->vrf[wf->simdId]->printReg(wf, vgprIdx1);

            }

        }

        write() override {…}


        // Enable the negate modifier: values returned by the const
        // operator[] are negated (after the absolute-value modifier, if that
        // is enabled too). Nothing in this class clears the flag again.
        void


        negModifier()

        {

            negMod = true;

        }

        negModifier() {…}


        // Enable the absolute-value modifier: values returned by the const
        // operator[] are passed through std::fabs. Nothing in this class
        // clears the flag again.
        void


        absModifier()

        {

            absMod = true;

        }

        absModifier() {…}


        /**
         * Read-only element access, available for Const operands of one or
         * two dwords.
         *
         * If the operand was read as a scalar, every index yields the same
         * scalar value; otherwise the element at position idx of the local
         * vector value is returned. The absolute-value modifier is applied
         * first, then the negate modifier; both assert that DataType is a
         * floating-point type.
         *
         * @param idx Lane index; must be below NumVecElemPerVecReg.
         * @return The (possibly modified) element value, by value.
         */
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
        typename std::enable_if<Condition, const DataType>::type
        operator[](size_t idx) const
        {
            assert(idx < NumVecElemPerVecReg);

            DataType ret_val = scalar
                ? scRegData.rawData()
                : vecReg.template as<DataType>()[idx];

            if (absMod) {
                assert(std::is_floating_point_v<DataType>);
                ret_val = std::fabs(ret_val);
            }

            if (negMod) {
                assert(std::is_floating_point_v<DataType>);
                ret_val = -ret_val;
            }

            return ret_val;
        }

        operator[](size_t idx) const {…}


        // Writable element access, available for non-Const operands of one
        // or two dwords. Returns a reference to element idx of the local
        // vector value (vecReg). Asserts that the operand was not read as a
        // scalar and that idx is below NumVecElemPerVecReg. The abs/neg
        // modifiers are not applied on this path.
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>

        typename std::enable_if<Condition, DataType&>::type


        operator[](size_t idx)

        {

            assert(!scalar);

            assert(idx < NumVecElemPerVecReg);


            return vecReg.template as<DataType>()[idx];

        }

        operator[](size_t idx) {…}


        private:

          // Mark this operand as scalar-sourced and load its value through
          // the embedded scalar operand. Called by readSrc() when the
          // operand index is not a vector register.
          void


          readScalar()

          {

              scalar = true;

              scRegData.read();

          }

          readScalar() {…}


          // Container type for the local value: room for
          // NumVecElemPerVecReg elements of DataType.
          using VecRegCont =

              VecRegContainer<sizeof(DataType) * NumVecElemPerVecReg>;


          // True once the operand has been read through readScalar(); the
          // const operator[] then returns scRegData's value for every lane.
          bool scalar;

          // Absolute-value modifier flag, set by absModifier().
          bool absMod;

          // Negate modifier flag, set by negModifier().
          bool negMod;

          // Local copy of the operand value; filled by read() and copied to
          // the register file by write().
          VecRegCont vecReg;

          // Scalar source used when the operand is not a vector register.
          ScalarOperand<DataType, Const, NumDwords> scRegData;

          // Pointers to the register-file entries backing this operand, one
          // per dword; set by read() and write().
          std::array<VecRegContainerU32*, NumDwords> vrfData;

    };

    class VecOperand final : public Operand {…};


    template<typename DataType, bool Const,

        size_t NumDwords = sizeof(DataType) / sizeof(ScalarRegU32)>


    class ScalarOperand final : public Operand

    {

      static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,

            "Incorrect number of DWORDS for VEGA operand.");

      public:

        ScalarOperand() = delete;


        /**
         * Bind the operand to an instruction and operand index, and clear
         * all of its local dwords to zero.
         */
        ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
            : Operand(gpuDynInst, opIdx)
        {
            srfData.fill(0);
        }

        ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx) {…}


        // Empty destructor body: no explicit cleanup is performed.
        ~ScalarOperand()

        {

        }

        ~ScalarOperand() {…}


        template<bool Condition = NumDwords == 1 || NumDwords == 2>

        typename std::enable_if<Condition, DataType>::type


        rawData() const

        {

            assert(sizeof(DataType) <= sizeof(srfData));

            DataType raw_data((DataType)0);

            std::memcpy((void*)&raw_data, (void*)srfData.data(),

                sizeof(DataType));


            return raw_data;

        }

        rawData() const {…}


        /**
         * @return Untyped pointer to the first dword of the operand's local
         *         storage.
         */
        void*
        rawDataPtr()
        {
            return static_cast<void*>(srfData.data());
        }

        rawDataPtr() {…}


        /**
         * Load the operand's value into local storage.
         *
         * Operand indices that are not scalar registers are handled by
         * readSpecialVal(). Otherwise each dword is read from the scalar
         * register file of the wavefront's SIMD unit, using regIdx() to map
         * the dword to a physical register.
         */
        void
        read() override
        {
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            if (!isScalarReg(_opIdx)) {
                readSpecialVal();
                return;
            }

            auto &srf = *cu->srf[wf->simdId];
            for (size_t dword = 0; dword < NumDwords; ++dword) {
                const int sgprIdx = regIdx(static_cast<int>(dword));
                srfData[dword] = srf.read(sgprIdx);
                DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx);
                srf.printReg(wf, sgprIdx);
            }
        }

        read() override {…}


        void


        write() override

        {

            Wavefront *wf = _gpuDynInst->wavefront();

            ComputeUnit *cu = _gpuDynInst->computeUnit();


            if (!isScalarReg(_opIdx)) {

                if (_opIdx == REG_EXEC_LO) {

                    ScalarRegU64 new_exec_mask_val

                        = wf->execMask().to_ullong();

                    if (NumDwords == 1) {

                        std::memcpy((void*)&new_exec_mask_val,

                            (void*)srfData.data(), sizeof(VecElemU32));

                    } else if (NumDwords == 2) {

                        std::memcpy((void*)&new_exec_mask_val,

                            (void*)srfData.data(), sizeof(VecElemU64));

                    } else {

                        panic("Trying to write more than 2 DWORDS to EXEC\n");

                    }

                    VectorMask new_exec_mask(new_exec_mask_val);

                    wf->execMask() = new_exec_mask;

                    DPRINTF(GPUSRF, "Write EXEC\n");

                    DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);

                } else if (_opIdx == REG_EXEC_HI) {

                    assert(NumDwords == 1);

                    ScalarRegU32 new_exec_mask_hi_val(0);

                    ScalarRegU64 new_exec_mask_val

                        = wf->execMask().to_ullong();

                    std::memcpy((void*)&new_exec_mask_hi_val,

                        (void*)srfData.data(), sizeof(new_exec_mask_hi_val));

                    replaceBits(new_exec_mask_val, 63, 32,

                                new_exec_mask_hi_val);

                    VectorMask new_exec_mask(new_exec_mask_val);

                    wf->execMask() = new_exec_mask;

                    DPRINTF(GPUSRF, "Write EXEC\n");

                    DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);

                } else {

                    _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);

                }

            } else {

                for (auto i = 0; i < NumDwords; ++i) {

                    int sgprIdx = regIdx(i);

                    auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx);

                    if (_gpuDynInst->isLoad()) {

                        assert(sizeof(DataType) <= sizeof(ScalarRegU64));

                        sgpr = reinterpret_cast<ScalarRegU32*>(

                            _gpuDynInst->scalar_data)[i];

                    } else {

                        sgpr = srfData[i];

                    }

                    DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx);

                    cu->srf[wf->simdId]->printReg(wf, sgprIdx);

                }

            }

        }

        write() override {…}


        template<bool Condition = NumDwords == 1 || NumDwords == 2>

        typename std::enable_if<Condition, void>::type


        setBit(int bit, int bit_val)

        {

            GEM5_ALIGNED(8) DataType &sgpr = *((DataType*)srfData.data());

            replaceBits(sgpr, bit, bit_val);

        }

        setBit(int bit, int bit_val) {…}


        /**
         * Assign a new value to the operand's local storage. Available for
         * non-Const operands of one or two dwords.
         *
         * The bytes of rhs are copied to the start of the local dword
         * storage; nothing is written to the register file until write().
         *
         * @return Reference to this operand.
         */
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
        typename std::enable_if<Condition, ScalarOperand&>::type
        operator=(DataType rhs)
        {
            auto *dst = srfData.data();
            std::memcpy(dst, &rhs, sizeof(rhs));
            return *this;
        }

        operator=(DataType rhs) {…}


      private:

        void


        readSpecialVal()

        {

            assert(NumDwords == 1 || NumDwords == 2);


            if (_opIdx >= REG_INT_CONST_POS_MIN &&

                _opIdx <= REG_INT_CONST_NEG_MAX) {

                assert(sizeof(DataType) <= sizeof(srfData));

                DataType misc_val(0);

                assert(isConstVal(_opIdx));

                misc_val = (DataType)_gpuDynInst

                    ->readConstVal<DataType>(_opIdx);

                std::memcpy((void*)srfData.data(), (void*)&misc_val,

                            sizeof(DataType));


                return;

            }


            if (_opIdx == REG_M0 || _opIdx == REG_ZERO || _opIdx == REG_SCC) {

                assert(sizeof(DataType) <= sizeof(srfData));

                DataType misc_val(0);

                misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);

                std::memcpy((void*)srfData.data(), (void*)&misc_val,

                            sizeof(DataType));


                return;

            }


            switch(_opIdx) {

              case REG_EXEC_LO:

                {

                    if constexpr (NumDwords == 2) {

                        ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->

                            execMask().to_ullong();

                        std::memcpy((void*)srfData.data(), (void*)&exec_mask,

                            sizeof(exec_mask));

                        DPRINTF(GPUSRF, "Read EXEC\n");

                        DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);

                    } else {

                        ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->

                            execMask().to_ullong();


                        ScalarRegU32 exec_mask_lo = bits(exec_mask, 31, 0);

                        std::memcpy((void*)srfData.data(),

                            (void*)&exec_mask_lo, sizeof(exec_mask_lo));

                        DPRINTF(GPUSRF, "Read EXEC_LO\n");

                        DPRINTF(GPUSRF, "EXEC_LO = %#x\n", exec_mask_lo);

                    }

                }

                break;

              case REG_EXEC_HI:

                {

                    assert(NumDwords == 1);

                    ScalarRegU64 exec_mask = _gpuDynInst->wavefront()

                        ->execMask().to_ullong();


                    ScalarRegU32 exec_mask_hi = bits(exec_mask, 63, 32);

                    std::memcpy((void*)srfData.data(), (void*)&exec_mask_hi,

                                sizeof(exec_mask_hi));

                    DPRINTF(GPUSRF, "Read EXEC_HI\n");

                    DPRINTF(GPUSRF, "EXEC_HI = %#x\n", exec_mask_hi);

                }

                break;

              case REG_SRC_SWDA:

              case REG_SRC_DPP:

              case REG_SRC_LITERAL:

                srfData[0] = _gpuDynInst->srcLiteral();

                if constexpr (NumDwords == 2) {

                    if constexpr (std::is_integral_v<DataType>) {

                        if constexpr (std::is_signed_v<DataType>) {

                            if (bits(srfData[0], 31, 31) == 1) {

                                srfData[1] = 0xffffffff;

                            } else {

                                srfData[1] = 0;

                            }

                        } else {

                            srfData[1] = 0;

                        }

                    } else {

                        srfData[1] = _gpuDynInst->srcLiteral();

                        srfData[0] = 0;

                    }

                }

                break;

              case REG_SHARED_BASE:

                {

                    assert(NumDwords == 2);

                    if constexpr (NumDwords == 2) {

                        ComputeUnit *cu = _gpuDynInst->computeUnit();

                        ScalarRegU64 shared_base = cu->shader->ldsApe().base;

                        std::memcpy((void*)srfData.data(), (void*)&shared_base,

                                sizeof(srfData));

                        DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n",

                                shared_base);

                    }

                }

                break;

              case REG_SHARED_LIMIT:

                {

                    assert(NumDwords == 2);

                    if constexpr (NumDwords == 2) {

                        ComputeUnit *cu = _gpuDynInst->computeUnit();

                        ScalarRegU64 shared_limit = cu->shader->ldsApe().limit;

                        std::memcpy((void*)srfData.data(),

                                (void*)&shared_limit, sizeof(srfData));

                        DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n",

                                shared_limit);

                    }

                }

                break;

              case REG_PRIVATE_BASE:

                {

                    assert(NumDwords == 2);

                    if constexpr (NumDwords == 2) {

                        ComputeUnit *cu = _gpuDynInst->computeUnit();

                        ScalarRegU64 priv_base = cu->shader->scratchApe().base;

                        std::memcpy((void*)srfData.data(), (void*)&priv_base,

                                sizeof(srfData));

                        DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n",

                                priv_base);

                    }

                }

                break;

              case REG_PRIVATE_LIMIT:

                {

                    assert(NumDwords == 2);

                    if constexpr (NumDwords == 2) {

                        ComputeUnit *cu = _gpuDynInst->computeUnit();

                        ScalarRegU64 priv_limit =

                            cu->shader->scratchApe().limit;

                        std::memcpy((void*)srfData.data(), (void*)&priv_limit,

                                sizeof(srfData));

                        DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n",

                                priv_limit);

                    }

                }

                break;

              case REG_POS_HALF:

                {

                    typename OpTraits<DataType>::FloatT pos_half = 0.5;

                    std::memcpy((void*)srfData.data(), (void*)&pos_half,

                        sizeof(pos_half));


                }

                break;

              case REG_NEG_HALF:

                {

                    typename OpTraits<DataType>::FloatT neg_half = -0.5;

                    std::memcpy((void*)srfData.data(), (void*)&neg_half,

                        sizeof(neg_half));

                }

                break;

              case REG_POS_ONE:

                {

                    typename OpTraits<DataType>::FloatT pos_one = 1.0;

                    std::memcpy(srfData.data(), &pos_one, sizeof(pos_one));

                }

                break;

              case REG_NEG_ONE:

                {

                    typename OpTraits<DataType>::FloatT neg_one = -1.0;

                    std::memcpy(srfData.data(), &neg_one, sizeof(neg_one));

                }

                break;

              case REG_POS_TWO:

                {

                    typename OpTraits<DataType>::FloatT pos_two = 2.0;

                    std::memcpy(srfData.data(), &pos_two, sizeof(pos_two));

                }

                break;

              case REG_NEG_TWO:

                {

                    typename OpTraits<DataType>::FloatT neg_two = -2.0;

                    std::memcpy(srfData.data(), &neg_two, sizeof(neg_two));

                }

                break;

              case REG_POS_FOUR:

                {

                    typename OpTraits<DataType>::FloatT pos_four = 4.0;

                    std::memcpy(srfData.data(), &pos_four, sizeof(pos_four));

                }

                break;

              case REG_NEG_FOUR:

                {

                    typename OpTraits<DataType>::FloatT neg_four = -4.0;

                    std::memcpy((void*)srfData.data(), (void*)&neg_four ,

                        sizeof(neg_four));

                }

                break;

                case REG_PI:

                {

                    assert(sizeof(DataType) == sizeof(ScalarRegF64)

                        || sizeof(DataType) == sizeof(ScalarRegF32));


                    const ScalarRegU32 pi_u32(0x3e22f983UL);

                    const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL);


                    if (sizeof(DataType) == sizeof(ScalarRegF64)) {

                        std::memcpy((void*)srfData.data(),

                            (void*)&pi_u64, sizeof(pi_u64));

                    } else {

                        std::memcpy((void*)srfData.data(),

                            (void*)&pi_u32, sizeof(pi_u32));

                    }

                }

                break;

              default:

                panic("Invalid special register index: %d\n", _opIdx);

                break;

            }

        }

        readSpecialVal() {…}


        /**
         * Map one dword of this operand to a physical scalar register
         * index.
         *
         * REG_VCC_HI, REG_VCC_LO, REG_FLAT_SCRATCH_HI and
         * REG_FLAT_SCRATCH_LO are located 1, 2, 3 and 4 registers below the
         * wavefront's reservedScalarRegs count respectively; every other
         * operand index is used directly. In all cases 'dword' is added
         * before the index is mapped through the register manager.
         *
         * @param dword Which dword of the operand to map.
         * @return The mapped register index; asserted to be non-negative.
         */
        int
        regIdx(int dword) const
        {
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            // Map the register 'below_top' slots under reservedScalarRegs.
            auto map_reserved = [&](int below_top) {
                return cu->registerManager->mapSgpr(
                    wf, wf->reservedScalarRegs - below_top + dword);
            };

            int sgprIdx(-1);

            switch (_opIdx) {
              case REG_VCC_HI:
                sgprIdx = map_reserved(1);
                break;
              case REG_VCC_LO:
                sgprIdx = map_reserved(2);
                break;
              case REG_FLAT_SCRATCH_HI:
                sgprIdx = map_reserved(3);
                break;
              case REG_FLAT_SCRATCH_LO:
                assert(NumDwords == 1);
                sgprIdx = map_reserved(4);
                break;
              default:
                sgprIdx = cu->registerManager->mapSgpr(wf, _opIdx + dword);
                break;
            }

            assert(sgprIdx > -1);

            return sgprIdx;
        }

        regIdx(int dword) const {…}


        // Local copy of the operand's value, one entry per dword. read()
        // fills it; write() commits it.
        GEM5_ALIGNED(8) std::array<ScalarRegU32, NumDwords> srfData;

    };

    class ScalarOperand final : public Operand {…};


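    // Aliases for the various sizes/types of scalar operands. The 8- and
    // 16-bit variants pass NumDwords = 1 explicitly instead of using the
    // sizeof-based default; the 128/256/512-bit variants are built from
    // 32-bit elements with an explicit dword count.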

    using ScalarOperandU8 = ScalarOperand<ScalarRegU8, false, 1>;

    using ScalarOperandI8 = ScalarOperand<ScalarRegI8, false, 1>;

    using ScalarOperandU16 = ScalarOperand<ScalarRegU16, false, 1>;

    using ScalarOperandI16 = ScalarOperand<ScalarRegI16, false, 1>;

    using ScalarOperandU32 = ScalarOperand<ScalarRegU32, false>;

    using ScalarOperandI32 = ScalarOperand<ScalarRegI32, false>;

    using ScalarOperandF32 = ScalarOperand<ScalarRegF32, false>;

    using ScalarOperandU64 = ScalarOperand<ScalarRegU64, false>;

    using ScalarOperandI64 = ScalarOperand<ScalarRegI64, false>;

    using ScalarOperandF64 = ScalarOperand<ScalarRegF64, false>;

    using ScalarOperandU128 = ScalarOperand<ScalarRegU32, false, 4>;

    using ScalarOperandU256 = ScalarOperand<ScalarRegU32, false, 8>;

    using ScalarOperandU512 = ScalarOperand<ScalarRegU32, false, 16>;

    // non-writeable versions of scalar operands

    using ConstScalarOperandU8 = ScalarOperand<ScalarRegU8, true, 1>;

    using ConstScalarOperandI8 = ScalarOperand<ScalarRegI8, true, 1>;

    using ConstScalarOperandU16 = ScalarOperand<ScalarRegU16, true, 1>;

    using ConstScalarOperandI16 = ScalarOperand<ScalarRegI16, true, 1>;

    using ConstScalarOperandU32 = ScalarOperand<ScalarRegU32, true>;

    using ConstScalarOperandI32 = ScalarOperand<ScalarRegI32, true>;

    using ConstScalarOperandF32 = ScalarOperand<ScalarRegF32, true>;

    using ConstScalarOperandU64 = ScalarOperand<ScalarRegU64, true>;

    using ConstScalarOperandI64 = ScalarOperand<ScalarRegI64, true>;

    using ConstScalarOperandF64 = ScalarOperand<ScalarRegF64, true>;

    using ConstScalarOperandU128 = ScalarOperand<ScalarRegU32, true, 4>;

    using ConstScalarOperandU256 = ScalarOperand<ScalarRegU32, true, 8>;

    using ConstScalarOperandU512 = ScalarOperand<ScalarRegU32, true, 16>;

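    // Aliases for the various sizes/types of vector operands. The 8- and
    // 16-bit variants pass NumDwords = 1 explicitly instead of using the
    // sizeof-based default; the 96/128/256/512-bit variants are built from
    // 32-bit elements with an explicit dword count.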

    using VecOperandU8 = VecOperand<VecElemU8, false, 1>;

    using VecOperandI8 = VecOperand<VecElemI8, false, 1>;

    using VecOperandU16 = VecOperand<VecElemU16, false, 1>;

    using VecOperandI16 = VecOperand<VecElemI16, false, 1>;

    using VecOperandU32 = VecOperand<VecElemU32, false>;

    using VecOperandI32 = VecOperand<VecElemI32, false>;

    using VecOperandF32 = VecOperand<VecElemF32, false>;

    using VecOperandU64 = VecOperand<VecElemU64, false>;

    using VecOperandF64 = VecOperand<VecElemF64, false>;

    using VecOperandI64 = VecOperand<VecElemI64, false>;

    using VecOperandU96 = VecOperand<VecElemU32, false, 3>;

    using VecOperandU128 = VecOperand<VecElemU32, false, 4>;

    using VecOperandU256 = VecOperand<VecElemU32, false, 8>;

    using VecOperandU512 = VecOperand<VecElemU32, false, 16>;

    // non-writeable versions of vector operands

    using ConstVecOperandU8 = VecOperand<VecElemU8, true, 1>;

    using ConstVecOperandI8 = VecOperand<VecElemI8, true, 1>;

    using ConstVecOperandU16 = VecOperand<VecElemU16, true, 1>;

    using ConstVecOperandI16 = VecOperand<VecElemI16, true, 1>;

    using ConstVecOperandU32 = VecOperand<VecElemU32, true>;

    using ConstVecOperandI32 = VecOperand<VecElemI32, true>;

    using ConstVecOperandF32 = VecOperand<VecElemF32, true>;

    using ConstVecOperandU64 = VecOperand<VecElemU64, true>;

    using ConstVecOperandI64 = VecOperand<VecElemI64, true>;

    using ConstVecOperandF64 = VecOperand<VecElemF64, true>;

    using ConstVecOperandU96 = VecOperand<VecElemU32, true, 3>;

    using ConstVecOperandU128 = VecOperand<VecElemU32, true, 4>;

    using ConstVecOperandU256 = VecOperand<VecElemU32, true, 8>;

    using ConstVecOperandU512 = VecOperand<VecElemU32, true, 16>;


/**
 * Helper class for using multiple 32-bit dwords to represent data types
 * which do not divide a dword evenly.
 *
 * The storage is BITS bits wide, held as BITS / 32 dwords that are all
 * zero after construction. The logical view is:
 *
 *     dword N, dword N - 1, ..., dword 1, dword 0
 *
 * Elements are ELEM_SIZE bits wide and are packed back to back starting at
 * bit 0 of dword 0. For example, with ELEM_SIZE = 6 (fp6 types) bits [5:0]
 * hold element 0, bits [11:6] hold element 1, and so forth; element 5
 * occupies bits [35:30] and therefore spans dword 0 and dword 1.
 *
 * @tparam BITS Total storage width in bits. Must be a positive multiple of
 *              both 32 and ELEM_SIZE.
 * @tparam ELEM_SIZE Width of one packed element in bits, in [1, 32].
 */
template<int BITS, int ELEM_SIZE>
class PackedReg
{
    static_assert(ELEM_SIZE > 0);
    static_assert(ELEM_SIZE <= 32);
    static_assert(BITS > 0);
    static_assert(BITS % 32 == 0);
    static_assert(BITS % ELEM_SIZE == 0);

    static constexpr int NumDwords = BITS / 32;
    static constexpr int NumElems = BITS / ELEM_SIZE;

    // Right-justified mask covering one element. Built from a 64-bit one
    // so that the shift is well defined even when ELEM_SIZE is 32.
    static constexpr uint64_t ElemMask = (uint64_t(1) << ELEM_SIZE) - 1;

    uint32_t dwords[NumDwords] = {};

  public:
    PackedReg() = default;

    /**
     * Overwrite one whole dword of the storage.
     *
     * @param dw Dword index, in [0, BITS / 32).
     * @param value New contents of that dword.
     */
    void
    setDword(int dw, uint32_t value)
    {
        assert(dw >= 0 && dw < NumDwords);
        dwords[dw] = value;
    }

    /**
     * Read one whole dword of the storage.
     *
     * @param dw Dword index, in [0, BITS / 32).
     * @return The current contents of that dword.
     */
    uint32_t
    getDword(int dw) const
    {
        assert(dw >= 0 && dw < NumDwords);
        return dwords[dw];
    }

    /**
     * Extract one packed element.
     *
     * @param elem Element index, in [0, BITS / ELEM_SIZE).
     * @return The element, right justified in the low ELEM_SIZE bits.
     */
    uint32_t
    getElem(int elem) const
    {
        assert(elem >= 0 && elem < NumElems);

        // Lowest bit of the element, split into the dword that holds it
        // and the bit offset inside that dword.
        const int lbit = elem * ELEM_SIZE;
        const int ldw = lbit / 32;
        const int shift = lbit % 32;

        // Dword holding the highest bit of the element. Because
        // ELEM_SIZE <= 32 this is either ldw or ldw + 1.
        const int udw = (lbit + ELEM_SIZE - 1) / 32;

        // Gather the one or two dwords into a 64-bit window so a single
        // shift and mask handles both the contained and the spanning case.
        uint64_t window = dwords[ldw];
        if (udw != ldw) {
            window |= uint64_t(dwords[udw]) << 32;
        }

        return uint32_t((window >> shift) & ElemMask);
    }

    /**
     * Store one packed element without disturbing its neighbours.
     *
     * @param elem Element index, in [0, BITS / ELEM_SIZE).
     * @param value New element value. Only the low ELEM_SIZE bits are
     *              used; any higher bits are discarded.
     */
    void
    setElem(int elem, uint32_t value)
    {
        assert(elem >= 0 && elem < NumElems);

        // Lowest bit of the element, split into the dword that holds it
        // and the bit offset inside that dword.
        const int lbit = elem * ELEM_SIZE;
        const int ldw = lbit / 32;
        const int shift = lbit % 32;

        // Dword holding the highest bit of the element. Because
        // ELEM_SIZE <= 32 this is either ldw or ldw + 1.
        const int udw = (lbit + ELEM_SIZE - 1) / 32;
        const bool spans = udw != ldw;

        // Gather the one or two affected dwords into a 64-bit window.
        uint64_t window = dwords[ldw];
        if (spans) {
            window |= uint64_t(dwords[udw]) << 32;
        }

        // Clear the element's bits, then merge in the truncated value.
        // The value is widened to 64 bits before shifting so that bits
        // destined for the upper dword are not shifted out of range.
        const uint64_t field = ElemMask << shift;
        window &= ~field;
        window |= (uint64_t(value) & ElemMask) << shift;

        // Scatter the window back into the storage.
        dwords[ldw] = uint32_t(window);
        if (spans) {
            dwords[udw] = uint32_t(window >> 32);
        }
    }
};


}


} // namespace gem5


#endif // __ARCH_VEGA_OPERAND_HH__

DPRINTF
#define DPRINTF(x,...)
Definition trace.hh:209

gem5::ComputeUnit
Definition compute_unit.hh:203

gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition compute_unit.hh:298

gem5::ComputeUnit::registerManager
RegisterManager * registerManager
Definition compute_unit.hh:279

gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition compute_unit.hh:296

gem5::ComputeUnit::shader
Shader * shader
Definition compute_unit.hh:358

gem5::RegisterManager::mapVgpr
int mapVgpr(Wavefront *w, int vgprIndex)
Definition register_manager.cc:95

gem5::RegisterManager::mapSgpr
int mapSgpr(Wavefront *w, int sgprIndex)
Definition register_manager.cc:102

gem5::Shader::scratchApe
const ApertureRegister & scratchApe() const
Definition shader.hh:159

gem5::Shader::ldsApe
const ApertureRegister & ldsApe() const
Definition shader.hh:146

gem5::VecRegContainer< sizeof(DataType) *NumVecElemPerVecReg >

gem5::VegaISA::Operand
Definition operand.hh:65

gem5::VegaISA::Operand::write
virtual void write()=0

gem5::VegaISA::Operand::Operand
Operand()=delete

gem5::VegaISA::Operand::Operand
Operand(GPUDynInstPtr gpuDynInst, int opIdx)
Definition operand.hh:69

gem5::VegaISA::Operand::_gpuDynInst
GPUDynInstPtr _gpuDynInst
instruction object that owns this operand
Definition operand.hh:87

gem5::VegaISA::Operand::_opIdx
int _opIdx
op selector value for this operand.
Definition operand.hh:94

gem5::VegaISA::Operand::read
virtual void read()=0
read from and write to the underlying register(s) that this operand is referring to.

gem5::VegaISA::PackedReg
Definition operand.hh:844

gem5::VegaISA::PackedReg::getElem
uint32_t getElem(int elem)
Definition operand.hh:876

gem5::VegaISA::PackedReg::setDword
void setDword(int dw, uint32_t value)
Definition operand.hh:862

gem5::VegaISA::PackedReg::setElem
void setElem(int elem, uint32_t value)
Definition operand.hh:917

gem5::VegaISA::PackedReg::getDword
uint32_t getDword(int dw)
Definition operand.hh:869

gem5::VegaISA::PackedReg::PackedReg
PackedReg()=default

gem5::VegaISA::ScalarOperand
Definition operand.hh:367

gem5::VegaISA::ScalarOperand::~ScalarOperand
~ScalarOperand()
Definition operand.hh:379

gem5::VegaISA::ScalarOperand::ScalarOperand
ScalarOperand()=delete

gem5::VegaISA::ScalarOperand::write
void write() override
Definition operand.hh:427

gem5::VegaISA::ScalarOperand::regIdx
int regIdx(int dword) const
for scalars we need to do some extra work to figure out how to map the op selector to the sgpr idx be...
Definition operand.hh:741

gem5::VegaISA::ScalarOperand::read
void read() override
read from and write to the underlying register(s) that this operand is referring to.
Definition operand.hh:409

gem5::VegaISA::ScalarOperand::rawData
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
Definition operand.hh:392

gem5::VegaISA::ScalarOperand::readSpecialVal
void readSpecialVal()
we have determined that we are not reading our scalar operand data from the register file,...
Definition operand.hh:513

gem5::VegaISA::ScalarOperand::rawDataPtr
void * rawDataPtr()
Definition operand.hh:403

gem5::VegaISA::ScalarOperand::ScalarOperand
ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
Definition operand.hh:373

gem5::VegaISA::ScalarOperand::operator=
std::enable_if< Condition, ScalarOperand & >::type operator=(DataType rhs)
Definition operand.hh:499

gem5::VegaISA::ScalarOperand::setBit
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
bit access to scalar data.
Definition operand.hh:491

gem5::VegaISA::VecOperand
Definition operand.hh:103

gem5::VegaISA::VecOperand::VecOperand
VecOperand()=delete

gem5::VegaISA::VecOperand::vecReg
VecRegCont vecReg
this holds all the operand data in a single vector register object (i.e., if an operand is 64b,...
Definition operand.hh:351

gem5::VegaISA::VecOperand::scalar
bool scalar
whether this operand a scalar or not.
Definition operand.hh:337

gem5::VegaISA::VecOperand::read
void read() override
read from the vrf.
Definition operand.hh:147

gem5::VegaISA::VecOperand::readSrc
void readSrc()
certain vector operands can read from the vrf/srf or constants.
Definition operand.hh:131

gem5::VegaISA::VecOperand::vrfData
std::array< VecRegContainerU32 *, NumDwords > vrfData
pointers to the underlyding registers (i.e., the actual registers in the register file).
Definition operand.hh:361

gem5::VegaISA::VecOperand::absModifier
void absModifier()
Definition operand.hh:256

gem5::VegaISA::VecOperand::negModifier
void negModifier()
Definition operand.hh:250

gem5::VegaISA::VecOperand::operator[]
std::enable_if< Condition, DataType & >::type operator[](size_t idx)
setter [] operator.
Definition operand.hh:311

gem5::VegaISA::VecOperand::negMod
bool negMod
Definition operand.hh:345

gem5::VegaISA::VecOperand::absMod
bool absMod
absolute value and negative modifiers.
Definition operand.hh:344

gem5::VegaISA::VecOperand::~VecOperand
~VecOperand()
Definition operand.hh:118

gem5::VegaISA::VecOperand::write
void write() override
write to the vrf.
Definition operand.hh:199

gem5::VegaISA::VecOperand::operator[]
std::enable_if< Condition, constDataType >::type operator[](size_t idx) const
getter [] operator.
Definition operand.hh:268

gem5::VegaISA::VecOperand::scRegData
ScalarOperand< DataType, Const, NumDwords > scRegData
for src operands that read scalars (i.e., scalar regs or a scalar constant).
Definition operand.hh:356

gem5::VegaISA::VecOperand::readScalar
void readScalar()
if we determine that this operand is a scalar (reg or constant) then we read the scalar data into the...
Definition operand.hh:325

gem5::VegaISA::VecOperand::VecOperand
VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
Definition operand.hh:110

gem5::Wavefront
Definition wavefront.hh:61

gem5::Wavefront::reservedScalarRegs
int reservedScalarRegs
Definition wavefront.hh:200

gem5::Wavefront::simdId
const int simdId
Definition wavefront.hh:101

gem5::Wavefront::execMask
VectorMask & execMask()
Definition wavefront.cc:1451

gpu_registers.hh

gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79

gem5::replaceBits
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition bitfield.hh:216

panic
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188

gem5::ArmISA::mask
Bitfield< 3, 0 > mask
Definition pcstate.hh:63

gem5::ArmISA::i
Bitfield< 7 > i
Definition misc_types.hh:67

gem5::ArmISA::NumVecElemPerVecReg
constexpr unsigned NumVecElemPerVecReg
Definition vec.hh:61

gem5::RiscvISA::elem_mask
int elem_mask(const T *vs, const int index)
Definition utility.hh:307

gem5::VegaISA::isVectorReg
bool isVectorReg(int opIdx)
Definition gpu_registers.cc:253

gem5::VegaISA::ScalarRegF32
float ScalarRegF32
Definition gpu_registers.hh:155

gem5::VegaISA::VecElemI32
int32_t VecElemI32
Definition gpu_registers.hh:166

gem5::VegaISA::opSelectorToRegIdx
int opSelectorToRegIdx(int idx, int numScalarRegs)
Definition gpu_registers.cc:137

gem5::VegaISA::ScalarRegU64
uint64_t ScalarRegU64
Definition gpu_registers.hh:156

gem5::VegaISA::VecElemU16
uint16_t VecElemU16
Definition gpu_registers.hh:163

gem5::VegaISA::ScalarRegU16
uint16_t ScalarRegU16
Definition gpu_registers.hh:151

gem5::VegaISA::ScalarRegU8
uint8_t ScalarRegU8
Definition gpu_registers.hh:149

gem5::VegaISA::ScalarRegI8
int8_t ScalarRegI8
Definition gpu_registers.hh:150

gem5::VegaISA::ScalarRegI64
int64_t ScalarRegI64
Definition gpu_registers.hh:157

gem5::VegaISA::VecElemI64
int64_t VecElemI64
Definition gpu_registers.hh:169

gem5::VegaISA::isConstVal
bool isConstVal(int opIdx)
Definition gpu_registers.cc:209

gem5::VegaISA::ScalarRegI32
int32_t ScalarRegI32
Definition gpu_registers.hh:154

gem5::VegaISA::VecElemU32
uint32_t VecElemU32
Definition gpu_registers.hh:165

gem5::VegaISA::VecElemI8
int8_t VecElemI8
Definition gpu_registers.hh:162

gem5::VegaISA::VecElemF32
float VecElemF32
Definition gpu_registers.hh:167

gem5::VegaISA::VecElemU8
uint8_t VecElemU8
Definition gpu_registers.hh:161

gem5::VegaISA::isScalarReg
bool isScalarReg(int opIdx)
Definition gpu_registers.cc:240

gem5::VegaISA::VecElemU64
uint64_t VecElemU64
Definition gpu_registers.hh:168

gem5::VegaISA::VecElemI16
int16_t VecElemI16
Definition gpu_registers.hh:164

gem5::VegaISA::ScalarRegI16
int16_t ScalarRegI16
Definition gpu_registers.hh:152

gem5::VegaISA::MaxOperandDwords
constexpr size_t MaxOperandDwords(16)

gem5::VegaISA::REG_NEG_ONE
@ REG_NEG_ONE
Definition gpu_registers.hh:117

gem5::VegaISA::REG_SRC_LITERAL
@ REG_SRC_LITERAL
Definition gpu_registers.hh:130

gem5::VegaISA::REG_POS_ONE
@ REG_POS_ONE
Definition gpu_registers.hh:116

gem5::VegaISA::REG_SHARED_LIMIT
@ REG_SHARED_LIMIT
Definition gpu_registers.hh:110

gem5::VegaISA::REG_SRC_SWDA
@ REG_SRC_SWDA
Definition gpu_registers.hh:124

gem5::VegaISA::REG_FLAT_SCRATCH_LO
@ REG_FLAT_SCRATCH_LO
Definition gpu_registers.hh:52

gem5::VegaISA::REG_SHARED_BASE
@ REG_SHARED_BASE
Definition gpu_registers.hh:109

gem5::VegaISA::REG_POS_HALF
@ REG_POS_HALF
Definition gpu_registers.hh:114

gem5::VegaISA::REG_PRIVATE_BASE
@ REG_PRIVATE_BASE
Definition gpu_registers.hh:111

gem5::VegaISA::REG_ZERO
@ REG_ZERO
Definition gpu_registers.hh:78

gem5::VegaISA::REG_VCC_HI
@ REG_VCC_HI
Definition gpu_registers.hh:57

gem5::VegaISA::REG_PRIVATE_LIMIT
@ REG_PRIVATE_LIMIT
Definition gpu_registers.hh:112

gem5::VegaISA::REG_NEG_HALF
@ REG_NEG_HALF
Definition gpu_registers.hh:115

gem5::VegaISA::REG_INT_CONST_NEG_MAX
@ REG_INT_CONST_NEG_MAX
Definition gpu_registers.hh:82

gem5::VegaISA::REG_SRC_DPP
@ REG_SRC_DPP
Definition gpu_registers.hh:125

gem5::VegaISA::REG_SCC
@ REG_SCC
Definition gpu_registers.hh:128

gem5::VegaISA::REG_POS_FOUR
@ REG_POS_FOUR
Definition gpu_registers.hh:120

gem5::VegaISA::REG_POS_TWO
@ REG_POS_TWO
Definition gpu_registers.hh:118

gem5::VegaISA::REG_NEG_FOUR
@ REG_NEG_FOUR
Definition gpu_registers.hh:121

gem5::VegaISA::REG_NEG_TWO
@ REG_NEG_TWO
Definition gpu_registers.hh:119

gem5::VegaISA::REG_EXEC_LO
@ REG_EXEC_LO
Definition gpu_registers.hh:76

gem5::VegaISA::REG_VCC_LO
@ REG_VCC_LO
Definition gpu_registers.hh:56

gem5::VegaISA::REG_FLAT_SCRATCH_HI
@ REG_FLAT_SCRATCH_HI
Definition gpu_registers.hh:53

gem5::VegaISA::REG_INT_CONST_POS_MIN
@ REG_INT_CONST_POS_MIN
Definition gpu_registers.hh:79

gem5::VegaISA::REG_PI
@ REG_PI
Definition gpu_registers.hh:122

gem5::VegaISA::REG_EXEC_HI
@ REG_EXEC_HI
Definition gpu_registers.hh:77

gem5::VegaISA::REG_M0
@ REG_M0
Definition gpu_registers.hh:74

gem5::VegaISA::ScalarRegF64
double ScalarRegF64
Definition gpu_registers.hh:158

gem5::VegaISA::ScalarRegU32
uint32_t ScalarRegU32
Definition gpu_registers.hh:153

gem5::VegaISA::VecElemF64
double VecElemF64
Definition gpu_registers.hh:170

gem5
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36

gem5::GEM5_ALIGNED
typedef GEM5_ALIGNED(8) uint64_t uint64_ta

gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49

gem5::VectorMask
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition misc.hh:48

std
Overload hash function for BasicBlockRange type.
Definition binary32.hh:81

scalar_register_file.hh

shader.hh

gem5::ApertureRegister::base
Addr base
Definition shader.hh:76

gem5::ApertureRegister::limit
Addr limit
Definition shader.hh:77

gem5::VegaISA::OpTraits< ScalarRegF64 >::FloatT
double FloatT
Definition operand.hh:61

gem5::VegaISA::OpTraits< ScalarRegU64 >::FloatT
double FloatT
Definition operand.hh:62

gem5::VegaISA::OpTraits
convenience traits so we can automatically infer the correct FP type without looking at the number of...
Definition operand.hh:60

gem5::VegaISA::OpTraits::FloatT
float FloatT
Definition operand.hh:60

vec_reg.hh
Vector Registers layout specification.

vector_register_file.hh

wavefront.hh