release/v23-0-0-1/gcn3_2operand_8hh_source.html

/*

 * Copyright (c) 2017-2021 Advanced Micro Devices, Inc.

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *

 * 1. Redistributions of source code must retain the above copyright notice,

 * this list of conditions and the following disclaimer.

 *

 * 2. Redistributions in binary form must reproduce the above copyright notice,

 * this list of conditions and the following disclaimer in the documentation

 * and/or other materials provided with the distribution.

 *

 * 3. Neither the name of the copyright holder nor the names of its

 * contributors may be used to endorse or promote products derived from this

 * software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

 * POSSIBILITY OF SUCH DAMAGE.

 */


#ifndef __ARCH_GCN3_OPERAND_HH__

#define __ARCH_GCN3_OPERAND_HH__


#include <array>


#include "arch/amdgpu/gcn3/gpu_registers.hh"

#include "arch/generic/vec_reg.hh"

#include "gpu-compute/scalar_register_file.hh"

#include "gpu-compute/vector_register_file.hh"

#include "gpu-compute/wavefront.hh"


namespace gem5

{


namespace Gcn3ISA

{

    /**
     * Convenience traits that pick the floating-point type used when an
     * inline FP constant is materialised for an operand of type T, so the
     * choice does not have to be derived from the operand's DWORD count.
     * The default is float; the ScalarRegF64 and ScalarRegU64
     * specialisations select double.
     */
    template<typename T>
    struct OpTraits
    {
        using FloatT = float;
    };

    template<>
    struct OpTraits<ScalarRegF64>
    {
        using FloatT = double;
    };

    template<>
    struct OpTraits<ScalarRegU64>
    {
        using FloatT = double;
    };


    /**
     * Abstract base class for GCN3 instruction operands. It records the
     * dynamic instruction the operand belongs to and the operand's op
     * selector value, and declares the read()/write() interface that the
     * concrete scalar and vector operand classes implement.
     */
    class Operand
    {
      public:
        Operand() = delete;

        /**
         * @param gpuDynInst instruction object that owns this operand;
         *                   must be non-null.
         * @param opIdx op selector value for this operand; must be >= 0.
         */
        Operand(GPUDynInstPtr gpuDynInst, int opIdx)
            : _gpuDynInst(gpuDynInst), _opIdx(opIdx)
        {
            assert(_gpuDynInst);
            assert(_opIdx >= 0);
        }

        /**
         * Operand is a polymorphic base, so its destructor must be virtual:
         * otherwise destroying a derived operand through an Operand pointer
         * or reference is undefined behaviour.
         */
        virtual ~Operand() = default;

        /**
         * read from and write to the underlying register(s) that
         * this operand is referring to.
         */
        virtual void read() = 0;
        virtual void write() = 0;

      protected:
        /** instruction object that owns this operand. */
        GPUDynInstPtr _gpuDynInst;

        /** op selector value for this operand. */
        int _opIdx;
    };


    // Forward declaration: VecOperand (defined next) holds a ScalarOperand
    // member for sources that turn out to be scalar, so the template has to
    // be declared before VecOperand. The full definition, including the
    // default value for NumDwords, appears after VecOperand.
    template<typename DataType, bool Const, size_t NumDwords>

    class ScalarOperand;


    /**
     * Vector operand of a GCN3 instruction.
     *
     * The operand keeps a private copy of its data (vecReg) that the
     * instruction works on through operator[]. read() fills that copy from
     * the vector register file and write() commits it back. A source
     * operand may also turn out to be a scalar (scalar register or
     * constant); in that case the value lives in scRegData and is returned
     * for every lane.
     *
     * @tparam DataType per-lane element type.
     * @tparam Const when true only the read-only operator[] is available,
     *               when false only the writeable one.
     * @tparam NumDwords number of 32-bit registers the operand spans.
     *                   Element access and the data copies in
     *                   read()/write() are implemented for 1 and 2 only.
     */
    template<typename DataType, bool Const,
        size_t NumDwords = sizeof(DataType) / sizeof(VecElemU32)>
    class VecOperand final : public Operand
    {
      static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
            "Incorrect number of DWORDS for GCN3 operand.");

      public:
        VecOperand() = delete;

        /**
         * @param gpuDynInst instruction object that owns this operand.
         * @param opIdx op selector value for this operand.
         */
        VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
            : Operand(gpuDynInst, opIdx), scalar(false), absMod(false),
              negMod(false), scRegData(gpuDynInst, _opIdx),
              vrfData{{ nullptr }}
        {
            vecReg.zero();
        }

        ~VecOperand()
        {
        }

        /**
         * certain vector operands can read from the vrf/srf or constants.
         * If the op selector names a vector register it is translated to a
         * register index (this overwrites _opIdx) and the data is read from
         * the vrf; otherwise the operand is read as a scalar.
         */
        void
        readSrc()
        {
            if (isVectorReg(_opIdx)) {
                _opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront()
                    ->reservedScalarRegs);
                read();
            } else {
                readScalar();
            }
        }

        /**
         * read from the vrf. Every DWORD of the operand is mapped to its
         * register and a pointer to that register is kept in vrfData. For
         * 1- and 2-DWORD operands the register contents are then copied
         * into vecReg; for wider operands only the pointers are recorded.
         */
        void
        read() override
        {
            assert(_gpuDynInst);
            assert(_gpuDynInst->wavefront());
            assert(_gpuDynInst->computeUnit());
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            // NumDwords is bounded by MaxOperandDwords (see static_assert),
            // so it fits in an int; a signed index keeps "_opIdx + i" in
            // signed arithmetic.
            for (int i = 0; i < static_cast<int>(NumDwords); ++i) {
                int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx + i);
                vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);

                DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
            }

            if (NumDwords == 1) {
                assert(vrfData[0]);
                auto vgpr = vecReg.template as<DataType>();
                auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    // only sizeof(DataType) bytes of the 32-bit register
                    // element are copied, which covers sub-DWORD types.
                    std::memcpy((void*)&vgpr[lane],
                        (void*)&reg_file_vgpr[lane], sizeof(DataType));
                }
            } else if (NumDwords == 2) {
                assert(vrfData[0]);
                assert(vrfData[1]);
                auto vgpr = vecReg.template as<VecElemU64>();
                auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
                auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();

                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    // pack the two DWORDs into one 64-bit lane value. The
                    // first register supplies the first four bytes in
                    // memory order; memcpy is used rather than writing
                    // through a VecElemU32* into a VecElemU64 object, which
                    // would violate the aliasing rules.
                    const VecElemU32 dwords[2] =
                        { reg_file_vgpr0[lane], reg_file_vgpr1[lane] };
                    VecElemU64 packed(0);
                    std::memcpy(&packed, dwords, sizeof(packed));
                    vgpr[lane] = packed;
                }
            }
        }

        /**
         * write to the vrf. A lane is written when its bit in the exec mask
         * is set or when the instruction reports ignoreExec(). For loads
         * the instruction's own exec_mask member is used, otherwise the
         * wavefront's execMask(). Only 1- and 2-DWORD operands are handled;
         * nothing is written for wider operands.
         */
        void
        write() override
        {
            assert(_gpuDynInst);
            assert(_gpuDynInst->wavefront());
            assert(_gpuDynInst->computeUnit());
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();
            VectorMask &exec_mask = _gpuDynInst->isLoad()
                ? _gpuDynInst->exec_mask : wf->execMask();

            if (NumDwords == 1) {
                int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx);
                vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
                assert(vrfData[0]);
                auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
                auto vgpr = vecReg.template as<DataType>();

                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
                        std::memcpy((void*)&reg_file_vgpr[lane],
                            (void*)&vgpr[lane], sizeof(DataType));
                    }
                }

                DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
            } else if (NumDwords == 2) {
                int vgprIdx0 = cu->registerManager->mapVgpr(wf, _opIdx);
                int vgprIdx1 = cu->registerManager->mapVgpr(wf, _opIdx + 1);
                vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0);
                vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1);
                assert(vrfData[0]);
                assert(vrfData[1]);
                auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
                auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
                auto vgpr = vecReg.template as<VecElemU64>();

                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
                        // split the 64-bit lane value into its two DWORDs
                        // in memory order (the inverse of read()), again
                        // via memcpy to stay within the aliasing rules.
                        const VecElemU64 packed = vgpr[lane];
                        VecElemU32 dwords[2];
                        std::memcpy(dwords, &packed, sizeof(packed));
                        reg_file_vgpr0[lane] = dwords[0];
                        reg_file_vgpr1[lane] = dwords[1];
                    }
                }

                DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx0);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx1);
            }
        }

        /** request negation of every value returned by the getter. */
        void
        negModifier()
        {
            negMod = true;
        }

        /** request the absolute value of every value the getter returns. */
        void
        absModifier()
        {
            absMod = true;
        }

        /**
         * getter [] operator. only enable if this operand is constant
         * (i.e, a source operand) and if it can be represented using
         * primitive types (i.e., 8b to 64b primitives).
         *
         * For a scalar operand the same scalar value is returned for every
         * lane. The abs modifier is applied before the neg modifier.
         *
         * @param idx lane index; must be below NumVecElemPerVecReg.
         */
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
        typename std::enable_if_t<Condition, const DataType>
        operator[](size_t idx) const
        {
            assert(idx < NumVecElemPerVecReg);

            if (scalar) {
                return applyModifiers(scRegData.rawData());
            }

            auto vgpr = vecReg.template as<DataType>();
            return applyModifiers(vgpr[idx]);
        }

        /**
         * setter [] operator. only enable if this operand is non-constant
         * (i.e, a destination operand) and if it can be represented using
         * primitive types (i.e., 8b to 64b primitives).
         *
         * @param idx lane index; must be below NumVecElemPerVecReg.
         * @return reference to the lane's element in the operand's private
         *         copy; call write() to commit it to the vrf.
         */
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
        typename std::enable_if_t<Condition, DataType&>
        operator[](size_t idx)
        {
            assert(!scalar);
            assert(idx < NumVecElemPerVecReg);

            return vecReg.template as<DataType>()[idx];
        }

      private:
        /**
         * if we determine that this operand is a scalar (reg or constant)
         * then we read the scalar data into the scalar operand data member.
         */
        void
        readScalar()
        {
            scalar = true;
            scRegData.read();
        }

        /**
         * Apply the abs and then the neg modifier, when set, to a value
         * fetched by the getter. Both modifiers assert that DataType is a
         * floating-point type.
         */
        DataType
        applyModifiers(DataType val) const
        {
            if (absMod) {
                assert(std::is_floating_point_v<DataType>);
                val = std::fabs(val);
            }

            if (negMod) {
                assert(std::is_floating_point_v<DataType>);
                val = -val;
            }

            return val;
        }

        using VecRegCont =
            VecRegContainer<sizeof(DataType) * NumVecElemPerVecReg>;

        /** whether this operand a scalar or not. */
        bool scalar;

        /** absolute value and negative modifiers. */
        bool absMod;
        bool negMod;

        /**
         * this holds all the operand data in a single vector register
         * object, sized for one DataType element per lane.
         */
        VecRegCont vecReg;

        /**
         * for src operands that read scalars (i.e., scalar regs or
         * a scalar constant).
         */
        ScalarOperand<DataType, Const, NumDwords> scRegData;

        /**
         * pointers to the underlying registers (i.e., the actual
         * registers in the register file), one per DWORD.
         */
        std::array<VecRegContainerU32*, NumDwords> vrfData;
    };


    template<typename DataType, bool Const,

        size_t NumDwords = sizeof(DataType) / sizeof(ScalarRegU32)>

    class ScalarOperand final : public Operand

    {

      static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,

            "Incorrect number of DWORDS for GCN3 operand.");

      public:

        ScalarOperand() = delete;


        ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)

            : Operand(gpuDynInst, opIdx)

        {

            std::memset(srfData.data(), 0, NumDwords * sizeof(ScalarRegU32));

        }


        ~ScalarOperand()

        {

        }


        template<bool Condition = NumDwords == 1 || NumDwords == 2>

        typename std::enable_if_t<Condition, DataType>

        rawData() const

        {

            assert(sizeof(DataType) <= sizeof(srfData));

            DataType raw_data((DataType)0);

            std::memcpy((void*)&raw_data, (void*)srfData.data(),

                sizeof(DataType));


            return raw_data;

        }


        void*

        rawDataPtr()

        {

            return (void*)srfData.data();

        }


        void

        read() override

        {

            Wavefront *wf = _gpuDynInst->wavefront();

            ComputeUnit *cu = _gpuDynInst->computeUnit();


            if (!isScalarReg(_opIdx)) {

                readSpecialVal();

            } else {

                for (auto i = 0; i < NumDwords; ++i) {

                    int sgprIdx = regIdx(i);

                    srfData[i] = cu->srf[wf->simdId]->read(sgprIdx);

                    DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx);

                    cu->srf[wf->simdId]->printReg(wf, sgprIdx);

                }

            }

        }


        void

        write() override

        {

            Wavefront *wf = _gpuDynInst->wavefront();

            ComputeUnit *cu = _gpuDynInst->computeUnit();


            if (!isScalarReg(_opIdx)) {

                if (_opIdx == REG_EXEC_LO) {

                    ScalarRegU64 new_exec_mask_val

                        = wf->execMask().to_ullong();

                    if (NumDwords == 1) {

                        std::memcpy((void*)&new_exec_mask_val,

                            (void*)srfData.data(), sizeof(VecElemU32));

                    } else if (NumDwords == 2) {

                        std::memcpy((void*)&new_exec_mask_val,

                            (void*)srfData.data(), sizeof(VecElemU64));

                    } else {

                        panic("Trying to write more than 2 DWORDS to EXEC\n");

                    }

                    VectorMask new_exec_mask(new_exec_mask_val);

                    wf->execMask() = new_exec_mask;

                    DPRINTF(GPUSRF, "Write EXEC\n");

                    DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);

                } else if (_opIdx == REG_EXEC_HI) {

                    assert(NumDwords == 1);

                    ScalarRegU32 new_exec_mask_hi_val(0);

                    ScalarRegU64 new_exec_mask_val

                        = wf->execMask().to_ullong();

                    std::memcpy((void*)&new_exec_mask_hi_val,

                        (void*)srfData.data(), sizeof(new_exec_mask_hi_val));

                    replaceBits(new_exec_mask_val, 63, 32,

                                new_exec_mask_hi_val);

                    VectorMask new_exec_mask(new_exec_mask_val);

                    wf->execMask() = new_exec_mask;

                    DPRINTF(GPUSRF, "Write EXEC\n");

                    DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);

                } else {

                    _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);

                }

            } else {

                for (auto i = 0; i < NumDwords; ++i) {

                    int sgprIdx = regIdx(i);

                    auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx);

                    if (_gpuDynInst->isLoad()) {

                        assert(sizeof(DataType) <= sizeof(ScalarRegU64));

                        sgpr = reinterpret_cast<ScalarRegU32*>(

                            _gpuDynInst->scalar_data)[i];

                    } else {

                        sgpr = srfData[i];

                    }

                    DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx);

                    cu->srf[wf->simdId]->printReg(wf, sgprIdx);

                }

            }

        }


        template<bool Condition = NumDwords == 1 || NumDwords == 2>

        typename std::enable_if_t<Condition, void>

        setBit(int bit, int bit_val)

        {

            DataType &sgpr = *((DataType*)srfData.data());

            replaceBits(sgpr, bit, bit_val);

        }


        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>

        typename std::enable_if_t<Condition, ScalarOperand&>

        operator=(DataType rhs)

        {

            std::memcpy((void*)srfData.data(), (void*)&rhs, sizeof(DataType));

            return *this;

        }


      private:

        void

        readSpecialVal()

        {

            assert(NumDwords == 1 || NumDwords == 2);


            switch(_opIdx) {

              case REG_EXEC_LO:

                {

                    if (NumDwords == 1) {

                        ScalarRegU32 exec_mask = _gpuDynInst->wavefront()->

                            execMask().to_ulong();

                        std::memcpy((void*)srfData.data(), (void*)&exec_mask,

                            sizeof(exec_mask));

                        DPRINTF(GPUSRF, "Read EXEC\n");

                        DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);

                    } else {

                        assert(NumDwords == 2);

                        ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->

                            execMask().to_ullong();

                        std::memcpy((void*)srfData.data(), (void*)&exec_mask,

                            sizeof(exec_mask));

                        DPRINTF(GPUSRF, "Read EXEC\n");

                        DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);

                    }

                }

                break;

              case REG_EXEC_HI:

                {

                    assert(NumDwords == 1);

                    ScalarRegU64 exec_mask = _gpuDynInst->wavefront()

                        ->execMask().to_ullong();


                    ScalarRegU32 exec_mask_hi = bits(exec_mask, 63, 32);

                    std::memcpy((void*)srfData.data(), (void*)&exec_mask_hi,

                                sizeof(exec_mask_hi));

                    DPRINTF(GPUSRF, "Read EXEC_HI\n");

                    DPRINTF(GPUSRF, "EXEC_HI = %#x\n", exec_mask_hi);

                }

                break;

              case REG_SRC_SWDA:

              case REG_SRC_DPP:

              case REG_SRC_LITERAL:

                assert(NumDwords == 1);

                srfData[0] = _gpuDynInst->srcLiteral();

                break;

              case REG_POS_HALF:

                {

                    typename OpTraits<DataType>::FloatT pos_half = 0.5;

                    std::memcpy((void*)srfData.data(), (void*)&pos_half,

                        sizeof(pos_half));


                }

                break;

              case REG_NEG_HALF:

                {

                    typename OpTraits<DataType>::FloatT neg_half = -0.5;

                    std::memcpy((void*)srfData.data(), (void*)&neg_half,

                        sizeof(neg_half));

                }

                break;

              case REG_POS_ONE:

                {

                    typename OpTraits<DataType>::FloatT pos_one = 1.0;

                    std::memcpy(srfData.data(), &pos_one, sizeof(pos_one));

                }

                break;

              case REG_NEG_ONE:

                {

                    typename OpTraits<DataType>::FloatT neg_one = -1.0;

                    std::memcpy(srfData.data(), &neg_one, sizeof(neg_one));

                }

                break;

              case REG_POS_TWO:

                {

                    typename OpTraits<DataType>::FloatT pos_two = 2.0;

                    std::memcpy(srfData.data(), &pos_two, sizeof(pos_two));

                }

                break;

              case REG_NEG_TWO:

                {

                    typename OpTraits<DataType>::FloatT neg_two = -2.0;

                    std::memcpy(srfData.data(), &neg_two, sizeof(neg_two));

                }

                break;

              case REG_POS_FOUR:

                {

                    typename OpTraits<DataType>::FloatT pos_four = 4.0;

                    std::memcpy(srfData.data(), &pos_four, sizeof(pos_four));

                }

                break;

              case REG_NEG_FOUR:

                {

                    typename OpTraits<DataType>::FloatT neg_four = -4.0;

                    std::memcpy((void*)srfData.data(), (void*)&neg_four ,

                        sizeof(neg_four));

                }

                break;

                case REG_PI:

                {

                    assert(sizeof(DataType) == sizeof(ScalarRegF64)

                        || sizeof(DataType) == sizeof(ScalarRegF32));


                    const ScalarRegU32 pi_u32(0x3e22f983UL);

                    const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL);


                    if (sizeof(DataType) == sizeof(ScalarRegF64)) {

                        std::memcpy((void*)srfData.data(),

                            (void*)&pi_u64, sizeof(pi_u64));

                    } else {

                        std::memcpy((void*)srfData.data(),

                            (void*)&pi_u32, sizeof(pi_u32));

                    }

                }

                break;

              default:

                {

                    assert(sizeof(DataType) <= sizeof(srfData));

                    DataType misc_val(0);

                    if (isConstVal(_opIdx)) {

                        misc_val = (DataType)_gpuDynInst

                            ->readConstVal<DataType>(_opIdx);

                    } else {

                        misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);

                    }

                    std::memcpy((void*)srfData.data(), (void*)&misc_val,

                                sizeof(DataType));

                }

            }

        }


        int

        regIdx(int dword) const

        {

            Wavefront *wf = _gpuDynInst->wavefront();

            ComputeUnit *cu = _gpuDynInst->computeUnit();

            int sgprIdx(-1);


            if (_opIdx == REG_VCC_HI) {

                sgprIdx = cu->registerManager

                    ->mapSgpr(wf, wf->reservedScalarRegs - 1 + dword);

            } else if (_opIdx == REG_VCC_LO) {

                sgprIdx = cu->registerManager

                    ->mapSgpr(wf, wf->reservedScalarRegs - 2 + dword);

            } else if (_opIdx == REG_FLAT_SCRATCH_HI) {

                sgprIdx = cu->registerManager

                    ->mapSgpr(wf, wf->reservedScalarRegs - 3 + dword);

            } else if (_opIdx == REG_FLAT_SCRATCH_LO) {

                assert(NumDwords == 1);

                sgprIdx = cu->registerManager

                    ->mapSgpr(wf, wf->reservedScalarRegs - 4 + dword);

            } else {

                sgprIdx = cu->registerManager->mapSgpr(wf, _opIdx + dword);

            }


            assert(sgprIdx > -1);


            return sgprIdx;

        }


        std::array<ScalarRegU32, NumDwords> srfData;

    };


    // Aliases for the writeable (Const = false) scalar operands of each
    // size/type. The 8b and 16b variants pass NumDwords = 1 explicitly
    // rather than relying on the sizeof-derived default (presumably because
    // that default would evaluate to 0 for types narrower than a DWORD --
    // the ScalarReg*8/16 definitions are not visible in this file). The
    // 128b/256b/512b variants are built from 32b elements with an explicit
    // DWORD count of 4, 8 and 16.

    using ScalarOperandU8 = ScalarOperand<ScalarRegU8, false, 1>;

    using ScalarOperandI8 = ScalarOperand<ScalarRegI8, false, 1>;

    using ScalarOperandU16 = ScalarOperand<ScalarRegU16, false, 1>;

    using ScalarOperandI16 = ScalarOperand<ScalarRegI16, false, 1>;

    using ScalarOperandU32 = ScalarOperand<ScalarRegU32, false>;

    using ScalarOperandI32 = ScalarOperand<ScalarRegI32, false>;

    using ScalarOperandF32 = ScalarOperand<ScalarRegF32, false>;

    using ScalarOperandU64 = ScalarOperand<ScalarRegU64, false>;

    using ScalarOperandI64 = ScalarOperand<ScalarRegI64, false>;

    using ScalarOperandF64 = ScalarOperand<ScalarRegF64, false>;

    using ScalarOperandU128 = ScalarOperand<ScalarRegU32, false, 4>;

    using ScalarOperandU256 = ScalarOperand<ScalarRegU32, false, 8>;

    using ScalarOperandU512 = ScalarOperand<ScalarRegU32, false, 16>;

    // Non-writeable (Const = true) versions of the scalar operands above;
    // the assignment operator is disabled for these.

    using ConstScalarOperandU8 = ScalarOperand<ScalarRegU8, true, 1>;

    using ConstScalarOperandI8 = ScalarOperand<ScalarRegI8, true, 1>;

    using ConstScalarOperandU16 = ScalarOperand<ScalarRegU16, true, 1>;

    using ConstScalarOperandI16 = ScalarOperand<ScalarRegI16, true, 1>;

    using ConstScalarOperandU32 = ScalarOperand<ScalarRegU32, true>;

    using ConstScalarOperandI32 = ScalarOperand<ScalarRegI32, true>;

    using ConstScalarOperandF32 = ScalarOperand<ScalarRegF32, true>;

    using ConstScalarOperandU64 = ScalarOperand<ScalarRegU64, true>;

    using ConstScalarOperandI64 = ScalarOperand<ScalarRegI64, true>;

    using ConstScalarOperandF64 = ScalarOperand<ScalarRegF64, true>;

    using ConstScalarOperandU128 = ScalarOperand<ScalarRegU32, true, 4>;

    using ConstScalarOperandU256 = ScalarOperand<ScalarRegU32, true, 8>;

    using ConstScalarOperandU512 = ScalarOperand<ScalarRegU32, true, 16>;

    // Aliases for the writeable (Const = false) vector operands of each
    // size/type; these expose the setter operator[]. As with the scalar
    // aliases, the 8b/16b variants and the multi-DWORD U96..U512 variants
    // spell out NumDwords explicitly.

    using VecOperandU8 = VecOperand<VecElemU8, false, 1>;

    using VecOperandI8 = VecOperand<VecElemI8, false, 1>;

    using VecOperandU16 = VecOperand<VecElemU16, false, 1>;

    using VecOperandI16 = VecOperand<VecElemI16, false, 1>;

    using VecOperandU32 = VecOperand<VecElemU32, false>;

    using VecOperandI32 = VecOperand<VecElemI32, false>;

    using VecOperandF32 = VecOperand<VecElemF32, false>;

    using VecOperandU64 = VecOperand<VecElemU64, false>;

    using VecOperandF64 = VecOperand<VecElemF64, false>;

    using VecOperandI64 = VecOperand<VecElemI64, false>;

    using VecOperandU96 = VecOperand<VecElemU32, false, 3>;

    using VecOperandU128 = VecOperand<VecElemU32, false, 4>;

    using VecOperandU256 = VecOperand<VecElemU32, false, 8>;

    using VecOperandU512 = VecOperand<VecElemU32, false, 16>;

    // Non-writeable (Const = true) versions of the vector operands above;
    // these expose the getter operator[] (for the 1- and 2-DWORD variants).

    using ConstVecOperandU8 = VecOperand<VecElemU8, true, 1>;

    using ConstVecOperandI8 = VecOperand<VecElemI8, true, 1>;

    using ConstVecOperandU16 = VecOperand<VecElemU16, true, 1>;

    using ConstVecOperandI16 = VecOperand<VecElemI16, true, 1>;

    using ConstVecOperandU32 = VecOperand<VecElemU32, true>;

    using ConstVecOperandI32 = VecOperand<VecElemI32, true>;

    using ConstVecOperandF32 = VecOperand<VecElemF32, true>;

    using ConstVecOperandU64 = VecOperand<VecElemU64, true>;

    using ConstVecOperandI64 = VecOperand<VecElemI64, true>;

    using ConstVecOperandF64 = VecOperand<VecElemF64, true>;

    using ConstVecOperandU96 = VecOperand<VecElemU32, true, 3>;

    using ConstVecOperandU128 = VecOperand<VecElemU32, true, 4>;

    using ConstVecOperandU256 = VecOperand<VecElemU32, true, 8>;

    using ConstVecOperandU512 = VecOperand<VecElemU32, true, 16>;

}


} // namespace gem5


#endif // __ARCH_GCN3_OPERAND_HH__

DPRINTF
#define DPRINTF(x,...)
Definition trace.hh:210

gem5::ComputeUnit
Definition compute_unit.hh:202

gem5::ComputeUnit::srf
std::vector< ScalarRegisterFile * > srf
Definition compute_unit.hh:297

gem5::ComputeUnit::registerManager
RegisterManager * registerManager
Definition compute_unit.hh:278

gem5::ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition compute_unit.hh:295

gem5::Gcn3ISA::Operand
Definition operand.hh:64

gem5::Gcn3ISA::Operand::Operand
Operand(GPUDynInstPtr gpuDynInst, int opIdx)
Definition operand.hh:68

gem5::Gcn3ISA::Operand::_gpuDynInst
GPUDynInstPtr _gpuDynInst
instruction object that owns this operand
Definition operand.hh:86

gem5::Gcn3ISA::Operand::_opIdx
int _opIdx
op selector value for this operand.
Definition operand.hh:93

gem5::Gcn3ISA::Operand::write
virtual void write()=0

gem5::Gcn3ISA::Operand::Operand
Operand()=delete

gem5::Gcn3ISA::Operand::read
virtual void read()=0
read from and write to the underlying register(s) that this operand is referring to.

gem5::Gcn3ISA::ScalarOperand
Definition operand.hh:366

gem5::Gcn3ISA::ScalarOperand::ScalarOperand
ScalarOperand()=delete

gem5::Gcn3ISA::ScalarOperand::setBit
std::enable_if_t< Condition, void > setBit(int bit, int bit_val)
bit access to scalar data.
Definition operand.hh:490

gem5::Gcn3ISA::ScalarOperand::rawData
std::enable_if_t< Condition, DataType > rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
Definition operand.hh:391

gem5::Gcn3ISA::ScalarOperand::~ScalarOperand
~ScalarOperand()
Definition operand.hh:378

gem5::Gcn3ISA::ScalarOperand::read
void read() override
read from and write to the underlying register(s) that this operand is referring to.
Definition operand.hh:408

gem5::Gcn3ISA::ScalarOperand::rawDataPtr
void * rawDataPtr()
Definition operand.hh:402

gem5::Gcn3ISA::ScalarOperand::write
void write() override
Definition operand.hh:426

gem5::Gcn3ISA::ScalarOperand::srfData
std::array< ScalarRegU32, NumDwords > srfData
in GCN3 each register is represented as a 32b unsigned value, however operands may require up to 16 r...
Definition operand.hh:687

gem5::Gcn3ISA::ScalarOperand::readSpecialVal
void readSpecialVal()
we have determined that we are not reading our scalar operand data from the register file,...
Definition operand.hh:512

gem5::Gcn3ISA::ScalarOperand::ScalarOperand
ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
Definition operand.hh:372

gem5::Gcn3ISA::ScalarOperand::regIdx
int regIdx(int dword) const
for scalars we need to do some extra work to figure out how to map the op selector to the sgpr idx be...
Definition operand.hh:651

gem5::Gcn3ISA::ScalarOperand::operator=
std::enable_if_t< Condition, ScalarOperand & > operator=(DataType rhs)
Definition operand.hh:498

gem5::Gcn3ISA::VecOperand
Definition operand.hh:102

gem5::Gcn3ISA::VecOperand::readSrc
void readSrc()
certain vector operands can read from the vrf/srf or constants.
Definition operand.hh:130

gem5::Gcn3ISA::VecOperand::negModifier
void negModifier()
Definition operand.hh:249

gem5::Gcn3ISA::VecOperand::operator[]
std::enable_if_t< Condition, DataType & > operator[](size_t idx)
setter [] operator.
Definition operand.hh:310

gem5::Gcn3ISA::VecOperand::VecOperand
VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
Definition operand.hh:109

gem5::Gcn3ISA::VecOperand::write
void write() override
write to the vrf.
Definition operand.hh:198

gem5::Gcn3ISA::VecOperand::scRegData
ScalarOperand< DataType, Const, NumDwords > scRegData
for src operands that read scalars (i.e., scalar regs or a scalar constant).
Definition operand.hh:355

gem5::Gcn3ISA::VecOperand::absModifier
void absModifier()
Definition operand.hh:255

gem5::Gcn3ISA::VecOperand::negMod
bool negMod
Definition operand.hh:344

gem5::Gcn3ISA::VecOperand::read
void read() override
read from the vrf.
Definition operand.hh:146

gem5::Gcn3ISA::VecOperand::~VecOperand
~VecOperand()
Definition operand.hh:117

gem5::Gcn3ISA::VecOperand::operator[]
std::enable_if_t< Condition, const DataType > operator[](size_t idx) const
getter [] operator.
Definition operand.hh:267

gem5::Gcn3ISA::VecOperand::absMod
bool absMod
absolute value and negative modifiers.
Definition operand.hh:343

gem5::Gcn3ISA::VecOperand::readScalar
void readScalar()
if we determine that this operand is a scalar (reg or constant) then we read the scalar data into the...
Definition operand.hh:324

gem5::Gcn3ISA::VecOperand::vrfData
std::array< VecRegContainerU32 *, NumDwords > vrfData
pointers to the underlyding registers (i.e., the actual registers in the register file).
Definition operand.hh:360

gem5::Gcn3ISA::VecOperand::VecOperand
VecOperand()=delete

gem5::Gcn3ISA::VecOperand::scalar
bool scalar
whether this operand a scalar or not.
Definition operand.hh:336

gem5::Gcn3ISA::VecOperand::vecReg
VecRegCont vecReg
this holds all the operand data in a single vector register object (i.e., if an operand is 64b,...
Definition operand.hh:350

gem5::RegisterManager::mapVgpr
int mapVgpr(Wavefront *w, int vgprIndex)
Definition register_manager.cc:95

gem5::RegisterManager::mapSgpr
int mapSgpr(Wavefront *w, int sgprIndex)
Definition register_manager.cc:102

gem5::VecRegContainer< sizeof(DataType) *NumVecElemPerVecReg >

gem5::Wavefront
Definition wavefront.hh:61

gem5::Wavefront::reservedScalarRegs
int reservedScalarRegs
Definition wavefront.hh:196

gem5::Wavefront::simdId
const int simdId
Definition wavefront.hh:99

gem5::Wavefront::execMask
VectorMask & execMask()
Definition wavefront.cc:1407

gpu_registers.hh

gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:76

gem5::replaceBits
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition bitfield.hh:213

panic
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188

gem5::ArmISA::i
Bitfield< 7 > i
Definition misc_types.hh:67

gem5::ArmISA::NumVecElemPerVecReg
constexpr unsigned NumVecElemPerVecReg
Definition vec.hh:61

gem5::Gcn3ISA::VecElemU64
uint64_t VecElemU64
Definition gpu_registers.hh:168

gem5::Gcn3ISA::opSelectorToRegIdx
int opSelectorToRegIdx(int opIdx, int numScalarRegs)
Definition registers.cc:125

gem5::Gcn3ISA::ScalarRegF64
double ScalarRegF64
Definition gpu_registers.hh:158

gem5::Gcn3ISA::isScalarReg
bool isScalarReg(int opIdx)
Definition registers.cc:219

gem5::Gcn3ISA::ScalarRegU32
uint32_t ScalarRegU32
Definition gpu_registers.hh:153

gem5::Gcn3ISA::MaxOperandDwords
constexpr size_t MaxOperandDwords(16)

gem5::Gcn3ISA::isVectorReg
bool isVectorReg(int opIdx)
Definition registers.cc:232

gem5::Gcn3ISA::isConstVal
bool isConstVal(int opIdx)
Definition registers.cc:188

gem5::Gcn3ISA::ScalarRegU64
uint64_t ScalarRegU64
Definition gpu_registers.hh:156

gem5::Gcn3ISA::REG_SRC_LITERAL
@ REG_SRC_LITERAL
Definition gpu_registers.hh:130

gem5::Gcn3ISA::REG_PI
@ REG_PI
Definition gpu_registers.hh:122

gem5::Gcn3ISA::REG_FLAT_SCRATCH_HI
@ REG_FLAT_SCRATCH_HI
Definition gpu_registers.hh:53

gem5::Gcn3ISA::REG_POS_FOUR
@ REG_POS_FOUR
Definition gpu_registers.hh:120

gem5::Gcn3ISA::REG_EXEC_HI
@ REG_EXEC_HI
Definition gpu_registers.hh:77

gem5::Gcn3ISA::REG_SRC_SWDA
@ REG_SRC_SWDA
Definition gpu_registers.hh:124

gem5::Gcn3ISA::REG_VCC_LO
@ REG_VCC_LO
Definition gpu_registers.hh:56

gem5::Gcn3ISA::REG_SRC_DPP
@ REG_SRC_DPP
Definition gpu_registers.hh:125

gem5::Gcn3ISA::REG_NEG_HALF
@ REG_NEG_HALF
Definition gpu_registers.hh:115

gem5::Gcn3ISA::REG_POS_TWO
@ REG_POS_TWO
Definition gpu_registers.hh:118

gem5::Gcn3ISA::REG_EXEC_LO
@ REG_EXEC_LO
Definition gpu_registers.hh:76

gem5::Gcn3ISA::REG_NEG_FOUR
@ REG_NEG_FOUR
Definition gpu_registers.hh:121

gem5::Gcn3ISA::REG_POS_ONE
@ REG_POS_ONE
Definition gpu_registers.hh:116

gem5::Gcn3ISA::REG_FLAT_SCRATCH_LO
@ REG_FLAT_SCRATCH_LO
Definition gpu_registers.hh:52

gem5::Gcn3ISA::REG_VCC_HI
@ REG_VCC_HI
Definition gpu_registers.hh:57

gem5::Gcn3ISA::REG_POS_HALF
@ REG_POS_HALF
Definition gpu_registers.hh:114

gem5::Gcn3ISA::REG_NEG_ONE
@ REG_NEG_ONE
Definition gpu_registers.hh:117

gem5::Gcn3ISA::REG_NEG_TWO
@ REG_NEG_TWO
Definition gpu_registers.hh:119

gem5::Gcn3ISA::VecElemU32
uint32_t VecElemU32
Definition gpu_registers.hh:165

gem5::Gcn3ISA::ScalarRegF32
float ScalarRegF32
Definition gpu_registers.hh:155

gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition gpu_translation_state.hh:38

gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49

gem5::VectorMask
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition misc.hh:48

scalar_register_file.hh

gem5::Gcn3ISA::OpTraits< ScalarRegF64 >::FloatT
double FloatT
Definition operand.hh:60

gem5::Gcn3ISA::OpTraits< ScalarRegU64 >::FloatT
double FloatT
Definition operand.hh:61

gem5::Gcn3ISA::OpTraits
convenience traits so we can automatically infer the correct FP type without looking at the number of...
Definition operand.hh:59

gem5::Gcn3ISA::OpTraits::FloatT
float FloatT
Definition operand.hh:59

vec_reg.hh
Vector Registers layout specification.

vector_register_file.hh

wavefront.hh