develop/mxfp_8hh_source.html

/*

 * Copyright (c) 2024 Advanced Micro Devices, Inc.

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *

 * 1. Redistributions of source code must retain the above copyright notice,

 * this list of conditions and the following disclaimer.

 *

 * 2. Redistributions in binary form must reproduce the above copyright notice,

 * this list of conditions and the following disclaimer in the documentation

 * and/or other materials provided with the distribution.

 *

 * 3. Neither the name of the copyright holder nor the names of its

 * contributors may be used to endorse or promote products derived from this

 * software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

 * POSSIBILITY OF SUCH DAMAGE.

 */


#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__

#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__


#include <cmath>

#include <cstdint>

#include <iostream>


#include "arch/amdgpu/common/dtype/mxfp_convert.hh"


namespace gem5

{


namespace AMDGPU

{


// Base class for all microscaling types. The sizes of everything are

// determined by the enum fields in the FMT struct. All of these share the

// same operator overloads which convert to float before arithmetic and

// convert back if assigned to a microscaling type.

template<typename FMT>


class mxfp

{

  public:

    mxfp() = default;


    mxfp(float f) : mode(roundTiesToEven)

    {

        data = float_to_mxfp(f);

    }


    // Set raw bits, used by gem5 to set a raw value read from VGPRs.


    mxfp(const uint32_t& raw)

    {

        // The info unions end up being "left" aligned. For example, in FP4

        // only the bits 31:28 are used. Shift the input by the storage size

        // of 32 by the type size (sign + exponent + mantissa bits).

        data = raw;

        data <<= (32 - int(FMT::sbits) - int(FMT::ebits) - int(FMT::mbits));

    }


    mxfp(const mxfp& f)

    {

        FMT conv_out;

        conv_out = convertMXFP<FMT, decltype(f.getFmt())>(f.getFmt());

        data = conv_out.storage;

    }


    mxfp&


    operator=(const float& f)

    {

       data = float_to_mxfp(f);

       return *this;

    }


    mxfp&


    operator=(const mxfp& f)

    {

        FMT conv_out;

        conv_out = convertMXFP<FMT, decltype(f.getFmt())>(f.getFmt());

        data = conv_out.storage;

        return *this;

    }


    operator float() const

    {

        binary32 out;

        FMT in;

        in.storage = data;

        out = convertMXFP<binary32, FMT>(in, mode);


        return out.fp32;

    }


    constexpr static int


    size()

    {

        return int(FMT::mbits) + int(FMT::ebits) + int(FMT::sbits);

    }


    // Intentionally use storage > size() so that a storage type is not needed

    // as a template parameter.

    uint32_t data = 0;


    FMT


    getFmt() const

    {

        FMT out;

        out.storage = data;

        return out;

    }


    void


    setFmt(FMT in)

    {

        data = in.storage;

    }


    // Used for upcasting

    void


    scaleMul(const float& f)

    {

        binary32 bfp;

        bfp.fp32 = f;

        int scale_val = bfp.exp;


        // Scale value of 0xFF is NaN. Scaling by NaN returns NaN.

        // In this implementation, types without NaN define it as max().

        if (scale_val == 0xFF) {

            data = FMT::nan;

            return;

        }


        scale_val -= bfp.bias;


        FMT in = getFmt();

        int exp = in.exp;


        // Our value is zero, scaling by anything remains zero.

        if (exp == 0 && in.mant == 0) {

            return;

        }


        if (exp + scale_val > max_exp<FMT>()) {

            in.exp = max_exp<FMT>();

        } else if (exp + scale_val < min_exp<FMT>()) {

            in.exp = min_exp<FMT>();

        } else {

            in.exp = exp + scale_val;

        }


        data = in.storage;

    }


    // Used for downcasting

    void


    scaleDiv(const float& f)

    {

        binary32 bfp;

        bfp.fp32 = f;

        int scale_val = bfp.exp;


        // Scale value of 0xFF is NaN. Scaling by NaN returns NaN.

        // In this implementation, types without NaN define it as max().

        if (scale_val == 0xFF) {

            data = FMT::nan;

            return;

        }


        scale_val -= bfp.bias;


        FMT in = getFmt();

        int exp = in.exp;


        // Our value is zero, scaling by anything remains zero.

        if (exp == 0 && in.mant == 0) {

            return;

        }


        if (exp - scale_val > max_exp<FMT>()) {

            in.exp = max_exp<FMT>();

        } else if (exp - scale_val < min_exp<FMT>()) {

            in.exp = min_exp<FMT>();

        } else {

            in.exp = exp - scale_val;


            // Output become denorm

            if (in.exp == 0) {

                uint32_t m = in.mant | 1 << FMT::mbits;

                m >>= 1;

                in.mant = m & mask(FMT::mbits);

            }

        }


        data = in.storage;

    }


    // Helper method specific to AMDGPU instructions.

    void


    omodModifier(unsigned omod)

    {

        // When the VOP3 form is used, instructions with a floating-point

        // result can apply an output modifier (OMOD field) that multiplies

        // the result by: 0.5, 1.0, 2.0 or 4.0

        //

        // 2-bit field in encoding:

        //   0:  Do nothing

        //   1:  Multiply by 2

        //   2:  Multiply by 4

        //   3:  Divide by 2 (multiply by 1/2)

        assert(omod < 4);


        if (omod == 1) scaleMul(2.0f);

        if (omod == 2) scaleMul(4.0f);

        if (omod == 3) scaleDiv(2.0f);

    }


    void


    clamp(bool do_clamp)

    {

        if (do_clamp) {

            if (*this > 1.0f) {

                *this = 1.0f;

            } else if (*this < 0.0f) {

                *this = 0.0f;

            }

        }

    }


    void


    fabs()

    {

        data &= 0x7fffffff;

    }


    void


    neg()

    {

        data ^= 0x80000000;

    }


  private:

    mxfpRoundingMode mode = roundTiesToEven;


    uint32_t


    float_to_mxfp(float f)

    {

        binary32 in;

        in.fp32 = f;


        FMT out;

        out.storage = 0;


        out = convertMXFP<FMT, binary32>(in, mode);


        return out.storage;

    }


};


// Unary operators

// Unary plus: hands back a copy of the operand without touching it.
template<typename T>
inline T operator+(T operand)
{
    return operand;
}


// Unary minus: returns a copy of the operand with bit 31 of its data member
// (the sign bit) inverted. The caller's object is not modified.
template<typename T>
inline T operator-(T operand)
{
    operand.data = operand.data ^ 0x80000000;
    return operand;
}


// Prefix increment: adds 1.0 to the operand in place and returns the new
// value. The operand is taken by reference so the caller's object is actually
// updated, matching the compound assignment operators in this file.
template<typename T>
inline T operator++(T &a)
{
    a = a + T(1.0f);
    return a;
}

// Prefix decrement: subtracts 1.0 from the operand in place and returns the
// new value.
template<typename T>
inline T operator--(T &a)
{
    a = a - T(1.0f);
    return a;
}

// Postfix increment: adds 1.0 to the operand in place and returns the value
// it held before the increment.
template<typename T>
inline T operator++(T &a, int)
{
    T original = a;
    ++a;
    return original;
}

// Postfix decrement: subtracts 1.0 from the operand in place and returns the
// value it held before the decrement.
template<typename T>
inline T operator--(T &a, int)
{
    T original = a;
    --a;
    return original;
}


// Math operators

// Binary addition: both operands are converted to float, added, and the sum
// is converted back to T.
template<typename T>
inline T operator+(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return T(lhs_value + rhs_value);
}


// Binary subtraction: both operands are converted to float, subtracted, and
// the difference is converted back to T.
template<typename T>
inline T operator-(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return T(lhs_value - rhs_value);
}


// Multiplication: both operands are converted to float, multiplied, and the
// product is converted back to T.
template<typename T>
inline T operator*(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return T(lhs_value * rhs_value);
}


// Division: both operands are converted to float, divided, and the quotient
// is converted back to T.
template<typename T>
inline T operator/(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return T(lhs_value / rhs_value);
}


// Compound addition: stores lhs + rhs into lhs and returns a copy of the
// updated value.
template<typename T>
inline T operator+=(T &lhs, T rhs)
{
    const T sum = lhs + rhs;
    lhs = sum;
    return lhs;
}


// Compound subtraction: stores lhs - rhs into lhs and returns a copy of the
// updated value.
template<typename T>
inline T operator-=(T &lhs, T rhs)
{
    const T difference = lhs - rhs;
    lhs = difference;
    return lhs;
}


// Compound multiplication: stores lhs * rhs into lhs and returns a copy of
// the updated value.
template<typename T>
inline T operator*=(T &lhs, T rhs)
{
    const T product = lhs * rhs;
    lhs = product;
    return lhs;
}


// Compound division: stores lhs / rhs into lhs and returns a copy of the
// updated value.
template<typename T>
inline T operator/=(T &lhs, T rhs)
{
    const T quotient = lhs / rhs;
    lhs = quotient;
    return lhs;
}


// Comparison operators

// Less-than, evaluated on the float conversions of both operands.
template<typename T>
inline bool operator<(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return lhs_value < rhs_value;
}


// Greater-than, evaluated on the float conversions of both operands.
template<typename T>
inline bool operator>(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return lhs_value > rhs_value;
}


// Less-than-or-equal, evaluated on the float conversions of both operands.
template<typename T>
inline bool operator<=(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return lhs_value <= rhs_value;
}


// Greater-than-or-equal, evaluated on the float conversions of both operands.
template<typename T>
inline bool operator>=(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return lhs_value >= rhs_value;
}


// Equality, evaluated on the float conversions of both operands. The result
// therefore follows float comparison rules.
template<typename T>
inline bool operator==(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return lhs_value == rhs_value;
}


// Inequality, evaluated on the float conversions of both operands. The result
// therefore follows float comparison rules.
template<typename T>
inline bool operator!=(T lhs, T rhs)
{
    const float lhs_value = static_cast<float>(lhs);
    const float rhs_value = static_cast<float>(rhs);
    return lhs_value != rhs_value;
}


} // namespace AMDGPU


} // namespace gem5


#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__

gem5::AMDGPU::mxfp
Definition mxfp.hh:53

gem5::AMDGPU::mxfp::clamp
void clamp(bool do_clamp)
Definition mxfp.hh:228

gem5::AMDGPU::mxfp::mxfp
mxfp(const mxfp &f)
Definition mxfp.hh:71

gem5::AMDGPU::mxfp::getFmt
FMT getFmt() const
Definition mxfp.hh:115

gem5::AMDGPU::mxfp::neg
void neg()
Definition mxfp.hh:246

gem5::AMDGPU::mxfp::scaleMul
void scaleMul(const float &f)
Definition mxfp.hh:130

gem5::AMDGPU::mxfp::setFmt
void setFmt(FMT in)
Definition mxfp.hh:123

gem5::AMDGPU::mxfp::omodModifier
void omodModifier(unsigned omod)
Definition mxfp.hh:209

gem5::AMDGPU::mxfp::operator=
mxfp & operator=(const mxfp &f)
Definition mxfp.hh:86

gem5::AMDGPU::mxfp::float_to_mxfp
uint32_t float_to_mxfp(float f)
Definition mxfp.hh:255

gem5::AMDGPU::mxfp::size
static constexpr int size()
Definition mxfp.hh:105

gem5::AMDGPU::mxfp::mxfp
mxfp(const uint32_t &raw)
Definition mxfp.hh:62

gem5::AMDGPU::mxfp< fp4_e2m1_info >::mode
mxfpRoundingMode mode
Definition mxfp.hh:252

gem5::AMDGPU::mxfp::scaleDiv
void scaleDiv(const float &f)
Definition mxfp.hh:166

gem5::AMDGPU::mxfp::mxfp
mxfp()=default

gem5::AMDGPU::mxfp::operator=
mxfp & operator=(const float &f)
Definition mxfp.hh:79

gem5::AMDGPU::mxfp::mxfp
mxfp(float f)
Definition mxfp.hh:56

gem5::AMDGPU::mxfp< fp4_e2m1_info >::data
uint32_t data
Definition mxfp.hh:112

gem5::AMDGPU::mxfp::fabs
void fabs()
Definition mxfp.hh:240

mxfp_convert.hh

gem5::AMDGPU
Definition binary32.hh:39

gem5::AMDGPU::min_exp
int min_exp()
Definition mxfp_convert.hh:328

gem5::AMDGPU::operator-=
T operator-=(T &a, T b)
Definition mxfp.hh:347

gem5::AMDGPU::operator<
bool operator<(T a, T b)
Definition mxfp.hh:369

gem5::AMDGPU::operator==
bool operator==(T a, T b)
Definition mxfp.hh:393

gem5::AMDGPU::operator*=
T operator*=(T &a, T b)
Definition mxfp.hh:354

gem5::AMDGPU::operator--
T operator--(T a)
Definition mxfp.hh:292

gem5::AMDGPU::operator++
T operator++(T a)
Definition mxfp.hh:285

gem5::AMDGPU::operator*
T operator*(T a, T b)
Definition mxfp.hh:328

gem5::AMDGPU::operator-
T operator-(T a)
Definition mxfp.hh:277

gem5::AMDGPU::binary32
union gem5::AMDGPU::binary32_u binary32

gem5::AMDGPU::operator+
T operator+(T a)
Definition mxfp.hh:271

gem5::AMDGPU::operator+=
T operator+=(T &a, T b)
Definition mxfp.hh:340

gem5::AMDGPU::convertMXFP
dFMT convertMXFP(sFMT in, mxfpRoundingMode mode=roundTiesToEven, uint32_t seed=0)
Definition mxfp_convert.hh:62

gem5::AMDGPU::operator!=
bool operator!=(T a, T b)
Definition mxfp.hh:399

gem5::AMDGPU::operator<=
bool operator<=(T a, T b)
Definition mxfp.hh:381

gem5::AMDGPU::max_exp
int max_exp()
Definition mxfp_convert.hh:334

gem5::AMDGPU::operator/
T operator/(T a, T b)
Definition mxfp.hh:334

gem5::AMDGPU::operator>=
bool operator>=(T a, T b)
Definition mxfp.hh:387

gem5::AMDGPU::operator>
bool operator>(T a, T b)
Definition mxfp.hh:375

gem5::AMDGPU::mxfpRoundingMode
mxfpRoundingMode
Definition mxfp_convert.hh:49

gem5::AMDGPU::roundTiesToEven
@ roundTiesToEven
Definition mxfp_convert.hh:50

gem5::AMDGPU::operator/=
T operator/=(T &a, T b)
Definition mxfp.hh:361

gem5::ArmISA::mask
Bitfield< 3, 0 > mask
Definition pcstate.hh:63

gem5::ArmISA::b
Bitfield< 7 > b
Definition misc_types.hh:471

gem5::ArmISA::a
Bitfield< 8 > a
Definition misc_types.hh:66

gem5::ArmISA::f
Bitfield< 6 > f
Definition misc_types.hh:68

gem5::ArmISA::m
Bitfield< 0 > m
Definition misc_types.hh:482

gem5
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36

gem5::AMDGPU::binary32_u::bias
@ bias
Definition binary32.hh:52

gem5::AMDGPU::binary32_u::exp
unsigned exp
Definition binary32.hh:64

gem5::AMDGPU::binary32_u::fp32
float fp32
Definition binary32.hh:60

gem5::AMDGPU::fp4_e2m1_info::storage
uint32_t storage
Definition fp4_e2m1.hh:58