gem5 [DEVELOP-FOR-25.0]
Loading...
Searching...
No Matches
vfp.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2010-2013, 2019, 2024-2025 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "arch/arm/insts/vfp.hh"
39
40namespace gem5
41{
42
43using namespace ArmISA;
44
45/*
46 * The asm statements below are to keep gcc from reordering code. Otherwise
47 * the rounding mode might be set after the operation it was intended for, the
48 * exception bits read before it, etc.
49 */
50
51std::string
53 Addr pc, const loader::SymbolTable *symtab) const
54{
55 std::stringstream ss;
56 printMnemonic(ss, "", false);
58 ccprintf(ss, ", ");
60 ccprintf(ss, ", #%d", defCc);
61 ccprintf(ss, ", ");
63 return ss.str();
64}
65
66std::string
68 Addr pc, const loader::SymbolTable *symtab) const
69{
70 std::stringstream ss;
71 printMnemonic(ss, "", false);
73 ccprintf(ss, ", ");
75 ccprintf(ss, ", ");
77 ccprintf(ss, ", ");
79 return ss.str();
80}
81
82std::string
84 Addr pc, const loader::SymbolTable *symtab) const
85{
86 std::stringstream ss;
89 ss << ", ";
91 return ss.str();
92}
93
94std::string
96 Addr pc, const loader::SymbolTable *symtab) const
97{
98 std::stringstream ss;
101 ccprintf(ss, ", #%d", imm);
102 return ss.str();
103}
104
105std::string
107 Addr pc, const loader::SymbolTable *symtab) const
108{
109 std::stringstream ss;
112 ss << ", ";
114 ccprintf(ss, ", #%d", imm);
115 return ss.str();
116}
117
118std::string
120 Addr pc, const loader::SymbolTable *symtab) const
121{
122 std::stringstream ss;
125 ss << ", ";
127 ss << ", ";
129 return ss.str();
130}
131
132std::string
134 Addr pc, const loader::SymbolTable *symtab)
135 const
136{
137 std::stringstream ss;
138 printMnemonic(ss, "", /*withPred=*/false, /*withCond64=*/true, cond);
140 ss << ", ";
142 ss << ", ";
144 return ss.str();
145}
146
147std::string
149 Addr pc, const loader::SymbolTable *symtab) const
150{
151 std::stringstream ss;
154 ss << ", ";
156 ss << ", ";
158 ss << ", ";
160 return ss.str();
161}
162
163std::string
165 Addr pc, const loader::SymbolTable *symtab) const
166{
167 std::stringstream ss;
170 ss << ", ";
172 ss << ", ";
174 ccprintf(ss, ", #%d", imm);
175 return ss.str();
176}
177
178namespace ArmISA
179{
180
183{
184 int roundingMode = fegetround();
185 feclearexcept(FeAllExceptions);
186 switch (rMode) {
187 case VfpRoundNearest:
188 fesetround(FeRoundNearest);
189 break;
190 case VfpRoundUpward:
191 fesetround(FeRoundUpward);
192 break;
193 case VfpRoundDown:
194 fesetround(FeRoundDown);
195 break;
196 case VfpRoundZero:
197 fesetround(FeRoundZero);
198 break;
199 }
200 return roundingMode;
201}
202
203void
204finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
205{
206 int exceptions = fetestexcept(FeAllExceptions);
207 bool underflow = false;
208 if ((exceptions & FeInvalid) && mask.ioc) {
209 fpscr.ioc = 1;
210 }
211 if ((exceptions & FeDivByZero) && mask.dzc) {
212 fpscr.dzc = 1;
213 }
214 if ((exceptions & FeOverflow) && mask.ofc) {
215 fpscr.ofc = 1;
216 }
217 if (exceptions & FeUnderflow) {
218 underflow = true;
219 if (mask.ufc)
220 fpscr.ufc = 1;
221 }
222 if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) {
223 fpscr.ixc = 1;
224 }
225 fesetround(state);
226}
227
228template <class fpType>
229fpType
230fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
231{
232 int fpClass = std::fpclassify(val);
233 fpType junk = 0.0;
234 if (fpClass == FP_NAN) {
235 const bool single = (sizeof(val) == sizeof(float));
236 const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
237 const bool nan = std::isnan(op1);
238 if (!nan || defaultNan) {
239 val = bitsToFp(qnan, junk);
240 } else if (nan) {
241 val = bitsToFp(fpToBits(op1) | qnan, junk);
242 }
243 } else if (fpClass == FP_SUBNORMAL && flush == 1) {
244 // Turn val into a zero with the correct sign;
245 uint64_t bitMask = 0x1ULL << (sizeof(fpType) * 8 - 1);
246 val = bitsToFp(fpToBits(val) & bitMask, junk);
247 feclearexcept(FeInexact);
248 feraiseexcept(FeUnderflow);
249 }
250 return val;
251}
252
253template
254float fixDest<float>(bool flush, bool defaultNan, float val, float op1);
255template
256double fixDest<double>(bool flush, bool defaultNan, double val, double op1);
257
/*
 * Fix the result (val) of a two-operand x86 FP operation so it matches
 * ARM semantics:
 *  - A NaN result is rebuilt from the operands: the default NaN if
 *    neither operand is a NaN (or if defaultNan is set), otherwise the
 *    first signalling NaN (quieted), otherwise the first quiet NaN.
 *  - A subnormal result is flushed to a signed zero when flush is set,
 *    raising underflow and suppressing inexact.
 */
template <class fpType>
fpType
fixDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
{
    int fpClass = std::fpclassify(val);
    fpType junk = 0.0;  // carries the destination type into bitsToFp
    if (fpClass == FP_NAN) {
        const bool single = (sizeof(val) == sizeof(float));
        // Quiet-NaN bit pattern for this operand width.
        const uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
        const bool nan1 = std::isnan(op1);
        const bool nan2 = std::isnan(op2);
        // A NaN whose quiet bit is clear is signalling.
        const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
        const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
        // Priority order matters: signalling NaNs win over quiet ones,
        // and op1 wins over op2 within each category.
        if ((!nan1 && !nan2) || defaultNan) {
            val = bitsToFp(qnan, junk);
        } else if (signal1) {
            val = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            val = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            val = op1;
        } else if (nan2) {
            val = op2;
        }
    } else if (fpClass == FP_SUBNORMAL && flush) {
        // Turn val into a zero with the correct sign;
        uint64_t bitMask = 0x1ULL << (sizeof(fpType) * 8 - 1);
        val = bitsToFp(fpToBits(val) & bitMask, junk);
        // Flushing counts as underflow, but must not also raise inexact.
        feclearexcept(FeInexact);
        feraiseexcept(FeUnderflow);
    }
    return val;
}

template
float fixDest<float>(bool flush, bool defaultNan,
                     float val, float op1, float op2);
template
double fixDest<double>(bool flush, bool defaultNan,
                       double val, double op1, double op2);
298
/*
 * Like the two-operand fixDest, with an extra correction for division
 * results that landed exactly on the minimum normal magnitude. x86
 * detects underflow after rounding while ARM detects it before, so the
 * divide is redone with round-toward-zero to see whether the unrounded
 * quotient would have been subnormal; if so, underflow is raised (and,
 * in flush-to-zero mode, the flushed value is used).
 */
template <class fpType>
fpType
fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
{
    fpType mid = fixDest(flush, defaultNan, val, op1, op2);
    const bool single = (sizeof(fpType) == sizeof(float));
    const fpType junk = 0.0;
    // 0x00800000 / 0x0010000000000000 are the minimum normal magnitudes
    // (either sign) for single and double precision respectively.
    if ((single && (val == bitsToFp(0x00800000, junk) ||
                    val == bitsToFp(0x80800000, junk))) ||
        (!single && (val == bitsToFp(0x0010000000000000ULL, junk) ||
                     val == bitsToFp(0x8010000000000000ULL, junk)))
        ) {
        // The empty asm statements are barriers that keep the compiler
        // from moving the divide relative to the fesetround call.
        __asm__ __volatile__("" : "=m" (op1) : "m" (op1));
        fesetround(FeRoundZero);
        fpType temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = op1 / op2;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (flush) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}

template
float fixDivDest<float>(bool flush, bool defaultNan,
                        float val, float op1, float op2);
template
double fixDivDest<double>(bool flush, bool defaultNan,
                          double val, double op1, double op2);
334
335float
336fixFpDFpSDest(FPSCR fpscr, double val)
337{
338 const float junk = 0.0;
339 float op1 = 0.0;
340 if (std::isnan(val)) {
341 uint64_t valBits = fpToBits(val);
342 uint32_t op1Bits = bits(valBits, 50, 29) |
343 (mask(9) << 22) |
344 (bits(valBits, 63) << 31);
345 op1 = bitsToFp(op1Bits, junk);
346 }
347 float mid = fixDest(fpscr.fz, fpscr.dn, (float)val, op1);
348 if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
350 feclearexcept(FeInexact);
351 }
352 if (mid == bitsToFp(0x00800000, junk) ||
353 mid == bitsToFp(0x80800000, junk)) {
354 __asm__ __volatile__("" : "=m" (val) : "m" (val));
355 fesetround(FeRoundZero);
356 float temp = 0.0;
357 __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
358 temp = val;
359 if (flushToZero(temp)) {
360 feraiseexcept(FeUnderflow);
361 if (fpscr.fz) {
362 feclearexcept(FeInexact);
363 mid = temp;
364 }
365 }
366 __asm__ __volatile__("" :: "m" (temp));
367 }
368 return mid;
369}
370
/*
 * Widen a single-precision value to double precision with ARM semantics.
 * A NaN operand has its payload moved into the wider mantissa (op1 is
 * the pre-widened NaN handed to fixDest); results landing exactly on the
 * minimum normal double get the same round-toward-zero recheck as
 * fixFpDFpSDest, since ARM detects underflow before rounding and x86
 * after.
 */
double
fixFpSFpDDest(FPSCR fpscr, float val)
{
    const double junk = 0.0;
    double op1 = 0.0;
    if (std::isnan(val)) {
        uint32_t valBits = fpToBits(val);
        // Rebuild the NaN in double format: payload shifted up, the
        // exponent/quiet-bit field forced to all ones, sign preserved.
        uint64_t op1Bits = ((uint64_t)bits(valBits, 21, 0) << 29) |
                           (mask(12) << 51) |
                           ((uint64_t)bits(valBits, 31) << 63);
        op1 = bitsToFp(op1Bits, junk);
    }
    double mid = fixDest(fpscr.fz, fpscr.dn, (double)val, op1);
    // 0x0010000000000000 is the minimum normal double magnitude.
    if (mid == bitsToFp(0x0010000000000000ULL, junk) ||
        mid == bitsToFp(0x8010000000000000ULL, junk)) {
        // Barriers keep the conversion ordered against fesetround.
        __asm__ __volatile__("" : "=m" (val) : "m" (val));
        fesetround(FeRoundZero);
        double temp = 0.0;
        __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
        temp = val;
        if (flushToZero(temp)) {
            feraiseexcept(FeUnderflow);
            if (fpscr.fz) {
                feclearexcept(FeInexact);
                mid = temp;
            }
        }
        __asm__ __volatile__("" :: "m" (temp));
    }
    return mid;
}
402
/*
 * Narrow a single- or double-precision value (passed as raw bits in
 * opBits) to half precision, implementing ARM rounding, denormalization,
 * NaN and flush semantics in integer arithmetic. Exception flags are
 * accumulated directly into fpscr. When ahp is set, the alternative
 * half-precision format is used: no infinities or NaNs in the result,
 * out-of-range values saturate and set the invalid-operation flag.
 */
static inline uint16_t
vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan,
          uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
{
    uint32_t mWidth;
    uint32_t eWidth;
    uint32_t eHalfRange;
    uint32_t sBitPos;

    // Field widths of the source format.
    if (isDouble) {
        mWidth = 52;
        eWidth = 11;
    } else {
        mWidth = 23;
        eWidth = 8;
    }
    sBitPos = eWidth + mWidth;
    eHalfRange = (1 << (eWidth-1)) - 1;

    // Extract the operand.
    bool neg = bits(opBits, sBitPos);
    uint32_t exponent = bits(opBits, sBitPos-1, mWidth);
    uint64_t oldMantissa = bits(opBits, mWidth-1, 0);
    // Keep the top 10 mantissa bits; "extra" holds the dropped bits,
    // which drive rounding and the inexact flag.
    uint32_t mantissa = oldMantissa >> (mWidth - 10);
    // Do the conversion.
    uint64_t extra = oldMantissa & mask(mWidth - 10);
    if (exponent == mask(eWidth)) {
        if (oldMantissa != 0) {
            // Nans.
            if (bits(mantissa, 9) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
            }
            if (ahp) {
                // AHP format has no NaN encoding.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (defaultNan) {
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Propagate the NaN payload, quieted.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (ahp) {
                // AHP format has no infinity; saturate instead.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.

        bool inexact = (extra != 0);

        if (exponent == 0) {
            // Denormalized.
            // If flush to zero is on, this shouldn't happen.
            assert(!flush);

            // Check for underflow
            if (inexact || fpscr.ufe)
                fpscr.ufc = 1;

            // Handle rounding.
            unsigned mode = rMode;
            if ((mode == VfpRoundUpward && !neg && extra) ||
                (mode == VfpRoundDown && neg && extra) ||
                (mode == VfpRoundNearest &&
                 (extra > (1 << 9) ||
                  (extra == (1 << 9) && bits(mantissa, 0))))) {
                mantissa++;
            }

            // See if the number became normalized after rounding.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent = 1;
            }
        } else {
            // Normalized.

            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, mWidth - 10 - 1);
            bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0;

            if (exponent <= (eHalfRange - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (eHalfRange - 15)) {
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias.
                exponent -= (eHalfRange - 15);
            }

            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }

            // Handle rounding.
            unsigned mode = rMode;
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }

            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }

            // Deal with overflow
            if (ahp) {
                if (exponent >= 0x20) {
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }

        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    return result;
}
574
575uint16_t
576vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
577 uint32_t rMode, bool ahp, float op)
578{
579 uint64_t opBits = fpToBits(op);
580 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false);
581}
582
583uint16_t
584vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
585 uint32_t rMode, bool ahp, double op)
586{
587 uint64_t opBits = fpToBits(op);
588 return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true);
589}
590
/*
 * Widen a half-precision value (op) to single or double precision,
 * returned as raw bits. Half-precision denormals are normalized into the
 * wider format, NaNs have their payload shifted up and are quieted (with
 * defaultNan optionally replacing the payload). When ahp is set, the
 * alternative half-precision format applies and exponent 0x1f is treated
 * as a normal number rather than infinity/NaN.
 */
static inline uint64_t
vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
{
    uint32_t mWidth;
    uint32_t eWidth;
    uint32_t eHalfRange;
    uint32_t sBitPos;

    // Field widths of the destination format.
    if (isDouble) {
        mWidth = 52;
        eWidth = 11;
    } else {
        mWidth = 23;
        eWidth = 8;
    }
    sBitPos = eWidth + mWidth;
    eHalfRange = (1 << (eWidth-1)) - 1;

    // Extract the bitfields.
    bool neg = bits(op, 15);
    uint32_t exponent = bits(op, 14, 10);
    uint64_t mantissa = bits(op, 9, 0);
    // Do the conversion.
    if (exponent == 0) {
        if (mantissa != 0) {
            // Normalize the value.
            exponent = exponent + (eHalfRange - 15) + 1;
            while (mantissa < (1 << 10)) {
                mantissa = mantissa << 1;
                exponent--;
            }
        }
        mantissa = mantissa << (mWidth - 10);
    } else if (exponent == 0x1f && !ahp) {
        // Infinities and nans.
        exponent = mask(eWidth);
        if (mantissa != 0) {
            // Nans.
            mantissa = mantissa << (mWidth - 10);
            if (bits(mantissa, mWidth-1) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
                // Quiet it in the result.
                mantissa |= (((uint64_t) 1) << (mWidth-1));
            }
            if (defaultNan) {
                // Clear the payload, keeping only the quiet bit.
                mantissa &= ~mask(mWidth-1);
                neg = false;
            }
        }
    } else {
        // Normal numbers: rebias the exponent, widen the mantissa.
        exponent = exponent + (eHalfRange - 15);
        mantissa = mantissa << (mWidth - 10);
    }
    // Reassemble the result.
    uint64_t result = bits(mantissa, mWidth-1, 0);
    replaceBits(result, sBitPos-1, mWidth, exponent);
    if (neg) {
        result |= (((uint64_t) 1) << sBitPos);
    }
    return result;
}
652
653double
654vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
655{
656 double junk = 0.0;
657 uint64_t result;
658
659 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true);
660 return bitsToFp(result, junk);
661}
662
663float
664vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
665{
666 float junk = 0.0;
667 uint64_t result;
668
669 result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false);
670 return bitsToFp(result, junk);
671}
672
/*
 * Convert an unsigned fixed-point value (width-bit integer with imm
 * fractional bits) to single precision, rounding to nearest. The empty
 * asm statements are barriers keeping the compiler from reordering the
 * divide relative to the fenv calls; fixDivDest applies the ARM
 * underflow/NaN corrections.
 */
float
vfpUFixedToFpS(bool flush, bool defaultNan,
               uint64_t val, uint8_t width, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Truncate the operand to its declared width.
    if (width == 16)
        val = (uint16_t)val;
    else if (width == 32)
        val = (uint32_t)val;
    else if (width != 64)
        panic("Unsupported width %d", width);
    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
}
690
/*
 * Convert a signed fixed-point value (width-bit integer with imm
 * fractional bits) to single precision, rounding to nearest. See
 * vfpUFixedToFpS for the barrier/fenv ordering notes.
 */
float
vfpSFixedToFpS(bool flush, bool defaultNan,
               int64_t val, uint8_t width, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Sign-extend the operand from its declared width.
    if (width == 16)
        val = szext<16>(val);
    else if (width == 32)
        val = szext<32>(val);
    else if (width != 64)
        panic("Unsupported width %d", width);

    float scale = powf(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
}
709
710
/*
 * Convert an unsigned fixed-point value (width-bit integer with imm
 * fractional bits) to double precision, rounding to nearest. See
 * vfpUFixedToFpS for the barrier/fenv ordering notes.
 */
double
vfpUFixedToFpD(bool flush, bool defaultNan,
               uint64_t val, uint8_t width, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Truncate the operand to its declared width.
    if (width == 16)
        val = (uint16_t)val;
    else if (width == 32)
        val = (uint32_t)val;
    else if (width != 64)
        panic("Unsupported width %d", width);

    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
}
729
/*
 * Convert a signed fixed-point value (width-bit integer with imm
 * fractional bits) to double precision, rounding to nearest. See
 * vfpUFixedToFpS for the barrier/fenv ordering notes.
 */
double
vfpSFixedToFpD(bool flush, bool defaultNan,
               int64_t val, uint8_t width, uint8_t imm)
{
    fesetround(FeRoundNearest);
    // Sign-extend the operand from its declared width.
    if (width == 16)
        val = szext<16>(val);
    else if (width == 32)
        val = szext<32>(val);
    else if (width != 64)
        panic("Unsupported width %d", width);

    double scale = pow(2.0, imm);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    feclearexcept(FeAllExceptions);
    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
    return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
}
748
749// This function implements a magic formula taken from the architecture
750// reference manual. It was originally called recip_sqrt_estimate.
751static double
753{
754 int64_t q0, q1, s;
755 double r;
756 if (a < 0.5) {
757 q0 = (int64_t)(a * 512.0);
758 r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0);
759 } else {
760 q1 = (int64_t)(a * 256.0);
761 r = 1.0 / sqrt(((double)q1 + 0.5) / 256.0);
762 }
763 s = (int64_t)(256.0 * r + 0.5);
764 return (double)s / 256.0;
765}
766
// This function is only intended for use in Neon instructions because
// it ignores certain bits in the FPSCR.
//
// Single-precision reciprocal square root estimate: special cases are
// handled explicitly (NaN, zero, negative, infinity), then the operand's
// significand is rescaled into a double and fed through the
// architectural recip_sqrt_estimate formula.
float
fprSqrtEstimate(FPSCR &fpscr, float op)
{
    const uint32_t qnan = 0x7fc00000;
    float junk = 0.0;  // carries the float type into bitsToFp
    int fpClass = std::fpclassify(op);
    if (fpClass == FP_NAN) {
        // A NaN with the quiet bit clear is signalling.
        if ((fpToBits(op) & qnan) != qnan)
            fpscr.ioc = 1;
        return bitsToFp(qnan, junk);
    } else if (fpClass == FP_ZERO) {
        fpscr.dzc = 1;
        // Return infinity with the same sign as the operand.
        return bitsToFp((std::signbit(op) << 31) |
                       (0xFF << 23) | (0 << 0), junk);
    } else if (std::signbit(op)) {
        // Square root of a negative number.
        // Set invalid op bit.
        fpscr.ioc = 1;
        return bitsToFp(qnan, junk);
    } else if (fpClass == FP_INFINITE) {
        return 0.0;
    } else {
        // Rescale the significand into a double, picking the exponent by
        // the parity bit of the operand's exponent field (bit 23).
        uint64_t opBits = fpToBits(op);
        double scaled;
        if (bits(opBits, 23)) {
            scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
                              (0x3fdULL << 52) | (bits(opBits, 31) << 63),
                              (double)0.0);
        } else {
            scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
                              (0x3feULL << 52) | (bits(opBits, 31) << 63),
                              (double)0.0);
        }
        // Result exponent: halve the biased distance from the maximum.
        uint64_t resultExp = (380 - bits(opBits, 30, 23)) / 2;

        uint64_t estimate = fpToBits(recipSqrtEstimate(scaled));

        // Reassemble: estimate's sign, computed exponent, and the top
        // mantissa bits of the estimate.
        return bitsToFp((bits(estimate, 63) << 31) |
                        (bits(resultExp, 7, 0) << 23) |
                        (bits(estimate, 51, 29) << 0), junk);
    }
}
811
/*
 * Half-precision reciprocal square root estimate. Mirrors
 * fprSqrtEstimate but works on raw half-precision bits; FZ16 makes
 * subnormal operands behave like zero and flushes a denormal result.
 */
uint16_t
fprSqrtEstimateFpH(FPSCR &fpscr, uint16_t op)
{
    const uint16_t qnan = 0x7e00;
    bool sign = bits(op, 15);
    int fpClass = fpclassifyFpH(op);
    if (fpClass == FP_NAN) {
        // A NaN with the quiet bit clear is signalling.
        if ((op & qnan) != qnan)
            fpscr.ioc = 1;
        return qnan;
    } else if ((fpscr.fz16 && fpClass == FP_SUBNORMAL)
               || fpClass == FP_ZERO) {
        fpscr.dzc = 1;
        // Return infinity with the same sign as the operand.
        return (sign << 15) | (0x1F << 10) | (0 << 0);
    } else if (sign) {
        // Square root of a negative number.
        // Set invalid op bit.
        fpscr.ioc = 1;
        return qnan;
    } else if (fpClass == FP_INFINITE) {
        return 0;
    } else {
        uint64_t opBits = op;
        // Place the 10 fraction bits at the top of a 52-bit field.
        uint64_t fraction = bits(opBits, 9, 0) << 42;
        int16_t exp = bits(opBits, 14, 10);

        // Normalize a subnormal operand, adjusting the exponent.
        if (exp == 0) {
            while (bits(fraction, 51) == 0) {
                fraction = bits(fraction, 50, 0) << 1;
                exp = exp - 1;
            }
            fraction = bits(fraction, 50, 0) << 1;
        }

        // scaled input value to the range of [0.25, 0.5) and [0.5, 1).
        double scaled;
        if (bits(exp, 0)) {
            scaled = bitsToFp((0 << 0) | (bits(fraction, 51, 42) << 42) |
                              (0x3fdULL << 52) | (0ULL << 63),
                              (double)0.0);
        } else {
            scaled = bitsToFp((0 << 0) | (bits(fraction, 51, 41) << 41) |
                              (0x3feULL << 52) | (0ULL << 63),
                              (double)0.0);
        }
        // Result exponent: halve the biased distance from the maximum.
        uint64_t resultExp = (44 - exp) / 2;

        uint64_t estimate = fpToBits(recipSqrtEstimate(scaled));

        // if flush-to-zero, flush denormal.
        if (fpscr.fz16) {
            if (resultExp == 0) {
                return 0;
            }
        }

        return (bits(resultExp, 4, 0) << 10) | (bits(estimate, 51, 44) << 2);
    }
}
871
872uint32_t
874{
875 if (bits(op, 31, 30) == 0) {
876 return -1;
877 } else {
878 double dpOp;
879 if (bits(op, 31)) {
880 dpOp = bitsToFp((0ULL << 63) |
881 (0x3feULL << 52) |
882 (bits((uint64_t)op, 30, 0) << 21) |
883 (0 << 0), (double)0.0);
884 } else {
885 dpOp = bitsToFp((0ULL << 63) |
886 (0x3fdULL << 52) |
887 (bits((uint64_t)op, 29, 0) << 22) |
888 (0 << 0), (double)0.0);
889 }
890 uint64_t estimate = fpToBits(recipSqrtEstimate(dpOp));
891 return (1 << 31) | bits(estimate, 51, 21);
892 }
893}
894
895// This function implements a magic formula taken from the architecture
896// reference manual. It was originally called recip_estimate.
897
898static double
900{
901 int64_t q, s;
902 double r;
903 q = (int64_t)(a * 512.0);
904 r = 1.0 / (((double)q + 0.5) / 512.0);
905 s = (int64_t)(256.0 * r + 0.5);
906 return (double)s / 256.0;
907}
908
909// This function is only intended for use in Neon instructions because
910// it ignores certain bits in the FPSCR.
911float
912fpRecipEstimate(FPSCR &fpscr, float op)
913{
914 const uint32_t qnan = 0x7fc00000;
915 float junk = 0.0;
916 int fpClass = std::fpclassify(op);
917 if (fpClass == FP_NAN) {
918 if ((fpToBits(op) & qnan) != qnan)
919 fpscr.ioc = 1;
920 return bitsToFp(qnan, junk);
921 } else if (fpClass == FP_INFINITE) {
922 return bitsToFp(std::signbit(op) << 31, junk);
923 } else if (fpClass == FP_ZERO) {
924 fpscr.dzc = 1;
925 // Return infinity with the same sign as the operand.
926 return bitsToFp((std::signbit(op) << 31) |
927 (0xFF << 23) | (0 << 0), junk);
928 } else if (fabs(op) >= pow(2.0, 126)) {
929 fpscr.ufc = 1;
930 return bitsToFp(std::signbit(op) << 31, junk);
931 } else {
932 uint64_t opBits = fpToBits(op);
933 double scaled;
934 scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
935 (0x3feULL << 52) | (0ULL << 63),
936 (double)0.0);
937 uint64_t resultExp = 253 - bits(opBits, 30, 23);
938
939 uint64_t estimate = fpToBits(recipEstimate(scaled));
940
941 return bitsToFp((bits(opBits, 31) << 31) |
942 (bits(resultExp, 7, 0) << 23) |
943 (bits(estimate, 51, 29) << 0), junk);
944 }
945}
946
/*
 * Half-precision reciprocal estimate. Mirrors fpRecipEstimate but works
 * on raw half-precision bits; FZ16 makes subnormal operands behave like
 * zero and flushes a denormal result.
 */
uint16_t
fpRecipEstimateFpH(FPSCR &fpscr, uint16_t op)
{
    const uint16_t qnan = 0x7e00;
    bool sign = bits(op, 15);
    int fpClass = fpclassifyFpH(op);
    if (fpClass == FP_NAN) {
        // A NaN with the quiet bit clear is signalling.
        if ((op & qnan) != qnan)
            fpscr.ioc = 1;
        return qnan;
    } else if (fpClass == FP_INFINITE) {
        // Reciprocal of infinity is a signed zero.
        return sign << 15;
    } else if ((fpscr.fz16 && fpClass == FP_SUBNORMAL)
               || fpClass == FP_ZERO) {
        fpscr.dzc = 1;
        // Return infinity with the same sign as the operand.
        return (sign << 15) | (0x1F << 10) | (0 << 0);
    } else if (bits(op, 14, 8) == 0) {
        // Operand so small the reciprocal overflows: signed infinity.
        fpscr.ofc = 1;
        fpscr.ixc = 1;
        return (sign << 15) | (0x1F << 10) | (0 << 0);
    } else if (fpscr.fz16 && bits(op, 14, 10) >= 29) {
        // Flush-to-zero: the reciprocal would be denormal.
        fpscr.ufc = 1;
        return sign << 15;
    } else {
        uint64_t opBits = op;
        // Place the 10 fraction bits at the top of a 52-bit field.
        uint64_t fraction = bits(opBits, 9, 0) << 42;
        int16_t exp = bits(opBits, 14, 10);

        // Normalize a subnormal operand (at most one shift is needed
        // here given the overflow check above).
        if (exp == 0) {
            if (bits(fraction, 51) == 0) {
                exp = -1;
                fraction = bits(fraction, 49, 0) << 2;
            } else {
                fraction = bits(fraction, 50, 0) << 1;
            }
        }

        // scaled input value to the range of [0.5, 1)
        double scaled;
        scaled = bitsToFp((0 << 0) | (bits(fraction, 51, 44) << 44) |
                          (0x3feULL << 52) | (0ULL << 63),
                          (double)0.0);
        uint64_t resultExp = 29 - exp;

        uint64_t estimate = fpToBits(recipEstimate(scaled));
        fraction = bits(estimate, 51, 0);

        // A zero or negative result exponent means the result is
        // denormal: shift the hidden bit into the fraction. Note that
        // resultExp is unsigned, so "== -1" matches the wrapped value
        // produced by 29 - 30.
        if (resultExp == 0) {
            fraction = (1ULL << 51) | bits(fraction, 51, 1);
        } else if (resultExp == -1) {
            fraction = (1ULL << 50) | bits(fraction, 51, 2);
            resultExp = 0;
        }

        // if flush-to-zero, flush denormal.
        if (fpscr.fz16) {
            if (resultExp == 0) {
                return 0;
            }
        }

        return (sign << 15) | (bits(resultExp, 4, 0) << 10) |
               (bits(fraction, 51, 42) << 0);
    }
}
1013
1014uint32_t
1016{
1017 if (bits(op, 31) == 0) {
1018 return -1;
1019 } else {
1020 double dpOp;
1021 dpOp = bitsToFp((0ULL << 63) |
1022 (0x3feULL << 52) |
1023 (bits((uint64_t)op, 30, 0) << 21) |
1024 (0 << 0), (double)0.0);
1025 uint64_t estimate = fpToBits(recipEstimate(dpOp));
1026 return (1 << 31) | bits(estimate, 51, 21);
1027 }
1028}
1029
1030FPSCR
1031fpStandardFPSCRValue(const FPSCR &fpscr)
1032{
1033 FPSCR new_fpscr(0);
1034 new_fpscr.ahp = fpscr.ahp;
1035 new_fpscr.dn = 1;
1036 new_fpscr.fz = 1;
1037 new_fpscr.fz16 = fpscr.fz16;
1038 return new_fpscr;
1039};
1040
/*
 * Handle NaN operands up front for a two-operand FP instruction. If
 * either operand is a NaN, "done" is set and the ARM result NaN is
 * returned: the default NaN when defaultNan is set, otherwise the first
 * signalling NaN (quieted), otherwise the first quiet NaN; any
 * signalling NaN also sets the invalid-operation flag. If neither
 * operand is a NaN, "done" is cleared and the (unused) zero return value
 * should be ignored by the caller.
 */
template <class fpType>
fpType
FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                  fpType op1, fpType op2) const
{
    done = true;
    fpType junk = 0.0;  // carries the type into bitsToFp
    fpType dest = 0.0;
    const bool single = (sizeof(fpType) == sizeof(float));
    // Quiet-NaN bit pattern for this operand width.
    const uint64_t qnan =
        single ? 0x7fc00000 : 0x7ff8000000000000ULL;
    const bool nan1 = std::isnan(op1);
    const bool nan2 = std::isnan(op2);
    // A NaN whose quiet bit is clear is signalling.
    const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
    const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
    if (nan1 || nan2) {
        // Priority: default NaN, then signalling (quieted), then quiet;
        // op1 wins over op2 within each category.
        if (defaultNan) {
            dest = bitsToFp(qnan, junk);
        } else if (signal1) {
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            dest = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            dest = op1;
        } else if (nan2) {
            dest = op2;
        }
        if (signal1 || signal2) {
            fpscr.ioc = 1;
        }
    } else {
        done = false;
    }
    return dest;
}

template
float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                        float op1, float op2) const;
template
double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                         double op1, double op2) const;
1083
1084// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB
1085template <class fpType>
1086fpType
1087FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
1088 fpType (*func)(fpType, fpType, fpType),
1089 bool flush, bool defaultNan, uint32_t rMode) const
1090{
1091 const bool single = (sizeof(fpType) == sizeof(float));
1092 fpType junk = 0.0;
1093
1094 if (flush && (flushToZero(op1, op2) || flushToZero(op3)))
1095 fpscr.idc = 1;
1097 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state)
1098 : "m" (op1), "m" (op2), "m" (op3), "m" (state));
1099 fpType dest = func(op1, op2, op3);
1100 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1101
1102 int fpClass = std::fpclassify(dest);
1103 // Get NAN behavior right. This varies between x86 and ARM.
1104 if (fpClass == FP_NAN) {
1105 const uint64_t qnan =
1106 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1107 const bool nan1 = std::isnan(op1);
1108 const bool nan2 = std::isnan(op2);
1109 const bool nan3 = std::isnan(op3);
1110 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
1111 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
1112 const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan);
1113 if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) {
1114 dest = bitsToFp(qnan, junk);
1115 } else if (signal1) {
1116 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1117 } else if (signal2) {
1118 dest = bitsToFp(fpToBits(op2) | qnan, junk);
1119 } else if (signal3) {
1120 dest = bitsToFp(fpToBits(op3) | qnan, junk);
1121 } else if (nan1) {
1122 dest = op1;
1123 } else if (nan2) {
1124 dest = op2;
1125 } else if (nan3) {
1126 dest = op3;
1127 }
1128 } else if (flush && flushToZero(dest)) {
1129 feraiseexcept(FeUnderflow);
1130 } else if ((
1131 (single && (dest == bitsToFp(0x00800000, junk) ||
1132 dest == bitsToFp(0x80800000, junk))) ||
1133 (!single &&
1134 (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1135 dest == bitsToFp(0x8010000000000000ULL, junk)))
1136 ) && rMode != VfpRoundZero) {
1137 /*
1138 * Correct for the fact that underflow is detected -before- rounding
1139 * in ARM and -after- rounding in x86.
1140 */
1141 fesetround(FeRoundZero);
1142 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3)
1143 : "m" (op1), "m" (op2), "m" (op3));
1144 fpType temp = func(op1, op2, op2);
1145 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1146 if (flush && flushToZero(temp)) {
1147 dest = temp;
1148 }
1149 }
1150 finishVfp(fpscr, state, flush);
1151 return dest;
1152}
1153
1154template
1155float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3,
1156 float (*func)(float, float, float),
1157 bool flush, bool defaultNan, uint32_t rMode) const;
1158template
1159double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3,
1160 double (*func)(double, double, double),
1161 bool flush, bool defaultNan, uint32_t rMode) const;
1162
1163template <class fpType>
1164fpType
1165FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
1166 fpType (*func)(fpType, fpType),
1167 bool flush, bool defaultNan, uint32_t rMode) const
1168{
1169 const bool single = (sizeof(fpType) == sizeof(float));
1170 fpType junk = 0.0;
1171
1172 if (flush && flushToZero(op1, op2))
1173 fpscr.idc = 1;
1175 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (state)
1176 : "m" (op1), "m" (op2), "m" (state));
1177 fpType dest = func(op1, op2);
1178 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1179
1180 // Get NAN behavior right. This varies between x86 and ARM.
1181 if (std::isnan(dest)) {
1182 const uint64_t qnan =
1183 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1184 const bool nan1 = std::isnan(op1);
1185 const bool nan2 = std::isnan(op2);
1186 const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
1187 const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
1188 if ((!nan1 && !nan2) || (defaultNan == 1)) {
1189 dest = bitsToFp(qnan, junk);
1190 } else if (signal1) {
1191 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1192 } else if (signal2) {
1193 dest = bitsToFp(fpToBits(op2) | qnan, junk);
1194 } else if (nan1) {
1195 dest = op1;
1196 } else if (nan2) {
1197 dest = op2;
1198 }
1199 } else if (flush && flushToZero(dest)) {
1200 feraiseexcept(FeUnderflow);
1201 } else if ((
1202 (single && (dest == bitsToFp(0x00800000, junk) ||
1203 dest == bitsToFp(0x80800000, junk))) ||
1204 (!single &&
1205 (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1206 dest == bitsToFp(0x8010000000000000ULL, junk)))
1207 ) && rMode != VfpRoundZero) {
1208 /*
1209 * Correct for the fact that underflow is detected -before- rounding
1210 * in ARM and -after- rounding in x86.
1211 */
1212 fesetround(FeRoundZero);
1213 __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2)
1214 : "m" (op1), "m" (op2));
1215 fpType temp = func(op1, op2);
1216 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1217 if (flush && flushToZero(temp)) {
1218 dest = temp;
1219 }
1220 }
1221 finishVfp(fpscr, state, flush);
1222 return dest;
1223}
1224
1225template
1226float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
1227 float (*func)(float, float),
1228 bool flush, bool defaultNan, uint32_t rMode) const;
1229template
1230double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
1231 double (*func)(double, double),
1232 bool flush, bool defaultNan, uint32_t rMode) const;
1233
1234template <class fpType>
1235fpType
1236FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
1237 bool flush, uint32_t rMode) const
1238{
1239 const bool single = (sizeof(fpType) == sizeof(float));
1240 fpType junk = 0.0;
1241
1242 if (flush && flushToZero(op1))
1243 fpscr.idc = 1;
1245 __asm__ __volatile__ ("" : "=m" (op1), "=m" (state)
1246 : "m" (op1), "m" (state));
1247 fpType dest = func(op1);
1248 __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
1249
1250 // Get NAN behavior right. This varies between x86 and ARM.
1251 if (std::isnan(dest)) {
1252 const uint64_t qnan =
1253 single ? 0x7fc00000 : 0x7ff8000000000000ULL;
1254 const bool nan = std::isnan(op1);
1255 if (!nan || fpscr.dn == 1) {
1256 dest = bitsToFp(qnan, junk);
1257 } else if (nan) {
1258 dest = bitsToFp(fpToBits(op1) | qnan, junk);
1259 }
1260 } else if (flush && flushToZero(dest)) {
1261 feraiseexcept(FeUnderflow);
1262 } else if ((
1263 (single && (dest == bitsToFp(0x00800000, junk) ||
1264 dest == bitsToFp(0x80800000, junk))) ||
1265 (!single &&
1266 (dest == bitsToFp(0x0010000000000000ULL, junk) ||
1267 dest == bitsToFp(0x8010000000000000ULL, junk)))
1268 ) && rMode != VfpRoundZero) {
1269 /*
1270 * Correct for the fact that underflow is detected -before- rounding
1271 * in ARM and -after- rounding in x86.
1272 */
1273 fesetround(FeRoundZero);
1274 __asm__ __volatile__ ("" : "=m" (op1) : "m" (op1));
1275 fpType temp = func(op1);
1276 __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
1277 if (flush && flushToZero(temp)) {
1278 dest = temp;
1279 }
1280 }
1281 finishVfp(fpscr, state, flush);
1282 return dest;
1283}
1284
1285template
1286float FpOp::unaryOp(FPSCR &fpscr, float op1, float (*func)(float),
1287 bool flush, uint32_t rMode) const;
1288template
1289double FpOp::unaryOp(FPSCR &fpscr, double op1, double (*func)(double),
1290 bool flush, uint32_t rMode) const;
1291
1294{
1295 if (wide) {
1296 stride *= 2;
1297 }
1298 unsigned offset = idx % 8;
1299 idx = (RegIndex)(idx - offset);
1300 offset += stride;
1301 idx = (RegIndex)(idx + (offset % 8));
1302 return idx;
1303}
1304
1305void
1307{
1308 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1309 assert(!inScalarBank(dest));
1310 dest = addStride(dest, stride);
1311 op1 = addStride(op1, stride);
1312 if (!inScalarBank(op2)) {
1313 op2 = addStride(op2, stride);
1314 }
1315}
1316
1317void
1319{
1320 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1321 assert(!inScalarBank(dest));
1322 dest = addStride(dest, stride);
1323 if (!inScalarBank(op1)) {
1324 op1 = addStride(op1, stride);
1325 }
1326}
1327
1328void
1330{
1331 unsigned stride = (machInst.fpscrStride == 0) ? 1 : 2;
1332 assert(!inScalarBank(dest));
1333 dest = addStride(dest, stride);
1334}
1335
1336FPSCR
1337fpVASimdFPSCRValue(const FPSCR &fpscr)
1338{
1339 FPSCR new_fpscr(0);
1340 new_fpscr.ahp = 0; // bit 26
1341 new_fpscr.dn = 1; // bit 25
1342 new_fpscr.fz = 1; // bit 24
1343 new_fpscr.rMode = VfpRoundNearest; // bit 23:22
1344 new_fpscr.fz16 = fpscr.fz16; // bit 19
1345 return new_fpscr;
1346}
1347
1348FPSCR
1349fpVASimdCvtFPSCRValue(const FPSCR &fpscr)
1350{
1351 FPSCR new_fpscr(0);
1352 new_fpscr.ahp = fpscr.ahp; // bit 26
1353 new_fpscr.dn = 1; // bit 25
1354 new_fpscr.fz = 1; // bit 24
1355 new_fpscr.rMode = VfpRoundNearest; // bit 23:22
1356 new_fpscr.fz16 = fpscr.fz16; // bit 19
1357 return new_fpscr;
1358}
1359
1360FPSCR
1361fpRestoreFPSCRValue(const FPSCR fpscr_exec, const FPSCR &fpscr)
1362{
1363 FPSCR new_fpscr(fpscr_exec);
1364 new_fpscr.idc = fpscr_exec.idc | fpscr.idc; // bit 7
1365 new_fpscr.ixc = fpscr_exec.ixc | fpscr.ixc; // bit 4
1366 new_fpscr.ufc = fpscr_exec.ufc | fpscr.ufc; // bit 3
1367 new_fpscr.ofc = fpscr_exec.ofc | fpscr.ofc; // bit 2
1368 new_fpscr.dzc = fpscr_exec.dzc | fpscr.dzc; // bit 1
1369 new_fpscr.ioc = fpscr_exec.ioc | fpscr.ioc; // bit 0
1370 return new_fpscr;
1371}
1372
1373} // namespace ArmISA
1374} // namespace gem5
void printCondition(std::ostream &os, unsigned code, bool noImplicit=false) const
void printMnemonic(std::ostream &os, const std::string &suffix="", bool withPred=true, bool withCond64=false, ConditionCode cond64=COND_UC) const
void printIntReg(std::ostream &os, RegIndex reg_idx, uint8_t opWidth=0) const
Print a register name for disassembly given the unique dependence tag number (FP or int).
void printFloatReg(std::ostream &os, RegIndex reg_idx) const
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:52
ConditionCode condCode
Definition vfp.hh:1083
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:67
fpType unaryOp(FPSCR &fpscr, fpType op1, fpType(*func)(fpType), bool flush, uint32_t rMode) const
Definition vfp.cc:1236
fpType processNans(FPSCR &fpscr, bool &done, bool defaultNan, fpType op1, fpType op2) const
Definition vfp.cc:1043
fpType ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, fpType(*func)(fpType, fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
Definition vfp.cc:1087
fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType(*func)(fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const
Definition vfp.cc:1165
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:95
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:106
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:83
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:133
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:164
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:119
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition vfp.cc:148
static bool inScalarBank(RegIndex idx)
Definition vfp.hh:621
void nextIdxs(RegIndex &dest, RegIndex &op1, RegIndex &op2)
Definition vfp.cc:1306
RegIndex addStride(RegIndex idx, unsigned stride)
Definition vfp.cc:1293
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr uint64_t szext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
Definition bitfield.hh:161
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition bitfield.hh:216
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
uint32_t unsignedRecipEstimate(uint32_t op)
Definition vfp.cc:1015
@ FeRoundZero
Definition vfp.hh:102
@ FeRoundNearest
Definition vfp.hh:101
@ FeRoundUpward
Definition vfp.hh:103
@ FeRoundDown
Definition vfp.hh:100
static uint32_t fpToBits(float)
Definition vfp.hh:203
fpType fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
Definition vfp.cc:301
Bitfield< 3, 0 > mask
Definition pcstate.hh:63
static uint16_t vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
Definition vfp.cc:404
uint16_t fpRecipEstimateFpH(FPSCR &fpscr, uint16_t op)
Definition vfp.cc:948
double vfpSFixedToFpD(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:731
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
Bitfield< 4 > width
Definition misc_types.hh:72
Bitfield< 7, 0 > imm
Definition types.hh:132
Bitfield< 4 > s
Bitfield< 27 > q
Definition misc_types.hh:55
double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
Definition vfp.cc:654
static uint64_t vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
Definition vfp.cc:592
Bitfield< 23, 0 > offset
Definition types.hh:144
uint16_t fprSqrtEstimateFpH(FPSCR &fpscr, uint16_t op)
Definition vfp.cc:813
float vfpUFixedToFpS(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:674
Bitfield< 23, 22 > rMode
uint32_t unsignedRSqrtEstimate(uint32_t op)
Definition vfp.cc:873
VfpSavedState prepFpState(uint32_t rMode)
Definition vfp.cc:182
float fixFpDFpSDest(FPSCR fpscr, double val)
Definition vfp.cc:336
int VfpSavedState
Definition vfp.hh:260
void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
Definition vfp.cc:204
double fixFpSFpDDest(FPSCR fpscr, float val)
Definition vfp.cc:372
@ FeUnderflow
Definition vfp.hh:94
@ FeDivByZero
Definition vfp.hh:90
@ FeInvalid
Definition vfp.hh:92
@ FeOverflow
Definition vfp.hh:93
@ FeAllExceptions
Definition vfp.hh:95
@ FeInexact
Definition vfp.hh:91
Bitfield< 26 > ahp
float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
Definition vfp.cc:664
float vfpSFixedToFpS(bool flush, bool defaultNan, int64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:692
fpType fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
Definition vfp.cc:230
Bitfield< 8 > a
Definition misc_types.hh:66
double vfpUFixedToFpD(bool flush, bool defaultNan, uint64_t val, uint8_t width, uint8_t imm)
Definition vfp.cc:712
Bitfield< 21, 20 > stride
static float bitsToFp(uint64_t, float)
Definition vfp.hh:227
constexpr int fpclassifyFpH(uint16_t __x)
Definition vfp.hh:121
static double recipEstimate(double a)
Definition vfp.cc:899
FPSCR fpVASimdCvtFPSCRValue(const FPSCR &fpscr)
Definition vfp.cc:1349
uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, double op)
Definition vfp.cc:584
FPSCR fpStandardFPSCRValue(const FPSCR &fpscr)
Definition vfp.cc:1031
uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op)
Definition vfp.cc:576
float fprSqrtEstimate(FPSCR &fpscr, float op)
Definition vfp.cc:770
static double recipSqrtEstimate(double a)
Definition vfp.cc:752
float fpRecipEstimate(FPSCR &fpscr, float op)
Definition vfp.cc:912
@ VfpRoundNearest
Definition vfp.hh:108
@ VfpRoundZero
Definition vfp.hh:111
@ VfpRoundUpward
Definition vfp.hh:109
@ VfpRoundDown
Definition vfp.hh:110
FPSCR fpRestoreFPSCRValue(const FPSCR fpscr_exec, const FPSCR &fpscr)
Definition vfp.cc:1361
Bitfield< 21 > ss
Definition misc_types.hh:60
FPSCR fpVASimdFPSCRValue(const FPSCR &fpscr)
Definition vfp.cc:1337
Bitfield< 4 > pc
Bitfield< 4 > op
Definition types.hh:83
Bitfield< 63 > val
Definition misc.hh:804
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
uint16_t RegIndex
Definition types.hh:176
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
void ccprintf(cp::Print &print)
Definition cprintf.hh:130
constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a)
Definition fp16_e5m10.hh:83

Generated on Mon May 26 2025 09:18:58 for gem5 by doxygen 1.13.2