gpu_dyn_inst.hh

/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __GPU_DYN_INST_HH__
#define __GPU_DYN_INST_HH__

#include <cstdint>
#include <memory>
#include <string>

#include "base/amo.hh"
#include "base/logging.hh"
#include "base/trace.hh"
#include "debug/GPUMem.hh"
#include "enums/StorageClassType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_exec_context.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

class GPUStaticInst;

template<typename T>
class AtomicOpPkAddBF16 : public TypedAtomicOpFunctor<T>
{
  public:
    T data;
    AtomicOpPkAddBF16(T _data) : data(_data) { }

    void
    execute([[maybe_unused]] T *b)
    {
        if constexpr (sizeof(T) == 4) {
            AMDGPU::PkBfloat16 pk_b, pk_data;
            pk_data = data;
            pk_b = *b;

            pk_b += pk_data;

            *b = pk_b.get();
        } else {
            fatal("Attempted packed atomic bf16 on non 32-bit type");
        }
    }

    AtomicOpFunctor* clone() { return new AtomicOpPkAddBF16(data); }
};
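
/*
 * Illustrative sketch (not part of the original header): a packed bf16 add
 * treats one 32-bit memory word as two bfloat16 lanes and adds the
 * corresponding lanes of the operand. pack_bf16() is a hypothetical helper
 * used only to build the example values.
 *
 * @code
 *     uint32_t mem_word = pack_bf16(1.0f, 2.0f);   // hypothetical packer
 *     uint32_t operand  = pack_bf16(0.5f, 0.25f);
 *     AtomicOpPkAddBF16<uint32_t> op(operand);
 *     op.execute(&mem_word);   // lanes now hold {1.5, 2.25} as bf16x2
 * @endcode
 */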

template<typename T>
class AtomicOpCAS : public TypedAtomicOpFunctor<T>
{
  public:
    T c;
    T s;

    ComputeUnit *computeUnit;

    AtomicOpCAS(T _c, T _s, ComputeUnit *compute_unit)
        : c(_c), s(_s), computeUnit(compute_unit) { }

    void
    execute(T *b)
    {
        computeUnit->stats.numCASOps++;

        if (*b == c) {
            *b = s;
        } else {
            computeUnit->stats.numFailedCASOps++;
        }
    }

    AtomicOpFunctor* clone() { return new AtomicOpCAS(c, s, computeUnit); }
};
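
/*
 * Illustrative sketch (not part of the original header): the CAS functor
 * writes the swap value only when memory matches the compare value, and it
 * counts every attempt in the CU's CAS statistics. The values and the
 * ComputeUnit pointer `cu` below are hypothetical.
 *
 * @code
 *     uint32_t mem_word = 7;
 *     AtomicOpCAS<uint32_t> cas(7, 42, cu);  // compare 7, swap in 42
 *     cas.execute(&mem_word);   // match: mem_word becomes 42
 *     cas.execute(&mem_word);   // 42 != 7: unchanged, numFailedCASOps++
 * @endcode
 */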

class RegisterOperandInfo
{
  public:
    RegisterOperandInfo() = delete;
    RegisterOperandInfo(int op_idx, int num_dwords,
                        const std::vector<int> &virt_indices,
                        const std::vector<int> &phys_indices)
        : opIdx(op_idx), numDWORDs(num_dwords), virtIndices(virt_indices),
          physIndices(phys_indices)
    {
    }

    /**
     * The number of registers required to store this operand.
     */
    int numRegisters() const { return numDWORDs / TheGpuISA::RegSizeDWords; }
    int operandIdx() const { return opIdx; }
    /**
     * We typically only need the first virtual register for the operand
     * regardless of its size.
     */
    int virtIdx(int reg_num=0) const { return virtIndices.at(reg_num); }

  private:
    /**
     * Index of this operand within the set of its parent instruction's
     * operand list.
     */
    const int opIdx;
    /**
     * Size of this operand in DWORDs.
     */
    const int numDWORDs;
    const std::vector<int> virtIndices;
    const std::vector<int> physIndices;
};
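
/*
 * Illustrative sketch (not part of the original header): numRegisters() is
 * simply numDWORDs / TheGpuISA::RegSizeDWords, and virtIdx() returns the
 * first virtual register by default. The operand below (operand 0, four
 * DWORDs, hypothetical register indices) spans four registers when
 * RegSizeDWords == 1.
 *
 * @code
 *     RegisterOperandInfo op(0, 4, {12, 13, 14, 15}, {34, 35, 36, 37});
 *     int regs  = op.numRegisters();  // 4 / TheGpuISA::RegSizeDWords
 *     int first = op.virtIdx();       // 12
 * @endcode
 */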

class GPUDynInst : public GPUExecContext
{
  public:
    GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
               uint64_t instSeqNum);
    ~GPUDynInst();
    void execute(GPUDynInstPtr gpuDynInst);

    const std::vector<OperandInfo>& srcVecRegOperands() const;
    const std::vector<OperandInfo>& dstVecRegOperands() const;
    const std::vector<OperandInfo>& srcScalarRegOperands() const;
    const std::vector<OperandInfo>& dstScalarRegOperands() const;

    int numSrcRegOperands();
    int numDstRegOperands();

    int numSrcVecRegOperands() const;
    int numDstVecRegOperands() const;
    int maxSrcVecRegOperandSize();
    int numSrcVecDWords();
    int numDstVecDWords();

    int numSrcScalarRegOperands() const;
    int numDstScalarRegOperands() const;
    int maxSrcScalarRegOperandSize();
    int numSrcScalarDWords();
    int numDstScalarDWords();

    int maxOperandSize();

    int getNumOperands() const;

    bool hasSourceSgpr() const;
    bool hasDestinationSgpr() const;
    bool hasSourceVgpr() const;
    bool hasDestinationVgpr() const;

    // returns true if the string "opcodeStr" is found in the
    // opcode of the instruction
    bool isOpcode(const std::string& opcodeStr) const;
    bool isOpcode(const std::string& opcodeStr,
                  const std::string& extStr) const;

    const std::string &disassemble() const;

    InstSeqNum seqNum() const;

    Addr pc();
    void pc(Addr _pc);

    enums::StorageClassType executedAs();

    // virtual address for scalar memory operations
    Addr scalarAddr;
    // virtual addresses for vector memory operations
    std::vector<Addr> addr;
    Addr pAddr;

    // vector data to get written
    uint8_t *d_data;
    // scalar data to be transferred
    uint8_t *scalar_data;
    // Additional data (for atomics)
    uint8_t *a_data;
    // Additional data (for atomics)
    uint8_t *x_data;
    // The execution mask
    VectorMask exec_mask;

    // SIMD unit to which the WF of the memory instruction is mapped
    int simdId;
    // unique id of the WF to which the memory instruction belongs
    int wfDynId;
    // The kernel id of the requesting wf
    int kern_id;
    // The CU id of the requesting wf
    int cu_id;
    // The workgroup id of the requesting wf
    int wg_id;
    // HW slot id to which the WF is mapped inside a SIMD unit
    int wfSlotId;
    // execution pipeline id where the memory instruction has been scheduled
    int execUnitId;
    // The execution time of this operation
    Tick time;
    // The latency of this operation
    WaitClass latency;

    // Initiate the specified memory operation, by creating a
    // memory request and sending it off to the memory system.
    void initiateAcc(GPUDynInstPtr gpuDynInst);
    // Complete the specified memory operation by writing the value
    // back to the RF in the case of a load or atomic return; in the
    // case of a store, do nothing.
    void completeAcc(GPUDynInstPtr gpuDynInst);

    void updateStats();

    GPUStaticInst* staticInstruction() { return _staticInst; }

    TheGpuISA::ScalarRegU32 srcLiteral() const;

    bool isALU() const;
    bool isBranch() const;
    bool isCondBranch() const;
    bool isNop() const;
    bool isReturn() const;
    bool isEndOfKernel() const;
    bool isKernelLaunch() const;
    bool isSDWAInst() const;
    bool isDPPInst() const;
    bool isUnconditionalJump() const;
    bool isSpecialOp() const;
    bool isWaitcnt() const;
    bool isSleep() const;

    bool isBarrier() const;
    bool isMemSync() const;
    bool isMemRef() const;
    bool isFlat() const;
    bool isFlatGlobal() const;
    bool isFlatScratch() const;
    bool isLoad() const;
    bool isStore() const;

    bool isAtomic() const;
    bool isAtomicNoRet() const;
    bool isAtomicRet() const;

    bool isScalar() const;
    bool isVector() const;
    bool readsSCC() const;
    bool writesSCC() const;
    bool readsVCC() const;
    bool writesVCC() const;
    bool readsExec() const;
    bool writesExec() const;
    bool readsMode() const;
    bool writesMode() const;
    bool ignoreExec() const;
    bool readsFlatScratch() const;
    bool writesFlatScratch() const;
    bool readsExecMask() const;
    bool writesExecMask() const;
    bool needsToken() const;

    bool isAtomicAnd() const;
    bool isAtomicOr() const;
    bool isAtomicXor() const;
    bool isAtomicCAS() const;
    bool isAtomicExch() const;
    bool isAtomicAdd() const;
    bool isAtomicSub() const;
    bool isAtomicInc() const;
    bool isAtomicDec() const;
    bool isAtomicMax() const;
    bool isAtomicMin() const;
    bool isAtomicPkAddBF16() const;

    bool isArgLoad() const;
    bool isGlobalMem() const;
    bool isLocalMem() const;

    bool isArgSeg() const;
    bool isGlobalSeg() const;
    bool isGroupSeg() const;
    bool isKernArgSeg() const;
    bool isPrivateSeg() const;
    bool isReadOnlySeg() const;
    bool isSpillSeg() const;

    bool isGloballyCoherent() const;
    bool isSystemCoherent() const;

    bool isI8() const;
    bool isF16() const;
    bool isF32() const;
    bool isF64() const;

    bool isFMA() const;
    bool isMAC() const;
    bool isMAD() const;
    bool isMFMA() const;

    // for FLAT memory ops. check the segment address
    // against the APE registers to see if it falls
    // within one of the APE ranges for LDS/SCRATCH/GPUVM.
    // if it does not fall into one of the three APEs, it
    // will be a regular global access.
    void doApertureCheck(const VectorMask &mask);
    // Function to resolve flat accesses during the execution stage.
    void resolveFlatSegment(const VectorMask &mask);

    template<typename c0> AtomicOpFunctorPtr
    makeAtomicOpFunctor(c0 *reg0, c0 *reg1)
    {
        if (isAtomicAnd()) {
            return std::make_unique<AtomicOpAnd<c0>>(*reg0);
        } else if (isAtomicOr()) {
            return std::make_unique<AtomicOpOr<c0>>(*reg0);
        } else if (isAtomicXor()) {
            return std::make_unique<AtomicOpXor<c0>>(*reg0);
        } else if (isAtomicCAS()) {
            return std::make_unique<AtomicOpCAS<c0>>(*reg0, *reg1, cu);
        } else if (isAtomicExch()) {
            return std::make_unique<AtomicOpExch<c0>>(*reg0);
        } else if (isAtomicAdd()) {
            return std::make_unique<AtomicOpAdd<c0>>(*reg0);
        } else if (isAtomicSub()) {
            return std::make_unique<AtomicOpSub<c0>>(*reg0);
        } else if (isAtomicInc()) {
            return std::make_unique<AtomicOpInc<c0>>();
        } else if (isAtomicDec()) {
            return std::make_unique<AtomicOpDec<c0>>();
        } else if (isAtomicMax()) {
            return std::make_unique<AtomicOpMax<c0>>(*reg0);
        } else if (isAtomicMin()) {
            return std::make_unique<AtomicOpMin<c0>>(*reg0);
        } else if (isAtomicPkAddBF16()) {
            return std::make_unique<AtomicOpPkAddBF16<c0>>(*reg0);
        } else {
            fatal("Unrecognized atomic operation");
        }
    }
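
    /*
     * Illustrative sketch (not part of the original header): the factory
     * above is typically invoked per lane when assembling an atomic
     * request, with reg0/reg1 pointing into the instruction's a_data and
     * x_data buffers (x_data supplies the CAS compare value). The lane
     * index below is hypothetical.
     *
     * @code
     *     int lane = 0;
     *     AtomicOpFunctorPtr amo = makeAtomicOpFunctor<uint32_t>(
     *         &reinterpret_cast<uint32_t*>(a_data)[lane],
     *         &reinterpret_cast<uint32_t*>(x_data)[lane]);
     * @endcode
     */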

    void
    setRequestFlags(RequestPtr req) const
    {
        if (isGloballyCoherent()) {
            req->setCacheCoherenceFlags(Request::GLC_BIT);
        }

        if (isSystemCoherent()) {
            req->setCacheCoherenceFlags(Request::SLC_BIT);
        }

        if (isAtomicRet()) {
            req->setFlags(Request::ATOMIC_RETURN_OP);
        } else if (isAtomicNoRet()) {
            req->setFlags(Request::ATOMIC_NO_RETURN_OP);
        }

        if (isMemSync()) {
            // the path for kernel launch and kernel end is different
            // from non-kernel mem sync.
            assert(!isKernelLaunch());
            assert(!isEndOfKernel());

            // must be a wbinv inst if not kernel launch/end
            req->setCacheCoherenceFlags(Request::INV_L1);
        }
    }
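
    /*
     * Illustrative sketch (not part of the original header): for a
     * hypothetical globally coherent atomic-with-return, the mapping above
     * yields both a coherence flag and an atomic flag on the request.
     *
     * @code
     *     RequestPtr req = std::make_shared<Request>();
     *     setRequestFlags(req);
     *     // isGloballyCoherent() -> Request::GLC_BIT
     *     // isAtomicRet()        -> Request::ATOMIC_RETURN_OP
     * @endcode
     */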

    // reset the number of pending memory requests for all lanes
    void
    resetEntireStatusVector()
    {
        assert(statusVector.size() == TheGpuISA::NumVecElemPerVecReg);
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            resetStatusVector(lane);
        }
    }

    // reset the number of pending memory requests for the given lane
    void
    resetStatusVector(int lane)
    {
        setStatusVector(lane, 0);
    }

    // set the number of pending memory requests for the given lane
    void
    setStatusVector(int lane, int newVal)
    {
        // Currently we can have up to 4 memory requests per lane. This can
        // occur on a memory request loading 4x dwords where the memory is
        // swizzled.
        assert((newVal >= 0) && (newVal <= 4));
        statusVector[lane] = newVal;
    }

    // decrement the number of pending memory requests for the given lane
    // by 1
    void
    decrementStatusVector(int lane)
    {
        // this lane may have multiple requests, so only subtract one for
        // this request
        assert(statusVector[lane] >= 1);
        statusVector[lane]--;
    }
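
    /*
     * Illustrative sketch (not part of the original header): the per-lane
     * counters typically follow this lifecycle for a vector access that
     * issues one request on lane 0.
     *
     * @code
     *     resetEntireStatusVector();   // every lane starts at 0
     *     setStatusVector(0, 1);       // lane 0 issues one request
     *     // ... the response for lane 0 comes back ...
     *     decrementStatusVector(0);    // lane 0 drains back to 0
     *     bool done = allLanesZero();  // true once all lanes are drained
     * @endcode
     */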

    // return the current number of pending memory requests for the given
    // lane
    int
    getLaneStatus(int lane) const
    {
        return statusVector[lane];
    }

    // returns true if all memory requests from all lanes have been received,
    // else returns false
    bool
    allLanesZero() const
    {
        // local variables
        bool allZero = true;

        // iterate over all lanes, checking the number of pending memory
        // requests they have
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            // if any lane still has pending requests, the result is false
            if (statusVector[lane] > 0) {
                DPRINTF(GPUMem, "CU%d: WF[%d][%d]: lane: %d has %d pending "
                        "request(s) for %#x\n", cu_id, simdId, wfSlotId, lane,
                        statusVector[lane], addr[lane]);
                allZero = false;
            }
        }

        if (allZero) {
            DPRINTF(GPUMem, "CU%d: WF[%d][%d]: all lanes have no pending"
                    " requests for %#x\n", cu_id, simdId, wfSlotId, addr[0]);
        }
        return allZero;
    }

    // returns a string representing the current state of the statusVector
    std::string
    printStatusVector() const
    {
        std::string statusVec_str = "[";

        // iterate over all lanes, appending the current number of pending
        // requests for each lane to the string
        for (int lane = 0; lane < TheGpuISA::NumVecElemPerVecReg; ++lane) {
            statusVec_str += std::to_string(statusVector[lane]);
        }
        statusVec_str += "]";

        return statusVec_str;
    }

    // Map returned packets and the addresses they satisfy with which lane
    // they were requested from
    typedef std::unordered_map<Addr, std::vector<int>> StatusVector;
    StatusVector memStatusVector;

    // Track the status of memory requests per lane, an int per lane to
    // allow unaligned accesses
    std::vector<int> statusVector;
    // for ld_v# or st_v#
    std::vector<int> tlbHitLevel;

    // for misaligned scalar ops we track the number
    // of outstanding reqs here
    int numScalarReqs;

    Tick getAccessTime() const { return accessTime; }

    void setAccessTime(Tick currentTime) { accessTime = currentTime; }

    void profileRoundTripTime(Tick currentTime, int hopId);
    std::vector<Tick> getRoundTripTime() const { return roundTripTime; }

    void profileLineAddressTime(Addr addr, Tick currentTime, int hopId);
    const std::map<Addr, std::vector<Tick>>& getLineAddressTime() const
    { return lineAddressTime; }
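
    /*
     * Illustrative sketch (not part of the original header): the profiling
     * hooks record one Tick per hop as a request travels toward memory;
     * the hop ids below are hypothetical.
     *
     * @code
     *     setAccessTime(curTick());
     *     profileRoundTripTime(curTick(), 0);   // hop 0: leaving the CU
     *     // ... later, when the request reaches the memory side ...
     *     profileRoundTripTime(curTick(), 1);   // hop 1: at memory
     *     std::vector<Tick> rtt = getRoundTripTime();
     * @endcode
     */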

    // inst used to save/restore a wavefront context
    bool isSaveRestore;

    bool isSystemReq() { return systemReq; }
    void setSystemReq() { systemReq = true; }

  private:
    GPUStaticInst *_staticInst;
    const InstSeqNum _seqNum;
    bool systemReq = false;

    // the time the request was started
    Tick accessTime = -1;

    // hold the tick when the instruction arrives at certain hop points
    // on its way to main memory
    std::vector<Tick> roundTripTime;

    // hold each cache block address for the instruction and a vector
    // to hold the tick when the block arrives at certain hop points
    std::map<Addr, std::vector<Tick>> lineAddressTime;
};

} // namespace gem5

#endif // __GPU_DYN_INST_HH__