develop/hsa__queue__entry_8hh_source.html

/*

 * Copyright (c) 2017-2018 Advanced Micro Devices, Inc.

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions are met:

 *

 * 1. Redistributions of source code must retain the above copyright notice,

 * this list of conditions and the following disclaimer.

 *

 * 2. Redistributions in binary form must reproduce the above copyright notice,

 * this list of conditions and the following disclaimer in the documentation

 * and/or other materials provided with the distribution.

 *

 * 3. Neither the name of the copyright holder nor the names of its

 * contributors may be used to endorse or promote products derived from this

 * software without specific prior written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

 * POSSIBILITY OF SUCH DAMAGE.

 */


#ifndef __GPU_COMPUTE_HSA_QUEUE_ENTRY__

#define __GPU_COMPUTE_HSA_QUEUE_ENTRY__


#include <array>
#include <bitset>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include "base/intmath.hh"
#include "base/types.hh"
#include "dev/hsa/hsa_packet.hh"
#include "dev/hsa/hsa_queue.hh"
#include "enums/GfxVersion.hh"
#include "gpu-compute/kernel_code.hh"


namespace gem5

{


class HSAQueueEntry

{

  public:


    HSAQueueEntry(std::string kernel_name, uint32_t queue_id,

                  int dispatch_id, void *disp_pkt, AMDKernelCode *akc,

                  Addr host_pkt_addr, Addr code_addr, GfxVersion gfx_version)

        : _gfxVersion(gfx_version), kernName(kernel_name),

          _wgSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_x,

                  (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_y,

                  (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_z}},

          _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,

                    (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,

                    (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},

          _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),

          _hostDispPktAddr(host_pkt_addr),

          _completionSignal(((_hsa_dispatch_packet_t*)disp_pkt)

                            ->completion_signal),

          codeAddress(code_addr),

          kernargAddress(((_hsa_dispatch_packet_t*)disp_pkt)->kernarg_address),

          _outstandingInvs(-1), _outstandingWbs(0),

          _ldsSize((int)((_hsa_dispatch_packet_t*)disp_pkt)->

                   group_segment_size),

          _privMemPerItem((int)((_hsa_dispatch_packet_t*)disp_pkt)->

                         private_segment_size),

          _contextId(0), _wgId{{ 0, 0, 0 }},

          _numWgTotal(1), numWgArrivedAtBarrier(0), _numWgCompleted(0),

          _globalWgId(0), dispatchComplete(false)


    {

        // Use the resource descriptors to determine number of GPRs. This will

        // round up in some cases, however the exact number field in the AMD

        // kernel code struct is not backwards compatible and that field is

        // not populated in newer compiles. The resource descriptor dword must

        // be backwards compatible, so use that always.

        // LLVM docs: https://www.llvm.org/docs/AMDGPUUsage.html

        //     #code-object-v3-kernel-descriptor

        //

        // Currently, gem5 supported gfx version use a multiplier of 8. The

        // only exception is gfx900 (Vega10).

        if (gfx_version == GfxVersion::gfx90a ||

            gfx_version == GfxVersion::gfx942 ||

            gfx_version == GfxVersion::gfx950) {

            numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;

        } else {

            numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;

        }


        // SGPR allocation granulary is 16 in GFX9

        // Source: https://llvm.org/docs/AMDGPUUsage.html

        if (gfx_version == GfxVersion::gfx900 ||

            gfx_version == GfxVersion::gfx902 ||

            gfx_version == GfxVersion::gfx908 ||

            gfx_version == GfxVersion::gfx90a ||

            gfx_version == GfxVersion::gfx942 ||

            gfx_version == GfxVersion::gfx950) {

            numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;

        } else {

            panic("Saw unknown gfx version setting up GPR counts\n");

        }


        initialVgprState.reset();

        initialSgprState.reset();


        for (int i = 0; i < MAX_DIM; ++i) {

            _numWg[i] = divCeil(_gridSize[i], _wgSize[i]);

            _numWgTotal *= _numWg[i];

        }


        parseKernelCode(akc);


        // Offset of a first AccVGPR in the unified register file.

        // Granularity 4. Value 0-63. 0 - accum-offset = 4,

        // 1 - accum-offset = 8, ..., 63 - accum-offset = 256.

        _accumOffset = (akc->accum_offset + 1) * 4;

    }


    const GfxVersion&


    gfxVersion() const

    {

        return _gfxVersion;

    }


    const std::string&


    kernelName() const

    {

        return kernName;

    }


    int


    wgSize(int dim) const

    {

        assert(dim < MAX_DIM);

        return _wgSize[dim];

    }


    int


    gridSize(int dim) const

    {

        assert(dim < MAX_DIM);

        return _gridSize[dim];

    }


    int


    numVectorRegs() const

    {

        return numVgprs;

    }


    int


    numScalarRegs() const

    {

        return numSgprs;

    }


    uint32_t


    queueId() const

    {

        return _queueId;

    }


    int


    dispatchId() const

    {

        return _dispatchId;

    }


    void*


    dispPktPtr()

    {

        return dispPkt;

    }


    Addr


    hostDispPktAddr() const

    {

        return _hostDispPktAddr;

    }


    Addr


    completionSignal() const

    {

        return _completionSignal;

    }


    Addr


    codeAddr() const

    {

        return codeAddress;

    }


    Addr


    kernargAddr() const

    {

        return kernargAddress;

    }


    int


    ldsSize() const

    {

        return _ldsSize;

    }


    int privMemPerItem() const { return _privMemPerItem; }


    int


    contextId() const

    {

        return _contextId;

    }


    bool


    dispComplete() const

    {

        return dispatchComplete;

    }


    int


    wgId(int dim) const

    {

        assert(dim < MAX_DIM);

        return _wgId[dim];

    }


    void


    wgId(int dim, int val)

    {

        assert(dim < MAX_DIM);

        _wgId[dim] = val;

    }


    int


    globalWgId() const

    {

        return _globalWgId;

    }


    void


    globalWgId(int val)

    {

        _globalWgId = val;

    }


    int


    numWg(int dim) const

    {

        assert(dim < MAX_DIM);

        return _numWg[dim];

    }


    void


    notifyWgCompleted()

    {

        ++_numWgCompleted;

    }


    int


    numWgCompleted() const

    {

        return _numWgCompleted;

    }


    int


    numWgTotal() const

    {

        return _numWgTotal;

    }


    void


    markWgDispatch()

    {

        ++_wgId[0];

        ++_globalWgId;


        if (wgId(0) * wgSize(0) >= gridSize(0)) {

            _wgId[0] = 0;

            ++_wgId[1];


            if (wgId(1) * wgSize(1) >= gridSize(1)) {

                _wgId[1] = 0;

                ++_wgId[2];


                if (wgId(2) * wgSize(2) >= gridSize(2)) {

                    dispatchComplete = true;

                }

            }

        }

    }


    int


    numWgAtBarrier() const

    {

        return numWgArrivedAtBarrier;

    }


    bool vgprBitEnabled(int bit) const

    {

        return initialVgprState.test(bit);

    }


    bool sgprBitEnabled(int bit) const

    {

        return initialSgprState.test(bit);

    }


    Addr hostAMDQueueAddr;


    _amd_queue_t amdQueue;


    // the maximum number of dimensions for a grid or workgroup

    const static int MAX_DIM = 3;


    /* getter */

    int


    outstandingInvs() {

        return _outstandingInvs;

    }


    bool


    isInvStarted()

    {

        return (_outstandingInvs != -1);

    }


    void


    updateOutstandingInvs(int val)

    {

        _outstandingInvs += val;

        assert(_outstandingInvs >= 0);

    }


    void


    markInvDone()

    {

        _outstandingInvs = 0;

    }


    bool


    isInvDone() const

    {

        assert(_outstandingInvs >= 0);

        return (_outstandingInvs == 0);

    }


    int


    outstandingWbs() const

    {

        return _outstandingWbs;

    }


    void


    updateOutstandingWbs(int val)

    {

        _outstandingWbs += val;

        assert(_outstandingWbs >= 0);

    }


    unsigned


    accumOffset() const

    {

        return _accumOffset;

    }


    void


    preloadLength(unsigned val)

    {

        _preloadLength = val;


        if (_preloadLength) {

            initialSgprState.set(KernargPreload, true);

        }

    }


    unsigned


    preloadLength() const

    {

        return _preloadLength;

    }


    uint32_t *


    preloadArgs()

    {

        return &(_preloadArgs[0]);

    }


  private:

    void


    parseKernelCode(AMDKernelCode *akc)

    {

        initialSgprState.set(PrivateSegBuf,

            akc->enable_sgpr_private_segment_buffer);

        initialSgprState.set(DispatchPtr,

            akc->enable_sgpr_dispatch_ptr);

        initialSgprState.set(QueuePtr,

            akc->enable_sgpr_queue_ptr);

        initialSgprState.set(KernargSegPtr,

            akc->enable_sgpr_kernarg_segment_ptr);

        initialSgprState.set(DispatchId,

            akc->enable_sgpr_dispatch_id);

        initialSgprState.set(FlatScratchInit,

            akc->enable_sgpr_flat_scratch_init);

        initialSgprState.set(PrivateSegSize,

            akc->enable_sgpr_private_segment_size);

        initialSgprState.set(WorkgroupIdX,

            akc->enable_sgpr_workgroup_id_x);

        initialSgprState.set(WorkgroupIdY,

            akc->enable_sgpr_workgroup_id_y);

        initialSgprState.set(WorkgroupIdZ,

            akc->enable_sgpr_workgroup_id_z);

        initialSgprState.set(WorkgroupInfo,

            akc->enable_sgpr_workgroup_info);

        initialSgprState.set(PrivSegWaveByteOffset,

            akc->enable_private_segment);


        initialVgprState.set(WorkitemIdX, true);

        initialVgprState.set(WorkitemIdY, akc->enable_vgpr_workitem_id > 0);

        initialVgprState.set(WorkitemIdZ, akc->enable_vgpr_workitem_id > 1);

    }


    // store gfx version for version specific task handling

    GfxVersion _gfxVersion;

    // name of the kernel associated with the AQL entry

    std::string kernName;

    // workgroup Size (3 dimensions)

    std::array<int, MAX_DIM> _wgSize;

    // grid Size (3 dimensions)

    std::array<int, MAX_DIM> _gridSize;

    // total number of VGPRs per work-item

    int numVgprs;

    // total number of SGPRs per wavefront

    int numSgprs;

    // id of AQL queue in which this entry is placed

    uint32_t _queueId;

    int _dispatchId;

    // raw AQL packet pointer

    void *dispPkt;

    // host-side addr of the dispatch packet

    Addr _hostDispPktAddr;

    // pointer to bool

    Addr _completionSignal;

    // base address of the raw machine code

    Addr codeAddress;

    // base address of the kernel args

    Addr kernargAddress;

    int _outstandingInvs;

    int _outstandingWbs;

    int _ldsSize;

    int _privMemPerItem;

    int _contextId;

    std::array<int, MAX_DIM> _wgId;

    std::array<int, MAX_DIM> _numWg;

    int _numWgTotal;

    int numWgArrivedAtBarrier;

    // The number of completed work groups

    int _numWgCompleted;

    int _globalWgId;

    bool dispatchComplete;


    std::bitset<NumVectorInitFields> initialVgprState;

    std::bitset<NumScalarInitFields> initialSgprState;


    unsigned _accumOffset;


    // For preloading args there are extra bytes of space after the dispatch

    // packet containing values that should be preloaded into SGPRs. This

    // field serves as a buffer to DMA into and therefore is sized at the

    // max amount. It is of dword type to easily access during wave start.

    unsigned _preloadLength = 0;

    uint32_t _preloadArgs[KernargPreloadPktSize / sizeof(uint32_t)];

};


} // namespace gem5


#endif // __GPU_COMPUTE_HSA_QUEUE_ENTRY__

types.hh
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...

gem5::HSAQueueEntry::dispPkt
void * dispPkt
Definition hsa_queue_entry.hh:493

gem5::HSAQueueEntry::_numWgCompleted
int _numWgCompleted
Definition hsa_queue_entry.hh:527

gem5::HSAQueueEntry::amdQueue
_amd_queue_t amdQueue
Keep a copy of the AMD HSA queue because we need info from some of its fields to initialize register ...
Definition hsa_queue_entry.hh:335

gem5::HSAQueueEntry::isInvStarted
bool isInvStarted()
Whether invalidate has started or finished -1 is the initial value indicating inv has not started for...
Definition hsa_queue_entry.hh:352

gem5::HSAQueueEntry::dispPktPtr
void * dispPktPtr()
Definition hsa_queue_entry.hh:187

gem5::HSAQueueEntry::kernargAddr
Addr kernargAddr() const
Definition hsa_queue_entry.hh:211

gem5::HSAQueueEntry::_contextId
int _contextId
Definition hsa_queue_entry.hh:521

gem5::HSAQueueEntry::globalWgId
int globalWgId() const
Definition hsa_queue_entry.hh:251

gem5::HSAQueueEntry::_hostDispPktAddr
Addr _hostDispPktAddr
Definition hsa_queue_entry.hh:495

gem5::HSAQueueEntry::numVgprs
int numVgprs
Definition hsa_queue_entry.hh:486

gem5::HSAQueueEntry::queueId
uint32_t queueId() const
Definition hsa_queue_entry.hh:175

gem5::HSAQueueEntry::sgprBitEnabled
bool sgprBitEnabled(int bit) const
Definition hsa_queue_entry.hh:319

gem5::HSAQueueEntry::initialSgprState
std::bitset< NumScalarInitFields > initialSgprState
Definition hsa_queue_entry.hh:532

gem5::HSAQueueEntry::numWg
int numWg(int dim) const
Definition hsa_queue_entry.hh:263

gem5::HSAQueueEntry::wgId
int wgId(int dim) const
Definition hsa_queue_entry.hh:237

gem5::HSAQueueEntry::gfxVersion
const GfxVersion & gfxVersion() const
Definition hsa_queue_entry.hh:137

gem5::HSAQueueEntry::kernargAddress
Addr kernargAddress
Definition hsa_queue_entry.hh:501

gem5::HSAQueueEntry::globalWgId
void globalWgId(int val)
Definition hsa_queue_entry.hh:257

gem5::HSAQueueEntry::preloadLength
void preloadLength(unsigned val)
Definition hsa_queue_entry.hh:413

gem5::HSAQueueEntry::_preloadLength
unsigned _preloadLength
Definition hsa_queue_entry.hh:540

gem5::HSAQueueEntry::hostDispPktAddr
Addr hostDispPktAddr() const
Definition hsa_queue_entry.hh:193

gem5::HSAQueueEntry::_gfxVersion
GfxVersion _gfxVersion
Definition hsa_queue_entry.hh:478

gem5::HSAQueueEntry::MAX_DIM
static const int MAX_DIM
Definition hsa_queue_entry.hh:338

gem5::HSAQueueEntry::markWgDispatch
void markWgDispatch()
Definition hsa_queue_entry.hh:288

gem5::HSAQueueEntry::wgSize
int wgSize(int dim) const
Definition hsa_queue_entry.hh:149

gem5::HSAQueueEntry::outstandingInvs
int outstandingInvs()
Definition hsa_queue_entry.hh:342

gem5::HSAQueueEntry::_numWgTotal
int _numWgTotal
Definition hsa_queue_entry.hh:524

gem5::HSAQueueEntry::_queueId
uint32_t _queueId
Definition hsa_queue_entry.hh:490

gem5::HSAQueueEntry::wgId
void wgId(int dim, int val)
Definition hsa_queue_entry.hh:244

gem5::HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition hsa_queue_entry.hh:163

gem5::HSAQueueEntry::numWgAtBarrier
int numWgAtBarrier() const
Definition hsa_queue_entry.hh:309

gem5::HSAQueueEntry::_privMemPerItem
int _privMemPerItem
Definition hsa_queue_entry.hh:520

gem5::HSAQueueEntry::parseKernelCode
void parseKernelCode(AMDKernelCode *akc)
Definition hsa_queue_entry.hh:440

gem5::HSAQueueEntry::ldsSize
int ldsSize() const
Definition hsa_queue_entry.hh:217

gem5::HSAQueueEntry::hostAMDQueueAddr
Addr hostAMDQueueAddr
Host-side addr of the amd_queue_t on which this task was queued.
Definition hsa_queue_entry.hh:328

gem5::HSAQueueEntry::_preloadArgs
uint32_t _preloadArgs[KernargPreloadPktSize/sizeof(uint32_t)]
Definition hsa_queue_entry.hh:541

gem5::HSAQueueEntry::vgprBitEnabled
bool vgprBitEnabled(int bit) const
Definition hsa_queue_entry.hh:314

gem5::HSAQueueEntry::markInvDone
void markInvDone()
Forcefully change the state to be inv done.
Definition hsa_queue_entry.hh:373

gem5::HSAQueueEntry::kernelName
const std::string & kernelName() const
Definition hsa_queue_entry.hh:143

gem5::HSAQueueEntry::_dispatchId
int _dispatchId
Definition hsa_queue_entry.hh:491

gem5::HSAQueueEntry::outstandingWbs
int outstandingWbs() const
Definition hsa_queue_entry.hh:389

gem5::HSAQueueEntry::_accumOffset
unsigned _accumOffset
Definition hsa_queue_entry.hh:534

gem5::HSAQueueEntry::_globalWgId
int _globalWgId
Definition hsa_queue_entry.hh:528

gem5::HSAQueueEntry::contextId
int contextId() const
Definition hsa_queue_entry.hh:225

gem5::HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition hsa_queue_entry.hh:205

gem5::HSAQueueEntry::_wgId
std::array< int, MAX_DIM > _wgId
Definition hsa_queue_entry.hh:522

gem5::HSAQueueEntry::dispatchId
int dispatchId() const
Definition hsa_queue_entry.hh:181

gem5::HSAQueueEntry::privMemPerItem
int privMemPerItem() const
Definition hsa_queue_entry.hh:222

gem5::HSAQueueEntry::numWgCompleted
int numWgCompleted() const
Definition hsa_queue_entry.hh:276

gem5::HSAQueueEntry::_gridSize
std::array< int, MAX_DIM > _gridSize
Definition hsa_queue_entry.hh:484

gem5::HSAQueueEntry::preloadArgs
uint32_t * preloadArgs()
Definition hsa_queue_entry.hh:433

gem5::HSAQueueEntry::numWgTotal
int numWgTotal() const
Definition hsa_queue_entry.hh:282

gem5::HSAQueueEntry::_outstandingWbs
int _outstandingWbs
Number of outstanding wbs for the kernel values: 0: 1)initial value, flush has not started for the ke...
Definition hsa_queue_entry.hh:518

gem5::HSAQueueEntry::notifyWgCompleted
void notifyWgCompleted()
Definition hsa_queue_entry.hh:270

gem5::HSAQueueEntry::_wgSize
std::array< int, MAX_DIM > _wgSize
Definition hsa_queue_entry.hh:482

gem5::HSAQueueEntry::dispComplete
bool dispComplete() const
Definition hsa_queue_entry.hh:231

gem5::HSAQueueEntry::_numWg
std::array< int, MAX_DIM > _numWg
Definition hsa_queue_entry.hh:523

gem5::HSAQueueEntry::completionSignal
Addr completionSignal() const
Definition hsa_queue_entry.hh:199

gem5::HSAQueueEntry::isInvDone
bool isInvDone() const
Is invalidate done?
Definition hsa_queue_entry.hh:382

gem5::HSAQueueEntry::numWgArrivedAtBarrier
int numWgArrivedAtBarrier
Definition hsa_queue_entry.hh:525

gem5::HSAQueueEntry::gridSize
int gridSize(int dim) const
Definition hsa_queue_entry.hh:156

gem5::HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition hsa_queue_entry.hh:169

gem5::HSAQueueEntry::_completionSignal
Addr _completionSignal
Definition hsa_queue_entry.hh:497

gem5::HSAQueueEntry::preloadLength
unsigned preloadLength() const
Definition hsa_queue_entry.hh:427

gem5::HSAQueueEntry::_outstandingInvs
int _outstandingInvs
Number of outstanding invs for the kernel.
Definition hsa_queue_entry.hh:510

gem5::HSAQueueEntry::kernName
std::string kernName
Definition hsa_queue_entry.hh:480

gem5::HSAQueueEntry::numSgprs
int numSgprs
Definition hsa_queue_entry.hh:488

gem5::HSAQueueEntry::initialVgprState
std::bitset< NumVectorInitFields > initialVgprState
Definition hsa_queue_entry.hh:531

gem5::HSAQueueEntry::codeAddress
Addr codeAddress
Definition hsa_queue_entry.hh:499

gem5::HSAQueueEntry::dispatchComplete
bool dispatchComplete
Definition hsa_queue_entry.hh:529

gem5::HSAQueueEntry::HSAQueueEntry
HSAQueueEntry(std::string kernel_name, uint32_t queue_id, int dispatch_id, void *disp_pkt, AMDKernelCode *akc, Addr host_pkt_addr, Addr code_addr, GfxVersion gfx_version)
Definition hsa_queue_entry.hh:63

gem5::HSAQueueEntry::accumOffset
unsigned accumOffset() const
Definition hsa_queue_entry.hh:407

gem5::HSAQueueEntry::updateOutstandingWbs
void updateOutstandingWbs(int val)
Update the number of pending writeback requests.
Definition hsa_queue_entry.hh:400

gem5::HSAQueueEntry::_ldsSize
int _ldsSize
Definition hsa_queue_entry.hh:519

gem5::HSAQueueEntry::updateOutstandingInvs
void updateOutstandingInvs(int val)
update the number of pending invalidate requests
Definition hsa_queue_entry.hh:363

gem5::divCeil
static constexpr T divCeil(const T &a, const U &b)
Definition intmath.hh:110

panic
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220

hsa_packet.hh

hsa_queue.hh

intmath.hh

kernel_code.hh

gem5::ArmISA::i
Bitfield< 7 > i
Definition misc_types.hh:67

gem5::X86ISA::val
Bitfield< 63 > val
Definition misc.hh:804

gem5
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36

gem5::AMDKernelCode
struct gem5::GEM5_PACKED AMDKernelCode

gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147

gem5::KernargPreloadPktSize
constexpr int KernargPreloadPktSize
The number of bytes after the dispatch packet which contain kernel arguments that should be preloaded...
Definition kernel_code.hh:84

gem5::WorkgroupIdX
@ WorkgroupIdX
Definition kernel_code.hh:64

gem5::DispatchId
@ DispatchId
Definition kernel_code.hh:60

gem5::DispatchPtr
@ DispatchPtr
Definition kernel_code.hh:57

gem5::QueuePtr
@ QueuePtr
Definition kernel_code.hh:58

gem5::PrivSegWaveByteOffset
@ PrivSegWaveByteOffset
Definition kernel_code.hh:68

gem5::PrivateSegBuf
@ PrivateSegBuf
Definition kernel_code.hh:56

gem5::WorkgroupIdY
@ WorkgroupIdY
Definition kernel_code.hh:65

gem5::PrivateSegSize
@ PrivateSegSize
Definition kernel_code.hh:62

gem5::WorkgroupInfo
@ WorkgroupInfo
Definition kernel_code.hh:67

gem5::WorkgroupIdZ
@ WorkgroupIdZ
Definition kernel_code.hh:66

gem5::FlatScratchInit
@ FlatScratchInit
Definition kernel_code.hh:61

gem5::KernargPreload
@ KernargPreload
Definition kernel_code.hh:63

gem5::KernargSegPtr
@ KernargSegPtr
Definition kernel_code.hh:59

gem5::WorkitemIdX
@ WorkitemIdX
Definition kernel_code.hh:74

gem5::WorkitemIdZ
@ WorkitemIdZ
Definition kernel_code.hh:76

gem5::WorkitemIdY
@ WorkitemIdY
Definition kernel_code.hh:75

gem5::GEM5_PACKED::enable_sgpr_flat_scratch_init
uint32_t enable_sgpr_flat_scratch_init
Definition kernel_code.hh:157

gem5::GEM5_PACKED::enable_sgpr_queue_ptr
uint32_t enable_sgpr_queue_ptr
Definition kernel_code.hh:154

gem5::GEM5_PACKED::enable_private_segment
uint32_t enable_private_segment
Definition kernel_code.hh:129

gem5::GEM5_PACKED::enable_sgpr_workgroup_id_y
uint32_t enable_sgpr_workgroup_id_y
Definition kernel_code.hh:133

gem5::GEM5_PACKED::enable_sgpr_dispatch_ptr
uint32_t enable_sgpr_dispatch_ptr
Definition kernel_code.hh:153

gem5::GEM5_PACKED::enable_sgpr_workgroup_id_z
uint32_t enable_sgpr_workgroup_id_z
Definition kernel_code.hh:134

gem5::GEM5_PACKED::enable_sgpr_dispatch_id
uint32_t enable_sgpr_dispatch_id
Definition kernel_code.hh:156

gem5::GEM5_PACKED::enable_vgpr_workitem_id
uint32_t enable_vgpr_workitem_id
Definition kernel_code.hh:136

gem5::GEM5_PACKED::enable_sgpr_private_segment_size
uint32_t enable_sgpr_private_segment_size
Definition kernel_code.hh:158

gem5::GEM5_PACKED::enable_sgpr_kernarg_segment_ptr
uint32_t enable_sgpr_kernarg_segment_ptr
Definition kernel_code.hh:155

gem5::GEM5_PACKED::enable_sgpr_private_segment_buffer
uint32_t enable_sgpr_private_segment_buffer
Definition kernel_code.hh:152

gem5::GEM5_PACKED::enable_sgpr_workgroup_id_x
uint32_t enable_sgpr_workgroup_id_x
Definition kernel_code.hh:132

gem5::GEM5_PACKED::enable_sgpr_workgroup_info
uint32_t enable_sgpr_workgroup_info
Definition kernel_code.hh:135

gem5::_amd_queue_t
Definition hsa_queue.hh:65

gem5::_hsa_dispatch_packet_t
Definition hsa_packet.hh:54