37#include "debug/GPUFetch.hh"
38#include "debug/GPUPort.hh"
39#include "debug/GPUTLB.hh"
53 : timingSim(true), computeUnit(cu), fetchScheduler(
p),
54 waveList(nullptr), fetchDepth(
p.fetch_depth)
95 if (!fetch_buf.hasFreeSpace()) {
96 fetch_buf.checkWaveReleaseBuf();
98 if (fetch_buf.hasFetchDataToProcess()) {
99 fetch_buf.decodeInsts();
157 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Id%d: Initiate fetch "
161 DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
184 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
195 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
200 DPRINTF(GPUTLB,
"sent FETCH translation request for %#x\n",
vaddr);
214 if (!pkt->
req->systemReq()) {
225 fetch(pkt, wavefront);
232 assert(pkt->
req->hasPaddr());
233 assert(pkt->
req->hasSize());
235 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch Access: %#x\n",
237 pkt->
req->getPaddr());
266 if (!pkt->
req->systemReq()) {
276 .reservedBuf(pkt->
req->getVaddr()));
284 if (pkt->
req->systemReq()) {
292 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
294 pkt->
req->getPaddr());
296 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
298 pkt->
req->getPaddr());
314 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch addr %#x returned "
357 "Cache line size should be a power of two.");
372 restartFromBranch =
true;
383 freeList.push_back(bufStart +
i * cacheLineSize);
386 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
387 "buffer\n", wavefront->simdId, wavefront->wfSlotId,
396 if (bufferedAndReservedLines()) {
397 Addr last_line_fetched = 0;
398 if (!reservedLines()) {
403 last_line_fetched = bufferedPCs.rbegin()->first;
405 last_line_fetched = reservedPCs.rbegin()->first;
408 next_line = last_line_fetched + cacheLineSize;
414 assert(bufferedPCs.find(next_line) == bufferedPCs.end());
415 assert(reservedPCs.find(next_line) == reservedPCs.end());
434 if (restartFromBranch) {
435 restartFromBranch =
false;
439 readPtr += byte_offset;
451 assert(hasFreeSpace());
452 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
453 assert(reservedPCs.find(
vaddr) == reservedPCs.end());
454 assert(bufferedAndReservedLines() <
fetchDepth);
456 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d reserved fetch buffer entry "
457 "for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
458 wavefront->wfDynId,
vaddr);
466 uint8_t *inst_buf = freeList.front();
467 reservedPCs.emplace(
vaddr, inst_buf);
468 freeList.pop_front();
481 wavefront->decLGKMInstsIssued();
483 restartFromBranch =
false;
489 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
490 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for addr %#x\n",
491 wavefront->simdId, wavefront->wfSlotId,
492 wavefront->wfDynId,
vaddr);
499 auto reserved_pc = reservedPCs.find(
vaddr);
500 assert(reserved_pc != reservedPCs.end());
501 bufferedPCs.emplace(
vaddr, reserved_pc->second);
503 if (readPtr == bufEnd) {
507 reserved_pc->second =
nullptr;
508 reservedPCs.erase(reserved_pc);
514 return fetchBytesRemaining() >=
sizeof(TheGpuISA::RawMachInst);
521 wavefront->computeUnit->cacheLineSize());
522 if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
523 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d current wave PC(%#x) still "
524 "being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
525 wavefront->wfDynId, cur_wave_pc);
528 assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());
533 auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
534 auto oldest_buffered_pc = bufferedPCs.begin();
536 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d checking if PC block addr = %#x"
537 "(PC = %#x) can be released.\n", wavefront->simdId,
538 wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
543 for (
const auto &buf_pc : bufferedPCs) {
544 DPRINTF(GPUFetch,
"PC[%d] = %#x\n", idx, buf_pc.first);
551 assert(current_buffered_pc != bufferedPCs.end());
559 if (current_buffered_pc != oldest_buffered_pc) {
560 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for PC = %#x, "
561 "removing it from the fetch buffer.\n", wavefront->simdId,
562 wavefront->wfSlotId, wavefront->wfDynId,
563 oldest_buffered_pc->first);
565 freeList.emplace_back(oldest_buffered_pc->second);
566 oldest_buffered_pc->second =
nullptr;
567 bufferedPCs.erase(oldest_buffered_pc);
568 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d has %d lines buffered.\n",
569 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
583 while (wavefront->instructionBuffer.size() < maxIbSize
584 && hasFetchDataToProcess()) {
588 TheGpuISA::MachInst mach_inst
589 =
reinterpret_cast<TheGpuISA::MachInst
>(readPtr);
590 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
591 readPtr += gpu_static_inst->
instSize();
593 assert(readPtr <= bufEnd);
596 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
597 wavefront, gpu_static_inst,
598 wavefront->computeUnit->
600 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
602 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%ld decoded %s (%d bytes). "
603 "%d bytes remain.\n", wavefront->simdId,
604 wavefront->wfSlotId, wavefront->wfDynId,
607 fetchBytesRemaining());
615 TheGpuISA::RawMachInst split_inst = 0;
616 int dword_size =
sizeof(uint32_t);
617 int num_dwords =
sizeof(TheGpuISA::RawMachInst) / dword_size;
619 for (
int i = 0;
i < num_dwords; ++
i) {
621 *
reinterpret_cast<uint32_t*
>(readPtr));
622 if (readPtr + dword_size >= bufEnd) {
627 assert(readPtr == bufStart);
629 TheGpuISA::MachInst mach_inst
630 =
reinterpret_cast<TheGpuISA::MachInst
>(&split_inst);
631 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
632 readPtr += (gpu_static_inst->
instSize() - dword_size);
633 assert(readPtr < bufEnd);
636 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
637 wavefront, gpu_static_inst,
638 wavefront->computeUnit->
640 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
642 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d decoded split inst %s (%#x) "
643 "(%d bytes). %d bytes remain in %d buffered lines.\n",
644 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
646 gpu_static_inst->
instSize(), fetchBytesRemaining(),
657 bool is_split = (readPtr +
sizeof(TheGpuISA::RawMachInst)) > bufEnd;
665 int bytes_remaining = 0;
667 if (bufferedLines() && readPtr != bufEnd) {
668 auto last_buf_pc = bufferedPCs.rbegin();
669 uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
670 int byte_diff = end_ptr - readPtr;
672 if (end_ptr > readPtr) {
673 bytes_remaining = byte_diff;
674 }
else if (end_ptr < readPtr) {
675 bytes_remaining = bufferedBytes() + byte_diff;
679 assert(bytes_remaining <= bufferedBytes());
680 return bytes_remaining;
686 reqPkt->makeResponse();
687 fetchUnit->computeUnit.handleSQCReturn(reqPkt);
void sendRequest(PacketPtr pkt, Event *callback)
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
std::deque< std::pair< PacketPtr, Wavefront * > > retries
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from shader.
int getCacheLineBits() const
int cacheLineSize() const
RequestorID requestorId()
void reserveBuf(Addr vaddr)
reserve an entry in the fetch buffer for PC = vaddr.
uint8_t * readPtr
pointer that points to the next chunk of inst data to be decoded.
int fetchBytesRemaining() const
calculates the number of fetched bytes that have yet to be decoded.
void checkWaveReleaseBuf()
checks if the wavefront can release any of its fetch buffer entries.
bool hasFetchDataToProcess() const
checks if the buffer contains valid data.
void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf)
allocate the fetch buffer space, and set the fetch depth (number of lines that may be buffered), the fetch size (cache line size), and the parent wavefront of this fetch buffer.
void fetchDone(PacketPtr ptr)
bool splitDecode() const
check if the next instruction to be processed out of the fetch buffer is split across the end/beginning of the fetch buffer.
void decodeInsts()
each time the fetch stage is ticked, we check if there are any data in the fetch buffer that may be decoded and, if so, decode them into the wavefront's instruction buffer.
uint8_t * bufStart
raw instruction buffer.
std::deque< uint8_t * > freeList
represents the fetch buffer free list.
static uint32_t globalFetchUnitID
std::vector< Wavefront * > * waveList
void bindWaveList(std::vector< Wavefront * > *list)
FetchUnit(const ComputeUnitParams &p, ComputeUnit &cu)
void fetch(PacketPtr pkt, Wavefront *wavefront)
std::vector< Wavefront * > fetchQueue
void initiateFetch(Wavefront *wavefront)
int fetchDepth
number of cache lines we can fetch and buffer.
TheGpuISA::Decoder decoder
ComputeUnit & computeUnit
void processFetchReturn(PacketPtr pkt)
std::vector< FetchBufDesc > fetchBuf
void flushBuf(int wfSlotId)
std::vector< std::pair< Wavefront *, bool > > fetchStatusQueue
const std::string & disassemble()
virtual int instSize() const =0
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
MemCmd cmd
The command field of the packet.
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
void sendFunctional(PacketPtr pkt) const
Send a functional request packet, where the data is instantly updated everywhere in the memory system, without affecting the current state of any block or moving the block.
@ INST_FETCH
The request was an instruction fetch.
void bindList(std::vector< Wavefront * > *sched_list)
AMDGPUSystemHub * systemHub
std::deque< GPUDynInstPtr > instructionBuffer
@ S_WAITCNT
wavefront has unsatisfied wait counts
static constexpr std::enable_if_t< std::is_integral_v< T >, int > floorLog2(T x)
static constexpr bool isPowerOf2(const T &n)
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true, allowing the programmer to specify diagnostic printout.
Addr makeLineAddress(Addr addr)
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
T safe_cast(U &&ref_or_ptr)
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
SenderState is information carried along with the packet throughout the TLB hierarchy.
GPU TranslationState: this currently is somewhat of a bastardization of the usage of SenderState, used to carry translation state through the GPU TLB hierarchy.