local_memory_pipeline.cc
/*
 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/local_memory_pipeline.hh"

#include <iostream>

#include "debug/GPUMem.hh"
#include "debug/GPUPort.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"

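/*
 * LocalMemPipeline models a compute unit's pipeline for local (LDS) memory
 * operations: requests issued by wavefronts wait in lmIssuedRequests until
 * they can be sent to the LDS, and their responses drain from
 * lmReturnedRequests back into the vector register file.
 */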
namespace gem5
{

LocalMemPipeline::LocalMemPipeline(const ComputeUnitParams &p, ComputeUnit &cu)
    : computeUnit(cu), _name(cu.name() + ".LocalMemPipeline"),
      lmQueueSize(p.local_mem_queue_size), stats(&cu)
{
}

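/*
 * Each call to exec() advances the pipeline: it first tries to retire the
 * response at the head of lmReturnedRequests, writing load/atomic return
 * data back to the wavefront's vector register file and releasing the
 * outstanding-request counters, and then forwards at most one queued
 * request from lmIssuedRequests to the LDS.
 */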
void
LocalMemPipeline::exec()
{
    // apply any returned shared (LDS) memory operations
    GPUDynInstPtr m = !lmReturnedRequests.empty() ?
        lmReturnedRequests.front() : nullptr;

    bool accessVrf = true;
    Wavefront *w = nullptr;

    if ((m) && m->latency.rdy() && (m->isLoad() || m->isAtomicRet())) {
        w = m->wavefront();

        accessVrf = w->computeUnit->vrf[w->simdId]->
            canScheduleWriteOperandsFromLoad(w, m);

    }

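    // Retire the head response only once its modeled latency has elapsed,
    // the VRF can accept the write-back (accessVrf), the LDS-to-VRF bus is
    // free, and either return co-issue is enabled or the vector shared
    // memory unit is available.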
    if (!lmReturnedRequests.empty() && m->latency.rdy() && accessVrf &&
        computeUnit.locMemToVrfBus.rdy()
        && (computeUnit.shader->coissue_return
        || computeUnit.vectorSharedMemUnit.rdy())) {

        lmReturnedRequests.pop();
        w = m->wavefront();

        if (m->isFlat() && !m->isMemSync() && !m->isEndOfKernel()
            && m->allLanesZero()) {
            computeUnit.getTokenManager()->recvTokens(1);
        }

        DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing local mem instr %s\n",
                m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
        m->completeAcc(m);
        w->decLGKMInstsIssued();
        w->untrackLGKMInst(m);

        if (m->isLoad() || m->isAtomicRet()) {
            w->computeUnit->vrf[w->simdId]->
                scheduleWriteOperandsFromLoad(w, m);
        }

        // Decrement outstanding request count
        computeUnit.shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);

        if (m->isStore() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->outstandingReqsWrLm,
                                            m->time, -1);
        }

        if (m->isLoad() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->outstandingReqsRdLm,
                                            m->time, -1);
        }

        // Mark write bus busy for appropriate amount of time
        computeUnit.locMemToVrfBus.set(m->time);
        if (computeUnit.shader->coissue_return == 0)
            w->computeUnit->vectorSharedMemUnit.set(m->time);
    }

    // If an issued local memory instruction is waiting and there is room
    // in the return queue, send its packets to the LDS.
    if (!lmIssuedRequests.empty() && lmReturnedRequests.size() < lmQueueSize) {
        GPUDynInstPtr m = lmIssuedRequests.front();

        bool returnVal = computeUnit.sendToLds(m);
        if (!returnVal) {
            DPRINTF(GPUPort, "packet was nack'd and put in retry queue");
        }
        lmIssuedRequests.pop();
    }
}

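/*
 * issueRequest() accepts a local memory instruction from a wavefront,
 * moves its bookkeeping from the in-pipe counters to the outstanding
 * request counters, timestamps it, and queues it on lmIssuedRequests for
 * exec() to forward to the LDS.
 */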
void
LocalMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    if (gpuDynInst->isLoad()) {
        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
    } else if (gpuDynInst->isStore()) {
        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
    } else {
        // Atomic, both read and write
        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
    }

    wf->outstandingReqs++;
    wf->validateRequestCounters();

    gpuDynInst->setAccessTime(curTick());
    lmIssuedRequests.push(gpuDynInst);
}

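/*
 * Debug helper that prints the current occupancy of the issued and
 * returned queues to stdout.
 */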
void
LocalMemPipeline::printProgress()
{
    std::cout << "LMPipe issued: " << lmIssuedRequests.size()
              << " returned: " << lmReturnedRequests.size() << "\n";
}

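/*
 * Statistics for this pipeline are grouped under "LocalMemPipeline";
 * loadVrfBankConflictCycles counts the cycles returned LDS data is delayed
 * before it can update the VRF.
 */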
LocalMemPipeline::LocalMemPipelineStats::LocalMemPipelineStats(
    statistics::Group *parent)
    : statistics::Group(parent, "LocalMemPipeline"),
      ADD_STAT(loadVrfBankConflictCycles, "total number of cycles LDS data "
               "are delayed before updating the VRF")
{
}

} // namespace gem5