gem5  v22.1.0.0
lsq.cc
/*
 * Copyright (c) 2011-2012, 2014, 2017-2019, 2021 ARM Limited
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2005-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/o3/lsq.hh"

#include <algorithm>
#include <list>
#include <string>

#include "base/compiler.hh"
#include "base/logging.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/iew.hh"
#include "cpu/o3/limits.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
#include "debug/HtmCpu.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/BaseO3CPU.hh"

namespace gem5
{

namespace o3
{

LSQ::DcachePort::DcachePort(LSQ *_lsq, CPU *_cpu) :
    RequestPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq), cpu(_cpu)
{}

LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      _cacheBlocked(false),
      cacheStorePorts(params.cacheStorePorts), usedStorePorts(0),
      cacheLoadPorts(params.cacheLoadPorts), usedLoadPorts(0),
      waitingForStaleTranslation(false),
      staleTranslationWaitTxnId(0),
      lsqPolicy(params.smtLSQPolicy),
      LQEntries(params.LQEntries),
      SQEntries(params.SQEntries),
      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params.numThreads,
                  params.smtLSQThreshold)),
      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params.numThreads,
                  params.smtLSQThreshold)),
      dcachePort(this, cpu_ptr),
      numThreads(params.numThreads)
{
    assert(numThreads > 0 && numThreads <= MaxThreads);

    //**********************************************
    //************ Handle SMT Parameters ***********
    //**********************************************

    /* Run SMT policy checks. */
    if (lsqPolicy == SMTQueuePolicy::Dynamic) {
        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
        DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {

        assert(params.smtLSQThreshold > params.LQEntries);
        assert(params.smtLSQThreshold > params.SQEntries);

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries, maxSQEntries);
    } else {
        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
                "Partitioned, Threshold");
    }

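    // Illustrative sizing under each policy (per-thread limits come from
    // maxLSQAllocation() in lsq.hh): with LQEntries = 32 and two threads,
    // Partitioned gives each thread a 16-entry LQ, Threshold caps each
    // thread at smtLSQThreshold entries, and Dynamic lets both threads
    // share all 32.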
    thread.reserve(numThreads);
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread.emplace_back(maxLQEntries, maxSQEntries);
        thread[tid].init(cpu, iew_ptr, params, this, tid);
        thread[tid].setDcachePort(&dcachePort);
    }
}

std::string
LSQ::name() const
{
    return iewStage->name() + ".lsq";
}

void
LSQ::setActiveThreads(std::list<ThreadID> *at_ptr)
{
    activeThreads = at_ptr;
    assert(activeThreads != 0);
}

void
LSQ::drainSanityCheck() const
{
    assert(isDrained());

    for (ThreadID tid = 0; tid < numThreads; tid++)
        thread[tid].drainSanityCheck();
}

bool
LSQ::isDrained() const
{
    bool drained(true);

    if (!lqEmpty()) {
        DPRINTF(Drain, "Not drained, LQ not empty.\n");
        drained = false;
    }

    if (!sqEmpty()) {
        DPRINTF(Drain, "Not drained, SQ not empty.\n");
        drained = false;
    }

    return drained;
}

void
LSQ::takeOverFrom()
{
    usedStorePorts = 0;
    _cacheBlocked = false;

    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].takeOverFrom();
    }
}

void
LSQ::tick()
{
    // Re-issue loads which got blocked on the per-cycle load ports limit.
    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
        iewStage->cacheUnblocked();

    usedLoadPorts = 0;
    usedStorePorts = 0;
}

bool
LSQ::cacheBlocked() const
{
    return _cacheBlocked;
}

void
LSQ::cacheBlocked(bool v)
{
    _cacheBlocked = v;
}

bool
LSQ::cachePortAvailable(bool is_load) const
{
    bool ret;
    if (is_load) {
        ret = usedLoadPorts < cacheLoadPorts;
    } else {
        ret = usedStorePorts < cacheStorePorts;
    }
    return ret;
}
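
// Illustrative accounting: with cacheLoadPorts = 2, the first two loads
// issued in a cycle each call cachePortBusy(true) below; a third load then
// sees cachePortAvailable(true) == false and must wait until tick() clears
// the counters for the next cycle.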

void
LSQ::cachePortBusy(bool is_load)
{
    assert(cachePortAvailable(is_load));
    if (is_load) {
        usedLoadPorts++;
    } else {
        usedStorePorts++;
    }
}

void
LSQ::insertLoad(const DynInstPtr &load_inst)
{
    ThreadID tid = load_inst->threadNumber;

    thread[tid].insertLoad(load_inst);
}

void
LSQ::insertStore(const DynInstPtr &store_inst)
{
    ThreadID tid = store_inst->threadNumber;

    thread[tid].insertStore(store_inst);
}

Fault
LSQ::executeLoad(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeLoad(inst);
}

Fault
LSQ::executeStore(const DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    return thread[tid].executeStore(inst);
}

void
LSQ::commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
{
    thread.at(tid).commitLoads(youngest_inst);
}

void
LSQ::commitStores(InstSeqNum &youngest_inst, ThreadID tid)
{
    thread.at(tid).commitStores(youngest_inst);
}

void
LSQ::writebackStores()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (numStoresToWB(tid) > 0) {
            DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
                "available for Writeback.\n", tid, numStoresToWB(tid));
        }

        thread[tid].writebackStores();
    }
}

void
LSQ::squash(const InstSeqNum &squashed_num, ThreadID tid)
{
    thread.at(tid).squash(squashed_num);
}

bool
LSQ::violation()
{
    /* Answers: Does Anybody Have a Violation? */
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (thread[tid].violation())
            return true;
    }

    return false;
}

bool LSQ::violation(ThreadID tid) { return thread.at(tid).violation(); }

DynInstPtr
LSQ::getMemDepViolator(ThreadID tid)
{
    return thread.at(tid).getMemDepViolator();
}

int
LSQ::getLoadHead(ThreadID tid)
{
    return thread.at(tid).getLoadHead();
}

InstSeqNum
LSQ::getLoadHeadSeqNum(ThreadID tid)
{
    return thread.at(tid).getLoadHeadSeqNum();
}

int
LSQ::getStoreHead(ThreadID tid)
{
    return thread.at(tid).getStoreHead();
}

InstSeqNum
LSQ::getStoreHeadSeqNum(ThreadID tid)
{
    return thread.at(tid).getStoreHeadSeqNum();
}

int LSQ::getCount(ThreadID tid) { return thread.at(tid).getCount(); }

int LSQ::numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }

int LSQ::numStores(ThreadID tid) { return thread.at(tid).numStores(); }

int
LSQ::numHtmStarts(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].numHtmStarts();
}

int
LSQ::numHtmStops(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].numHtmStops();
}

void
LSQ::resetHtmStartsStops(ThreadID tid)
{
    if (tid != InvalidThreadID)
        thread[tid].resetHtmStartsStops();
}

uint64_t
LSQ::getLatestHtmUid(ThreadID tid) const
{
    if (tid == InvalidThreadID)
        return 0;
    else
        return thread[tid].getLatestHtmUid();
}

void
LSQ::setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
{
    if (tid != InvalidThreadID)
        thread[tid].setLastRetiredHtmUid(htmUid);
}

void
LSQ::recvReqRetry()
{
    iewStage->cacheUnblocked();
    cacheBlocked(false);

    for (ThreadID tid : *activeThreads) {
        thread[tid].recvRetry();
    }
}

void
LSQ::completeDataAccess(PacketPtr pkt)
{
    LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
    thread[cpu->contextToThread(request->contextId())]
        .completeDataAccess(pkt);
}

bool
LSQ::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError())
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());

    LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
    panic_if(!request, "Got packet back with unknown sender state\n");

    thread[cpu->contextToThread(request->contextId())].recvTimingResp(pkt);

    if (pkt->isInvalidate()) {
        // This response also contains an invalidate; e.g. this can be the
        // case if cmd is ReadRespWithInvalidate.
        //
        // The calling order between completeDataAccess and checkSnoop
        // matters. By calling checkSnoop after completeDataAccess, we
        // ensure that the fault set by checkSnoop is not lost. Calling
        // writeback (more specifically inst->completeAcc) in
        // completeDataAccess overwrites the fault, and in case this
        // instruction requires squashing (as determined by checkSnoop),
        // the ReExec fault set by checkSnoop would be lost otherwise.

        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
                pkt->getAddr());

        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    }
    // Update the LSQRequest state (this may delete the request)
    request->packetReplied();

    if (waitingForStaleTranslation) {
        checkStaleTranslations();
    }

    return true;
}

void
LSQ::recvTimingSnoopReq(PacketPtr pkt)
{
    DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // must be a snoop
    if (pkt->isInvalidate()) {
        DPRINTF(LSQ, "received invalidation for addr:%#x\n",
                pkt->getAddr());
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            thread[tid].checkSnoop(pkt);
        }
    } else if (pkt->req && pkt->req->isTlbiExtSync()) {
        DPRINTF(LSQ, "received TLBI Ext Sync\n");
        assert(!waitingForStaleTranslation);

        waitingForStaleTranslation = true;
        staleTranslationWaitTxnId = pkt->req->getExtraData();

        for (auto& unit : thread) {
            unit.startStaleTranslationFlush();
        }

        // In case no units have pending ops, just go ahead
        checkStaleTranslations();
    }
}

int
LSQ::getCount()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += getCount(tid);
    }

    return total;
}

int
LSQ::numLoads()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += numLoads(tid);
    }

    return total;
}

int
LSQ::numStores()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numStores();
    }

    return total;
}

unsigned
LSQ::numFreeLoadEntries()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numFreeLoadEntries();
    }

    return total;
}

unsigned
LSQ::numFreeStoreEntries()
{
    unsigned total = 0;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        total += thread[tid].numFreeStoreEntries();
    }

    return total;
}

unsigned
LSQ::numFreeLoadEntries(ThreadID tid)
{
    return thread[tid].numFreeLoadEntries();
}

unsigned
LSQ::numFreeStoreEntries(ThreadID tid)
{
    return thread[tid].numFreeStoreEntries();
}

bool
LSQ::isFull()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!(thread[tid].lqFull() || thread[tid].sqFull()))
            return false;
    }

    return true;
}

bool
LSQ::isFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isFull();
    else
        return thread[tid].lqFull() || thread[tid].sqFull();
}

bool
LSQ::isEmpty() const
{
    return lqEmpty() && sqEmpty();
}

bool
LSQ::lqEmpty() const
{
    std::list<ThreadID>::const_iterator threads = activeThreads->begin();
    std::list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].lqEmpty())
            return false;
    }

    return true;
}

bool
LSQ::sqEmpty() const
{
    std::list<ThreadID>::const_iterator threads = activeThreads->begin();
    std::list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].sqEmpty())
            return false;
    }

    return true;
}

bool
LSQ::lqFull()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].lqFull())
            return false;
    }

    return true;
}

bool
LSQ::lqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return lqFull();
    else
        return thread[tid].lqFull();
}

bool
LSQ::sqFull()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!sqFull(tid))
            return false;
    }

    return true;
}

bool
LSQ::sqFull(ThreadID tid)
{
    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return sqFull();
    else
        return thread[tid].sqFull();
}

bool
LSQ::isStalled()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].isStalled())
            return false;
    }

    return true;
}

bool
LSQ::isStalled(ThreadID tid)
{
    if (lsqPolicy == SMTQueuePolicy::Dynamic)
        return isStalled();
    else
        return thread[tid].isStalled();
}

bool
LSQ::hasStoresToWB()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (hasStoresToWB(tid))
            return true;
    }

    return false;
}

bool
LSQ::hasStoresToWB(ThreadID tid)
{
    return thread.at(tid).hasStoresToWB();
}

int
LSQ::numStoresToWB(ThreadID tid)
{
    return thread.at(tid).numStoresToWB();
}

bool
LSQ::willWB()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (willWB(tid))
            return true;
    }

    return false;
}

bool
LSQ::willWB(ThreadID tid)
{
    return thread.at(tid).willWB();
}

void
LSQ::dumpInsts() const
{
    std::list<ThreadID>::const_iterator threads = activeThreads->begin();
    std::list<ThreadID>::const_iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        thread[tid].dumpInsts();
    }
}

void
LSQ::dumpInsts(ThreadID tid) const
{
    thread.at(tid).dumpInsts();
}

Fault
LSQ::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
        unsigned int size, Addr addr, Request::Flags flags, uint64_t *res,
        AtomicOpFunctorPtr amo_op, const std::vector<bool>& byte_enable)
{
    // This incoming request can be either a load, a store, or an atomic.
    // An atomic request has a corresponding pointer to its atomic memory
    // operation.
    [[maybe_unused]] bool isAtomic = !isLoad && amo_op;

    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
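    // Illustrative: with 64-byte cache lines, a 16-byte access at 0x3f8
    // ends at 0x407 and spans two lines, so needs_burst is true and a
    // SplitDataRequest is built below; the same access at 0x3c0 stays
    // within one line and uses a SingleDataRequest.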
    LSQRequest* request = nullptr;

    // Atomic requests that access data across a cache line boundary are
    // currently not allowed since the cache does not guarantee that the
    // corresponding atomic memory operations execute atomically across a
    // cache line. For ISAs such as x86 that support cross-cache-line
    // atomic instructions, the cache needs to be modified to perform an
    // atomic update to both cache lines. For now, such cross-line updates
    // are not supported.
    assert(!isAtomic || (isAtomic && !needs_burst));

    const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
    const bool tlbi_cmd = isLoad && (flags & Request::TLBI_CMD);

    if (inst->translationStarted()) {
        request = inst->savedRequest;
        assert(request);
    } else {
        if (htm_cmd || tlbi_cmd) {
            assert(addr == 0x0lu);
            assert(size == 8);
            request = new UnsquashableDirectRequest(&thread[tid], inst, flags);
        } else if (needs_burst) {
            request = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            request = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res, std::move(amo_op));
        }
        assert(request);
        request->_byteEnable = byte_enable;
        inst->setRequest();
        request->taskId(cpu->taskId());

        // There might be a fault from a previous execution attempt if this
        // is a strictly ordered load
        inst->getFault() = NoFault;

        request->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (request->isTranslationComplete()) {
        if (request->isMemAccessRequired()) {
            inst->effAddr = request->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);

            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*request->req());
            }
            Fault fault;
            if (isLoad)
                fault = read(request, inst->lqIdx);
            else
                fault = write(request, data, inst->sqIdx);
            // inst->getFault() may have the first-fault of a
            // multi-access split request at this point.
            // Overwrite that only if we got another type of fault
            // (e.g. re-exec).
            if (fault != NoFault)
                inst->getFault() = fault;
        } else if (isLoad) {
            inst->setMemAccPredicate(false);
            // Commit will have to clean up whatever happened. Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}

void
LSQ::SingleDataRequest::finish(const Fault &fault, const RequestPtr &request,
        gem5::ThreadContext* tc, BaseMMU::Mode mode)
{
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        squashTranslation();
    } else {
        _inst->strictlyOrdered(request->isStrictlyOrdered());

        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            _inst->physEffAddr = request->getPaddr();
            _inst->memReqFlags = request->getFlags();
            if (request->isCondSwap()) {
                assert(_res);
                request->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}

void
LSQ::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        gem5::ThreadContext* tc, BaseMMU::Mode mode)
{
    int i;
    for (i = 0; i < _reqs.size() && _reqs[i] != req; i++);
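    // i now indexes the fragment whose translation just completed.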
    assert(i < _reqs.size());
    _fault[i] = fault;

    numInTranslationFragments--;
    numTranslatedFragments++;

    if (fault == NoFault)
        _mainReq->setFlags(req->getFlags());

    if (numTranslatedFragments == _reqs.size()) {
        if (_inst->isSquashed()) {
            squashTranslation();
        } else {
            _inst->strictlyOrdered(_mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            _inst->translationCompleted(true);

            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
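            // i now counts the leading fault-free fragments; it equals
            // _fault.size() when every fragment translated cleanly.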
            if (i > 0) {
                _inst->physEffAddr = LSQRequest::req()->getPaddr();
                _inst->memReqFlags = _mainReq->getFlags();
                if (_mainReq->isCondSwap()) {
                    assert(i == _fault.size());
                    assert(_res);
                    _mainReq->setExtraData(*_res);
                }
                if (i == _fault.size()) {
                    _inst->fault = NoFault;
                    setState(State::Request);
                } else {
                    _inst->fault = _fault[i];
                    setState(State::PartialFault);
                }
            } else {
                _inst->fault = _fault[0];
                setState(State::Fault);
            }
        }
    }
}

void
LSQ::SingleDataRequest::initiateTranslation()
{
    assert(_reqs.size() == 0);

    addReq(_addr, _size, _byteEnable);

    if (_reqs.size() > 0) {
        _reqs.back()->setReqInstSeqNum(_inst->seqNum);
        _reqs.back()->taskId(_taskId);
        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);

        _inst->savedRequest = this;
        sendFragmentToTranslation(0);
    } else {
        _inst->setMemAccPredicate(false);
    }
}

PacketPtr
LSQ::SplitDataRequest::mainPacket()
{
    return _mainPacket;
}

RequestPtr
LSQ::SplitDataRequest::mainReq()
{
    return _mainReq;
}

void
LSQ::SplitDataRequest::initiateTranslation()
{
    auto cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    _mainReq = std::make_shared<Request>(base_addr,
            _size, _flags, _inst->requestorId(),
            _inst->pcState().instAddr(), _inst->contextId());
    _mainReq->setByteEnable(_byteEnable);

    // Paddr is not used in _mainReq. However, we will accumulate the flags
    // from the sub requests into _mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
    // avoid a potential assert in setFlags() when we call it from finish().
    _mainReq->setPaddr(0);

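    // Worked example (illustrative): _addr = 0x3fc, _size = 16, 64-byte
    // lines. Then next_addr = 0x400 and final_addr = 0x400, so the code
    // below emits a 4-byte prefix fragment at 0x3fc, skips the whole-block
    // loop, and emits a 12-byte tail fragment at 0x400.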
    /* Get the pre-fix, possibly unaligned. */
    auto it_start = _byteEnable.begin();
    auto it_end = _byteEnable.begin() + (next_addr - base_addr);
    addReq(base_addr, next_addr - base_addr,
            std::vector<bool>(it_start, it_end));
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        auto it_start = _byteEnable.begin() + size_so_far;
        auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
        addReq(base_addr, cacheLineSize,
                std::vector<bool>(it_start, it_end));
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        auto it_start = _byteEnable.begin() + size_so_far;
        auto it_end = _byteEnable.end();
        addReq(base_addr, _size - size_so_far,
                std::vector<bool>(it_start, it_end));
    }

    if (_reqs.size() > 0) {
        /* Setup the requests and send them to translation. */
        for (auto& r: _reqs) {
            r->setReqInstSeqNum(_inst->seqNum);
            r->taskId(_taskId);
        }

        _inst->translationStarted(true);
        setState(State::Translation);
        flags.set(Flag::TranslationStarted);
        _inst->savedRequest = this;
        numInTranslationFragments = 0;
        numTranslatedFragments = 0;
        _fault.resize(_reqs.size());

        for (uint32_t i = 0; i < _reqs.size(); i++) {
            sendFragmentToTranslation(i);
        }
    } else {
        _inst->setMemAccPredicate(false);
    }
}

LSQ::LSQRequest::LSQRequest(
        LSQUnit *port, const DynInstPtr& inst, bool isLoad) :
    _state(State::NotIssued),
    _port(*port), _inst(inst), _data(nullptr),
    _res(nullptr), _addr(0), _size(0), _flags(0),
    _numOutstandingPackets(0), _amo_op(nullptr)
{
    flags.set(Flag::IsLoad, isLoad);
    flags.set(Flag::WriteBackToRegister,
              _inst->isStoreConditional() || _inst->isAtomic() ||
              _inst->isLoad());
    flags.set(Flag::IsAtomic, _inst->isAtomic());
    install();
}

LSQ::LSQRequest::LSQRequest(
        LSQUnit *port, const DynInstPtr& inst, bool isLoad,
        const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
        PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op,
        bool stale_translation)
    : _state(State::NotIssued),
      numTranslatedFragments(0),
      numInTranslationFragments(0),
      _port(*port), _inst(inst), _data(data),
      _res(res), _addr(addr), _size(size),
      _flags(flags_),
      _numOutstandingPackets(0),
      _amo_op(std::move(amo_op)),
      _hasStaleTranslation(stale_translation)
{
    flags.set(Flag::IsLoad, isLoad);
    flags.set(Flag::WriteBackToRegister,
              _inst->isStoreConditional() || _inst->isAtomic() ||
              _inst->isLoad());
    flags.set(Flag::IsAtomic, _inst->isAtomic());
    install();
}

void
LSQ::LSQRequest::install()
{
    if (isLoad()) {
        _port.loadQueue[_inst->lqIdx].setRequest(this);
    } else {
        // Store, StoreConditional, and Atomic requests are pushed
        // to this storeQueue
        _port.storeQueue[_inst->sqIdx].setRequest(this);
    }
}

bool LSQ::LSQRequest::squashed() const { return _inst->isSquashed(); }

void
LSQ::LSQRequest::addReq(Addr addr, unsigned size,
        const std::vector<bool>& byte_enable)
{
    if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
        auto req = std::make_shared<Request>(
                addr, size, _flags, _inst->requestorId(),
                _inst->pcState().instAddr(), _inst->contextId(),
                std::move(_amo_op));
        req->setByteEnable(byte_enable);

        /* If the request is marked as NO_ACCESS, setup a local access */
        if (_flags.isSet(Request::NO_ACCESS)) {
            req->setLocalAccessor(
                [this, req](gem5::ThreadContext *tc, PacketPtr pkt) -> Cycles
                {
                    if ((req->isHTMStart() || req->isHTMCommit())) {
                        auto& inst = this->instruction();
                        assert(inst->inHtmTransactionalState());
                        pkt->setHtmTransactional(
                            inst->getHtmTransactionUid());
                    }
                    return Cycles(1);
                }
            );
        }

        _reqs.push_back(req);
    }
}

LSQ::LSQRequest::~LSQRequest()
{
    assert(!isAnyOutstandingRequest());
    _inst->savedRequest = nullptr;

    for (auto r: _packets)
        delete r;
}

ContextID
LSQ::LSQRequest::contextId() const
{
    return _inst->contextId();
}

void
LSQ::LSQRequest::sendFragmentToTranslation(int i)
{
    numInTranslationFragments++;
    _port.getMMUPtr()->translateTiming(req(i), _inst->thread->getTC(),
            this, isLoad() ? BaseMMU::Read : BaseMMU::Write);
}

void
LSQ::SingleDataRequest::markAsStaleTranslation()
{
    // If this element has been translated and is currently being requested,
    // then it may be stale.
    if ((!flags.isSet(Flag::Complete)) &&
        (!flags.isSet(Flag::Discarded)) &&
        (flags.isSet(Flag::TranslationStarted))) {
        _hasStaleTranslation = true;
    }

    DPRINTF(LSQ, "SingleDataRequest %d 0x%08x isBlocking:%d\n",
            (int)_state, (uint32_t)flags, _hasStaleTranslation);
}

void
LSQ::SplitDataRequest::markAsStaleTranslation()
{
    // If this element has been translated and is currently being requested,
    // then it may be stale.
    if ((!flags.isSet(Flag::Complete)) &&
        (!flags.isSet(Flag::Discarded)) &&
        (flags.isSet(Flag::TranslationStarted))) {
        _hasStaleTranslation = true;
    }

    DPRINTF(LSQ, "SplitDataRequest %d 0x%08x isBlocking:%d\n",
            (int)_state, (uint32_t)flags, _hasStaleTranslation);
}

bool
LSQ::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    assert(_numOutstandingPackets == 1);
    flags.set(Flag::Complete);
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    _hasStaleTranslation = false;
    return true;
}

bool
LSQ::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    numReceivedPackets++;
    if (numReceivedPackets == _packets.size()) {
        flags.set(Flag::Complete);
        /* Assemble packets. */
        PacketPtr resp = isLoad()
            ? Packet::createRead(_mainReq)
            : Packet::createWrite(_mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = this;
        _port.completeDataAccess(resp);
        delete resp;
    }
    _hasStaleTranslation = false;
    return true;
}

void
LSQ::SingleDataRequest::buildPackets()
{
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                    ? Packet::createRead(req())
                    : Packet::createWrite(req()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = this;

        // hardware transactional memory
        // If request originates in a transaction (not necessarily a HtmCmd),
        // then the packet should be marked as such.
        if (_inst->inHtmTransactionalState()) {
            _packets.back()->setHtmTransactional(
                _inst->getHtmTransactionUid());

            DPRINTF(HtmCpu,
              "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
              isLoad() ? "LD" : "ST",
              _inst->pcState().instAddr(),
              _packets.back()->req->hasVaddr() ?
                  _packets.back()->req->getVaddr() : 0lu,
              _packets.back()->getAddr(),
              _inst->getHtmTransactionUid());
        }
    }
    assert(_packets.size() == 1);
}

void
LSQ::SplitDataRequest::buildPackets()
{
    Addr base_address = _addr;

    if (_packets.size() == 0) {
        /* New stuff */
        if (isLoad()) {
            _mainPacket = Packet::createRead(_mainReq);
            _mainPacket->dataStatic(_inst->memData);

            // hardware transactional memory
            // If request originates in a transaction,
            // packet should be marked as such
            if (_inst->inHtmTransactionalState()) {
                _mainPacket->setHtmTransactional(
                    _inst->getHtmTransactionUid());
                DPRINTF(HtmCpu,
                  "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
                  _inst->pcState().instAddr(),
                  _mainPacket->req->hasVaddr() ?
                      _mainPacket->req->getVaddr() : 0lu,
                  _mainPacket->getAddr(),
                  _inst->getHtmTransactionUid());
            }
        }
        for (int i = 0; i < _reqs.size() && _fault[i] == NoFault; i++) {
            RequestPtr req = _reqs[i];
            PacketPtr pkt = isLoad() ? Packet::createRead(req)
                                     : Packet::createWrite(req);
            ptrdiff_t offset = req->getVaddr() - base_address;
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                uint8_t* req_data = new uint8_t[req->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        req->getSize());
                pkt->dataDynamic(req_data);
            }
            pkt->senderState = this;
            _packets.push_back(pkt);

            // hardware transactional memory
            // If request originates in a transaction,
            // packet should be marked as such
            if (_inst->inHtmTransactionalState()) {
                _packets.back()->setHtmTransactional(
                    _inst->getHtmTransactionUid());
                DPRINTF(HtmCpu,
                  "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
                  isLoad() ? "LD" : "ST",
                  i+1,
                  _inst->pcState().instAddr(),
                  _packets.back()->req->hasVaddr() ?
                      _packets.back()->req->getVaddr() : 0lu,
                  _packets.back()->getAddr(),
                  _inst->getHtmTransactionUid());
            }
        }
    }
    assert(_packets.size() > 0);
}

void
LSQ::SingleDataRequest::sendPacketToCache()
{
    assert(_numOutstandingPackets == 0);
    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
        _numOutstandingPackets = 1;
}

void
LSQ::SplitDataRequest::sendPacketToCache()
{
    /* Try to send the packets. */
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}

Cycles
LSQ::SingleDataRequest::handleLocalAccess(
        gem5::ThreadContext *thread, PacketPtr pkt)
{
    return pkt->req->localAccessor(thread, pkt);
}

Cycles
LSQ::SplitDataRequest::handleLocalAccess(
        gem5::ThreadContext *thread, PacketPtr mainPkt)
{
    Cycles delay(0);
    unsigned offset = 0;

    for (auto r: _reqs) {
        PacketPtr pkt =
            new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        Cycles d = r->localAccessor(thread, pkt);
        if (d > delay)
            delay = d;
        offset += r->getSize();
        delete pkt;
    }
    return delay;
}

bool
LSQ::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
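    // Illustrative: with 64-byte blocks, blockMask is ~Addr(0x3f); a
    // fragment at paddr 0x1234 maps to block 0x1200 and therefore hits a
    // probe for blockAddr 0x1200.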
    return ((LSQRequest::_reqs[0]->getPaddr() & blockMask) == blockAddr);
}

/**
 * Caches may probe into the load-store queue to enforce memory ordering
 * guarantees, e.g. to check whether a snoop invalidates a speculative load.
 */
bool
LSQ::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    bool is_hit = false;
    for (auto &r: _reqs) {
        // Only fragments that already have a physical address (i.e. have
        // completed translation) can be checked.
        if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
            is_hit = true;
            break;
        }
    }
    return is_hit;
}

bool
LSQ::DcachePort::recvTimingResp(PacketPtr pkt)
{
    return lsq->recvTimingResp(pkt);
}

void
LSQ::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
            cpu->wakeup(tid);
        }
    }
    lsq->recvTimingSnoopReq(pkt);
}

void
LSQ::DcachePort::recvReqRetry()
{
    lsq->recvReqRetry();
}

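// UnsquashableDirectRequest carries special commands (HTM start/commit,
// TLBI) as dummy 8-byte accesses at address 0; pushRequest() above asserts
// addr == 0 and size == 8 before building one.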
LSQ::UnsquashableDirectRequest::UnsquashableDirectRequest(
    LSQUnit* port,
    const DynInstPtr& inst,
    const Request::Flags& flags_) :
    SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
        nullptr, nullptr, nullptr)
{
}

void
LSQ::UnsquashableDirectRequest::initiateTranslation()
{
    // Special commands are implemented as loads to avoid significant
    // changes to the cpu and memory interfaces.
    // The virtual and physical address uses a dummy value of 0x00.
    // Address translation does not really occur; the code below fills in
    // the request fields directly.

    assert(_reqs.size() == 0);

    addReq(_addr, _size, _byteEnable);

    if (_reqs.size() > 0) {
        _reqs.back()->setReqInstSeqNum(_inst->seqNum);
        _reqs.back()->taskId(_taskId);
        _reqs.back()->setPaddr(_addr);
        _reqs.back()->setInstCount(_inst->getCpuPtr()->totalInsts());

        _inst->strictlyOrdered(_reqs.back()->isStrictlyOrdered());
        _inst->fault = NoFault;
        _inst->physEffAddr = _reqs.back()->getPaddr();
        _inst->memReqFlags = _reqs.back()->getFlags();
        _inst->savedRequest = this;

        flags.set(Flag::TranslationStarted);
        flags.set(Flag::TranslationFinished);

        _inst->translationStarted(true);
        _inst->translationCompleted(true);

        setState(State::Request);
    } else {
        panic("unexpected behaviour in initiateTranslation()");
    }
}

void
LSQ::UnsquashableDirectRequest::markAsStaleTranslation()
{
    // HTM/TLBI operations do not translate,
    // so they cannot have stale translations.
    _hasStaleTranslation = false;
}

void
LSQ::UnsquashableDirectRequest::finish(const Fault &fault,
        const RequestPtr &req, gem5::ThreadContext* tc,
        BaseMMU::Mode mode)
{
    panic("unexpected behaviour - finish()");
}

void
LSQ::checkStaleTranslations()
{
    assert(waitingForStaleTranslation);

    DPRINTF(LSQ, "Checking pending TLBI sync\n");
    // Check if all thread queues are complete
    for (const auto& unit : thread) {
        if (unit.checkStaleTranslations())
            return;
    }
    DPRINTF(LSQ, "No threads have blocking TLBI sync\n");

    // All thread queues have committed their sync operations
    // => send a RubyRequest to the sequencer
    auto req = Request::createMemManagement(
        Request::TLBI_EXT_SYNC_COMP,
        cpu->dataRequestorId());
    req->setExtraData(staleTranslationWaitTxnId);
    PacketPtr pkt = Packet::createRead(req);

    // TODO - reserve some credit for these responses?
    if (!dcachePort.sendTimingReq(pkt)) {
        panic("Couldn't send TLBI_EXT_SYNC_COMP message");
    }

    waitingForStaleTranslation = false;
    staleTranslationWaitTxnId = 0;
}

Fault
LSQ::read(LSQRequest* request, ssize_t load_idx)
{
    assert(request->req()->contextId() == request->contextId());
    ThreadID tid = cpu->contextToThread(request->req()->contextId());

    return thread.at(tid).read(request, load_idx);
}

Fault
LSQ::write(LSQRequest* request, uint8_t *data, ssize_t store_idx)
{
    ThreadID tid = cpu->contextToThread(request->req()->contextId());

    return thread.at(tid).write(request, data, store_idx);
}

} // namespace o3
} // namespace gem5