gem5  v21.1.0.2
lsq_unit.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2014, 2017-2020 ARM Limited
3  * Copyright (c) 2013 Advanced Micro Devices, Inc.
4  * All rights reserved
5  *
6  * The license below extends only to copyright in the software and shall
7  * not be construed as granting a license to any other intellectual
8  * property including but not limited to intellectual property relating
9  * to a hardware implementation of the functionality of the software
10  * licensed hereunder. You may use the software subject to the license
11  * terms below provided that you ensure that this notice is replicated
12  * unmodified and in its entirety in all distributions of the software,
13  * modified or unmodified, in source code or in binary form.
14  *
15  * Copyright (c) 2004-2006 The Regents of The University of Michigan
16  * All rights reserved.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions are
20  * met: redistributions of source code must retain the above copyright
21  * notice, this list of conditions and the following disclaimer;
22  * redistributions in binary form must reproduce the above copyright
23  * notice, this list of conditions and the following disclaimer in the
24  * documentation and/or other materials provided with the distribution;
25  * neither the name of the copyright holders nor the names of its
26  * contributors may be used to endorse or promote products derived from
27  * this software without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40  */
41 
42 #include "cpu/o3/lsq_unit.hh"
43 
45 #include "arch/locked_mem.hh"
46 #include "base/str.hh"
47 #include "config/the_isa.hh"
48 #include "cpu/checker/cpu.hh"
49 #include "cpu/o3/dyn_inst.hh"
50 #include "cpu/o3/limits.hh"
51 #include "cpu/o3/lsq.hh"
52 #include "debug/Activity.hh"
53 #include "debug/HtmCpu.hh"
54 #include "debug/IEW.hh"
55 #include "debug/LSQUnit.hh"
56 #include "debug/O3PipeView.hh"
57 #include "mem/packet.hh"
58 #include "mem/request.hh"
59 
60 namespace gem5
61 {
62 
63 namespace o3
64 {
65 
67  PacketPtr _pkt, LSQUnit *lsq_ptr)
68  : Event(Default_Pri, AutoDelete),
69  inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
70 {
71  assert(_inst->savedReq);
72  _inst->savedReq->writebackScheduled();
73 }
74 
75 void
77 {
78  assert(!lsqPtr->cpu->switchedOut());
79 
80  lsqPtr->writeback(inst, pkt);
81 
82  assert(inst->savedReq);
83  inst->savedReq->writebackDone();
84  delete pkt;
85 }
86 
87 const char *
89 {
90  return "Store writeback";
91 }
92 
93 bool
95 {
96  auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
97  LSQRequest* req = senderState->request();
98  assert(req != nullptr);
99  bool ret = true;
100  /* Check that the request is still alive before any further action. */
101  if (senderState->alive()) {
102  ret = req->recvTimingResp(pkt);
103  } else {
104  senderState->outstanding--;
105  }
106  return ret;
107 
108 }
109 
110 void
112 {
113  LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
114  DynInstPtr inst = state->inst;
115 
116  // hardware transactional memory
117  // sanity check
118  if (pkt->isHtmTransactional() && !inst->isSquashed()) {
119  assert(inst->getHtmTransactionUid() == pkt->getHtmTransactionUid());
120  }
121 
122  // if in a HTM transaction, it's possible
123  // to abort within the cache hierarchy.
124  // This is signalled back to the processor
125  // through responses to memory requests.
126  if (pkt->htmTransactionFailedInCache()) {
127  // cannot do this for write requests because
128  // they cannot tolerate faults
129  const HtmCacheFailure htm_rc =
131  if (pkt->isWrite()) {
132  DPRINTF(HtmCpu,
133  "store notification (ignored) of HTM transaction failure "
134  "in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
135  pkt->getAddr(), htmFailureToStr(htm_rc),
136  pkt->getHtmTransactionUid());
137  } else {
138  HtmFailureFaultCause fail_reason =
140 
141  if (htm_rc == HtmCacheFailure::FAIL_SELF) {
142  fail_reason = HtmFailureFaultCause::SIZE;
143  } else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) {
144  fail_reason = HtmFailureFaultCause::MEMORY;
145  } else if (htm_rc == HtmCacheFailure::FAIL_OTHER) {
146  // these are likely loads that were issued out of order
147  // they are faulted here, but it's unlikely that these will
148  // ever reach the commit head.
149  fail_reason = HtmFailureFaultCause::OTHER;
150  } else {
151  panic("HTM error - unhandled return code from cache (%s)",
152  htmFailureToStr(htm_rc));
153  }
154 
155  inst->fault =
156  std::make_shared<GenericHtmFailureFault>(
157  inst->getHtmTransactionUid(),
158  fail_reason);
159 
160  DPRINTF(HtmCpu,
161  "load notification of HTM transaction failure "
162  "in cache - pc=%s - addr=0x%lx - "
163  "rc=%u - htmUid=%d\n",
164  inst->pcState(), pkt->getAddr(),
165  htmFailureToStr(htm_rc), pkt->getHtmTransactionUid());
166  }
167  }
168 
169  cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
170 
171  /* Notify the sender state that the access is complete (for ownership
172  * tracking). */
173  state->complete();
174 
175  assert(!cpu->switchedOut());
176  if (!inst->isSquashed()) {
177  if (state->needWB) {
178  // Only loads, store conditionals and atomics perform the writeback
179  // after receving the response from the memory
180  assert(inst->isLoad() || inst->isStoreConditional() ||
181  inst->isAtomic());
182 
183  // hardware transactional memory
184  if (pkt->htmTransactionFailedInCache()) {
187  }
188 
189  writeback(inst, state->request()->mainPacket());
190  if (inst->isStore() || inst->isAtomic()) {
191  auto ss = dynamic_cast<SQSenderState*>(state);
192  ss->writebackDone();
193  completeStore(ss->idx);
194  }
195  } else if (inst->isStore()) {
196  // This is a regular store (i.e., not store conditionals and
197  // atomics), so it can complete without writing back
198  completeStore(dynamic_cast<SQSenderState*>(state)->idx);
199  }
200  }
201 }
202 
203 LSQUnit::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
204  : lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
205  loads(0), stores(0), storesToWB(0),
206  htmStarts(0), htmStops(0),
208  cacheBlockMask(0), stalled(false),
209  isStoreBlocked(false), storeInFlight(false), stats(nullptr)
210 {
211 }
212 
213 void
214 LSQUnit::init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params,
215  LSQ *lsq_ptr, unsigned id)
216 {
217  lsqID = id;
218 
219  cpu = cpu_ptr;
220  iewStage = iew_ptr;
221 
222  lsq = lsq_ptr;
223 
224  cpu->addStatGroup(csprintf("lsq%i", lsqID).c_str(), &stats);
225 
226  DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);
227 
228  depCheckShift = params.LSQDepCheckShift;
229  checkLoads = params.LSQCheckLoads;
230  needsTSO = params.needsTSO;
231 
232  resetState();
233 }
234 
235 
236 void
238 {
239  loads = stores = storesToWB = 0;
240 
241  // hardware transactional memory
242  // nesting depth
243  htmStarts = htmStops = 0;
244 
246 
247  retryPkt = NULL;
248  memDepViolator = NULL;
249 
250  stalled = false;
251 
252  cacheBlockMask = ~(cpu->cacheLineSize() - 1);
253 }
254 
255 std::string
257 {
258  if (MaxThreads == 1) {
259  return iewStage->name() + ".lsq";
260  } else {
261  return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
262  }
263 }
264 
266  : statistics::Group(parent),
267  ADD_STAT(forwLoads, statistics::units::Count::get(),
268  "Number of loads that had data forwarded from stores"),
269  ADD_STAT(squashedLoads, statistics::units::Count::get(),
270  "Number of loads squashed"),
271  ADD_STAT(ignoredResponses, statistics::units::Count::get(),
272  "Number of memory responses ignored because the instruction is "
273  "squashed"),
274  ADD_STAT(memOrderViolation, statistics::units::Count::get(),
275  "Number of memory ordering violations"),
276  ADD_STAT(squashedStores, statistics::units::Count::get(),
277  "Number of stores squashed"),
278  ADD_STAT(rescheduledLoads, statistics::units::Count::get(),
279  "Number of loads that were rescheduled"),
280  ADD_STAT(blockedByCache, statistics::units::Count::get(),
281  "Number of times an access to memory failed due to the cache "
282  "being blocked"),
283  ADD_STAT(loadToUse, "Distribution of cycle latency between the "
284  "first time a load is issued and its completion")
285 {
286  loadToUse
287  .init(0, 299, 10)
289 }
290 
291 void
293 {
294  dcachePort = dcache_port;
295 }
296 
297 void
299 {
300  for (int i = 0; i < loadQueue.capacity(); ++i)
301  assert(!loadQueue[i].valid());
302 
303  assert(storesToWB == 0);
304  assert(!retryPkt);
305 }
306 
307 void
309 {
310  resetState();
311 }
312 
313 void
315 {
316  assert(inst->isMemRef());
317 
318  assert(inst->isLoad() || inst->isStore() || inst->isAtomic());
319 
320  if (inst->isLoad()) {
321  insertLoad(inst);
322  } else {
323  insertStore(inst);
324  }
325 
326  inst->setInLSQ();
327 }
328 
329 void
331 {
332  assert(!loadQueue.full());
333  assert(loads < loadQueue.capacity());
334 
335  DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
336  load_inst->pcState(), loadQueue.tail(), load_inst->seqNum);
337 
338  /* Grow the queue. */
340 
341  load_inst->sqIt = storeQueue.end();
342 
343  assert(!loadQueue.back().valid());
344  loadQueue.back().set(load_inst);
345  load_inst->lqIdx = loadQueue.tail();
346  assert(load_inst->lqIdx > 0);
347  load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
348 
349  ++loads;
350 
351  // hardware transactional memory
352  // transactional state and nesting depth must be tracked
353  // in the in-order part of the core.
354  if (load_inst->isHtmStart()) {
355  htmStarts++;
356  DPRINTF(HtmCpu, ">> htmStarts++ (%d) : htmStops (%d)\n",
358 
359  const int htm_depth = htmStarts - htmStops;
360  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
361  auto htm_uid = htm_cpt->getHtmUid();
362 
363  // for debugging purposes
364  if (!load_inst->inHtmTransactionalState()) {
365  htm_uid = htm_cpt->newHtmUid();
366  DPRINTF(HtmCpu, "generating new htmUid=%u\n", htm_uid);
367  if (htm_depth != 1) {
368  DPRINTF(HtmCpu,
369  "unusual HTM transactional depth (%d)"
370  " possibly caused by mispeculation - htmUid=%u\n",
371  htm_depth, htm_uid);
372  }
373  }
374  load_inst->setHtmTransactionalState(htm_uid, htm_depth);
375  }
376 
377  if (load_inst->isHtmStop()) {
378  htmStops++;
379  DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops++ (%d)\n",
381 
382  if (htmStops==1 && htmStarts==0) {
383  DPRINTF(HtmCpu,
384  "htmStops==1 && htmStarts==0. "
385  "This generally shouldn't happen "
386  "(unless due to misspeculation)\n");
387  }
388  }
389 }
390 
391 void
393 {
394  // Make sure it is not full before inserting an instruction.
395  assert(!storeQueue.full());
396  assert(stores < storeQueue.capacity());
397 
398  DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
399  store_inst->pcState(), storeQueue.tail(), store_inst->seqNum);
401 
402  store_inst->sqIdx = storeQueue.tail();
403  store_inst->lqIdx = loadQueue.tail() + 1;
404  assert(store_inst->lqIdx > 0);
405  store_inst->lqIt = loadQueue.end();
406 
407  storeQueue.back().set(store_inst);
408 
409  ++stores;
410 }
411 
414 {
415  DynInstPtr temp = memDepViolator;
416 
417  memDepViolator = NULL;
418 
419  return temp;
420 }
421 
422 unsigned
424 {
425  //LQ has an extra dummy entry to differentiate
426  //empty/full conditions. Subtract 1 from the free entries.
427  DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n",
428  1 + loadQueue.capacity(), loads);
429  return loadQueue.capacity() - loads;
430 }
431 
432 unsigned
434 {
435  //SQ has an extra dummy entry to differentiate
436  //empty/full conditions. Subtract 1 from the free entries.
437  DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n",
438  1 + storeQueue.capacity(), stores);
439  return storeQueue.capacity() - stores;
440 
441  }
442 
443 void
445 {
446  // Should only ever get invalidations in here
447  assert(pkt->isInvalidate());
448 
449  DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
450 
451  for (int x = 0; x < cpu->numContexts(); x++) {
453  bool no_squash = cpu->thread[x]->noSquashFromTC;
454  cpu->thread[x]->noSquashFromTC = true;
456  cpu->thread[x]->noSquashFromTC = no_squash;
457  }
458 
459  if (loadQueue.empty())
460  return;
461 
462  auto iter = loadQueue.begin();
463 
464  Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
465 
466  DynInstPtr ld_inst = iter->instruction();
467  assert(ld_inst);
468  LSQRequest *req = iter->request();
469 
470  // Check that this snoop didn't just invalidate our lock flag
471  if (ld_inst->effAddrValid() &&
472  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)
473  && ld_inst->memReqFlags & Request::LLSC)
475 
476  bool force_squash = false;
477 
478  while (++iter != loadQueue.end()) {
479  ld_inst = iter->instruction();
480  assert(ld_inst);
481  req = iter->request();
482  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered())
483  continue;
484 
485  DPRINTF(LSQUnit, "-- inst [sn:%lli] to pktAddr:%#x\n",
486  ld_inst->seqNum, invalidate_addr);
487 
488  if (force_squash ||
489  req->isCacheBlockHit(invalidate_addr, cacheBlockMask)) {
490  if (needsTSO) {
491  // If we have a TSO system, as all loads must be ordered with
492  // all other loads, this load as well as *all* subsequent loads
493  // need to be squashed to prevent possible load reordering.
494  force_squash = true;
495  }
496  if (ld_inst->possibleLoadViolation() || force_squash) {
497  DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
498  pkt->getAddr(), ld_inst->seqNum);
499 
500  // Mark the load for re-execution
501  ld_inst->fault = std::make_shared<ReExec>();
502  req->setStateToFault();
503  } else {
504  DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
505  pkt->getAddr(), ld_inst->seqNum);
506 
507  // Make sure that we don't lose a snoop hitting a LOCKED
508  // address since the LOCK* flags don't get updated until
509  // commit.
510  if (ld_inst->memReqFlags & Request::LLSC)
512 
513  // If a older load checks this and it's true
514  // then we might have missed the snoop
515  // in which case we need to invalidate to be sure
516  ld_inst->hitExternalSnoop(true);
517  }
518  }
519  }
520  return;
521 }
522 
523 Fault
524 LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt,
525  const DynInstPtr& inst)
526 {
527  Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
528  Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
529 
535  while (loadIt != loadQueue.end()) {
536  DynInstPtr ld_inst = loadIt->instruction();
537  if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
538  ++loadIt;
539  continue;
540  }
541 
542  Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
543  Addr ld_eff_addr2 =
544  (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
545 
546  if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
547  if (inst->isLoad()) {
548  // If this load is to the same block as an external snoop
549  // invalidate that we've observed then the load needs to be
550  // squashed as it could have newer data
551  if (ld_inst->hitExternalSnoop()) {
552  if (!memDepViolator ||
553  ld_inst->seqNum < memDepViolator->seqNum) {
554  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
555  "and [sn:%lli] at address %#x\n",
556  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
557  memDepViolator = ld_inst;
558 
560 
561  return std::make_shared<GenericISA::M5PanicFault>(
562  "Detected fault with inst [sn:%lli] and "
563  "[sn:%lli] at address %#x\n",
564  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
565  }
566  }
567 
568  // Otherwise, mark the load has a possible load violation and
569  // if we see a snoop before it's commited, we need to squash
570  ld_inst->possibleLoadViolation(true);
571  DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
572  " between instructions [sn:%lli] and [sn:%lli]\n",
573  inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
574  } else {
575  // A load/store incorrectly passed this store.
576  // Check if we already have a violator, or if it's newer
577  // squash and refetch.
578  if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
579  break;
580 
581  DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
582  "[sn:%lli] at address %#x\n",
583  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
584  memDepViolator = ld_inst;
585 
587 
588  return std::make_shared<GenericISA::M5PanicFault>(
589  "Detected fault with "
590  "inst [sn:%lli] and [sn:%lli] at address %#x\n",
591  inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
592  }
593  }
594 
595  ++loadIt;
596  }
597  return NoFault;
598 }
599 
600 
601 
602 
603 Fault
605 {
606  // Execute a specific load.
607  Fault load_fault = NoFault;
608 
609  DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
610  inst->pcState(), inst->seqNum);
611 
612  assert(!inst->isSquashed());
613 
614  load_fault = inst->initiateAcc();
615 
616  if (load_fault == NoFault && !inst->readMemAccPredicate()) {
617  assert(inst->readPredicate());
618  inst->setExecuted();
619  inst->completeAcc(nullptr);
620  iewStage->instToCommit(inst);
622  return NoFault;
623  }
624 
625  if (inst->isTranslationDelayed() && load_fault == NoFault)
626  return load_fault;
627 
628  if (load_fault != NoFault && inst->translationCompleted() &&
629  inst->savedReq->isPartialFault() && !inst->savedReq->isComplete()) {
630  assert(inst->savedReq->isSplit());
631  // If we have a partial fault where the mem access is not complete yet
632  // then the cache must have been blocked. This load will be re-executed
633  // when the cache gets unblocked. We will handle the fault when the
634  // mem access is complete.
635  return NoFault;
636  }
637 
638  // If the instruction faulted or predicated false, then we need to send it
639  // along to commit without the instruction completing.
640  if (load_fault != NoFault || !inst->readPredicate()) {
641  // Send this instruction to commit, also make sure iew stage
642  // realizes there is activity. Mark it as executed unless it
643  // is a strictly ordered load that needs to hit the head of
644  // commit.
645  if (!inst->readPredicate())
646  inst->forwardOldRegs();
647  DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
648  inst->seqNum,
649  (load_fault != NoFault ? "fault" : "predication"));
650  if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
651  inst->isAtCommit()) {
652  inst->setExecuted();
653  }
654  iewStage->instToCommit(inst);
656  } else {
657  if (inst->effAddrValid()) {
658  auto it = inst->lqIt;
659  ++it;
660 
661  if (checkLoads)
662  return checkViolations(it, inst);
663  }
664  }
665 
666  return load_fault;
667 }
668 
669 Fault
671 {
672  // Make sure that a store exists.
673  assert(stores != 0);
674 
675  int store_idx = store_inst->sqIdx;
676 
677  DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
678  store_inst->pcState(), store_inst->seqNum);
679 
680  assert(!store_inst->isSquashed());
681 
682  // Check the recently completed loads to see if any match this store's
683  // address. If so, then we have a memory ordering violation.
684  typename LoadQueue::iterator loadIt = store_inst->lqIt;
685 
686  Fault store_fault = store_inst->initiateAcc();
687 
688  if (store_inst->isTranslationDelayed() &&
689  store_fault == NoFault)
690  return store_fault;
691 
692  if (!store_inst->readPredicate()) {
693  DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
694  store_inst->seqNum);
695  store_inst->forwardOldRegs();
696  return store_fault;
697  }
698 
699  if (storeQueue[store_idx].size() == 0) {
700  DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
701  store_inst->pcState(), store_inst->seqNum);
702 
703  return store_fault;
704  }
705 
706  assert(store_fault == NoFault);
707 
708  if (store_inst->isStoreConditional() || store_inst->isAtomic()) {
709  // Store conditionals and Atomics need to set themselves as able to
710  // writeback if we haven't had a fault by here.
711  storeQueue[store_idx].canWB() = true;
712 
713  ++storesToWB;
714  }
715 
716  return checkViolations(loadIt, store_inst);
717 
718 }
719 
720 void
722 {
723  assert(loadQueue.front().valid());
724 
725  DynInstPtr inst = loadQueue.front().instruction();
726 
727  DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
728  inst->pcState());
729 
730  // Update histogram with memory latency from load
731  // Only take latency from load demand that where issued and did not fault
732  if (!inst->isInstPrefetch() && !inst->isDataPrefetch()
733  && inst->firstIssue != -1
734  && inst->lastWakeDependents != -1) {
736  inst->lastWakeDependents - inst->firstIssue));
737  }
738 
739  loadQueue.front().clear();
741 
742  --loads;
743 }
744 
745 void
747 {
748  assert(loads == 0 || loadQueue.front().valid());
749 
750  while (loads != 0 && loadQueue.front().instruction()->seqNum
751  <= youngest_inst) {
752  commitLoad();
753  }
754 }
755 
756 void
758 {
759  assert(stores == 0 || storeQueue.front().valid());
760 
761  /* Forward iterate the store queue (age order). */
762  for (auto& x : storeQueue) {
763  assert(x.valid());
764  // Mark any stores that are now committed and have not yet
765  // been marked as able to write back.
766  if (!x.canWB()) {
767  if (x.instruction()->seqNum > youngest_inst) {
768  break;
769  }
770  DPRINTF(LSQUnit, "Marking store as able to write back, PC "
771  "%s [sn:%lli]\n",
772  x.instruction()->pcState(),
773  x.instruction()->seqNum);
774 
775  x.canWB() = true;
776 
777  ++storesToWB;
778  }
779  }
780 }
781 
782 void
784 {
785  assert(isStoreBlocked);
786  storeWBIt->request()->sendPacketToCache();
787  if (storeWBIt->request()->isSent()){
788  storePostSend();
789  }
790 }
791 
792 void
794 {
795  if (isStoreBlocked) {
796  DPRINTF(LSQUnit, "Writing back blocked store\n");
798  }
799 
800  while (storesToWB > 0 &&
801  storeWBIt.dereferenceable() &&
802  storeWBIt->valid() &&
803  storeWBIt->canWB() &&
804  ((!needsTSO) || (!storeInFlight)) &&
805  lsq->cachePortAvailable(false)) {
806 
807  if (isStoreBlocked) {
808  DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
809  " is blocked!\n");
810  break;
811  }
812 
813  // Store didn't write any data so no need to write it back to
814  // memory.
815  if (storeWBIt->size() == 0) {
816  /* It is important that the preincrement happens at (or before)
817  * the call, as the the code of completeStore checks
818  * storeWBIt. */
820  continue;
821  }
822 
823  if (storeWBIt->instruction()->isDataPrefetch()) {
824  storeWBIt++;
825  continue;
826  }
827 
828  assert(storeWBIt->hasRequest());
829  assert(!storeWBIt->committed());
830 
831  DynInstPtr inst = storeWBIt->instruction();
832  LSQRequest* req = storeWBIt->request();
833 
834  // Process store conditionals or store release after all previous
835  // stores are completed
836  if ((req->mainRequest()->isLLSC() ||
837  req->mainRequest()->isRelease()) &&
838  (storeWBIt.idx() != storeQueue.head())) {
839  DPRINTF(LSQUnit, "Store idx:%i PC:%s to Addr:%#x "
840  "[sn:%lli] is %s%s and not head of the queue\n",
841  storeWBIt.idx(), inst->pcState(),
842  req->request()->getPaddr(), inst->seqNum,
843  req->mainRequest()->isLLSC() ? "SC" : "",
844  req->mainRequest()->isRelease() ? "/Release" : "");
845  break;
846  }
847 
848  storeWBIt->committed() = true;
849 
850  assert(!inst->memData);
851  inst->memData = new uint8_t[req->_size];
852 
853  if (storeWBIt->isAllZeros())
854  memset(inst->memData, 0, req->_size);
855  else
856  memcpy(inst->memData, storeWBIt->data(), req->_size);
857 
858 
859  if (req->senderState() == nullptr) {
860  SQSenderState *state = new SQSenderState(storeWBIt);
861  state->isLoad = false;
862  state->needWB = false;
863  state->inst = inst;
864 
865  req->senderState(state);
866  if (inst->isStoreConditional() || inst->isAtomic()) {
867  /* Only store conditionals and atomics need a writeback. */
868  state->needWB = true;
869  }
870  }
871  req->buildPackets();
872 
873  DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
874  "to Addr:%#x, data:%#x [sn:%lli]\n",
875  storeWBIt.idx(), inst->pcState(),
876  req->request()->getPaddr(), (int)*(inst->memData),
877  inst->seqNum);
878 
879  // @todo: Remove this SC hack once the memory system handles it.
880  if (inst->isStoreConditional()) {
881  // Disable recording the result temporarily. Writing to
882  // misc regs normally updates the result, but this is not
883  // the desired behavior when handling store conditionals.
884  inst->recordResult(false);
885  bool success = TheISA::handleLockedWrite(inst.get(),
886  req->request(), cacheBlockMask);
887  inst->recordResult(true);
888  req->packetSent();
889 
890  if (!success) {
891  req->complete();
892  // Instantly complete this store.
893  DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed. "
894  "Instantly completing it.\n",
895  inst->seqNum);
896  PacketPtr new_pkt = new Packet(*req->packet());
897  WritebackEvent *wb = new WritebackEvent(inst,
898  new_pkt, this);
899  cpu->schedule(wb, curTick() + 1);
901  if (!storeQueue.empty())
902  storeWBIt++;
903  else
905  continue;
906  }
907  }
908 
909  if (req->request()->isLocalAccess()) {
910  assert(!inst->isStoreConditional());
911  assert(!inst->inHtmTransactionalState());
912  gem5::ThreadContext *thread = cpu->tcBase(lsqID);
913  PacketPtr main_pkt = new Packet(req->mainRequest(),
915  main_pkt->dataStatic(inst->memData);
916  req->request()->localAccessor(thread, main_pkt);
917  delete main_pkt;
919  storeWBIt++;
920  continue;
921  }
922  /* Send to cache */
923  req->sendPacketToCache();
924 
925  /* If successful, do the post send */
926  if (req->isSent()) {
927  storePostSend();
928  } else {
929  DPRINTF(LSQUnit, "D-Cache became blocked when writing [sn:%lli], "
930  "will retry later\n",
931  inst->seqNum);
932  }
933  }
934  assert(stores >= 0 && storesToWB >= 0);
935 }
936 
937 void
938 LSQUnit::squash(const InstSeqNum &squashed_num)
939 {
940  DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
941  "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
942 
943  while (loads != 0 &&
944  loadQueue.back().instruction()->seqNum > squashed_num) {
945  DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
946  "[sn:%lli]\n",
947  loadQueue.back().instruction()->pcState(),
948  loadQueue.back().instruction()->seqNum);
949 
950  if (isStalled() && loadQueue.tail() == stallingLoadIdx) {
951  stalled = false;
952  stallingStoreIsn = 0;
953  stallingLoadIdx = 0;
954  }
955 
956  // hardware transactional memory
957  // Squashing instructions can alter the transaction nesting depth
958  // and must be corrected before fetching resumes.
959  if (loadQueue.back().instruction()->isHtmStart())
960  {
961  htmStarts = (--htmStarts < 0) ? 0 : htmStarts;
962  DPRINTF(HtmCpu, ">> htmStarts-- (%d) : htmStops (%d)\n",
964  }
965  if (loadQueue.back().instruction()->isHtmStop())
966  {
967  htmStops = (--htmStops < 0) ? 0 : htmStops;
968  DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops-- (%d)\n",
970  }
971  // Clear the smart pointer to make sure it is decremented.
972  loadQueue.back().instruction()->setSquashed();
973  loadQueue.back().clear();
974 
975  --loads;
976 
979  }
980 
981  // hardware transactional memory
982  // scan load queue (from oldest to youngest) for most recent valid htmUid
983  auto scan_it = loadQueue.begin();
984  uint64_t in_flight_uid = 0;
985  while (scan_it != loadQueue.end()) {
986  if (scan_it->instruction()->isHtmStart() &&
987  !scan_it->instruction()->isSquashed()) {
988  in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
989  DPRINTF(HtmCpu, "loadQueue[%d]: found valid HtmStart htmUid=%u\n",
990  scan_it._idx, in_flight_uid);
991  }
992  scan_it++;
993  }
994  // If there's a HtmStart in the pipeline then use its htmUid,
995  // otherwise use the most recently committed uid
996  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
997  if (htm_cpt) {
998  const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
999  uint64_t new_local_htm_uid;
1000  if (in_flight_uid > 0)
1001  new_local_htm_uid = in_flight_uid;
1002  else
1003  new_local_htm_uid = lastRetiredHtmUid;
1004 
1005  if (old_local_htm_uid != new_local_htm_uid) {
1006  DPRINTF(HtmCpu, "flush: lastRetiredHtmUid=%u\n",
1008  DPRINTF(HtmCpu, "flush: resetting localHtmUid=%u\n",
1009  new_local_htm_uid);
1010 
1011  htm_cpt->setHtmUid(new_local_htm_uid);
1012  }
1013  }
1014 
1015  if (memDepViolator && squashed_num < memDepViolator->seqNum) {
1016  memDepViolator = NULL;
1017  }
1018 
1019  while (stores != 0 &&
1020  storeQueue.back().instruction()->seqNum > squashed_num) {
1021  // Instructions marked as can WB are already committed.
1022  if (storeQueue.back().canWB()) {
1023  break;
1024  }
1025 
1026  DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
1027  "idx:%i [sn:%lli]\n",
1028  storeQueue.back().instruction()->pcState(),
1029  storeQueue.tail(), storeQueue.back().instruction()->seqNum);
1030 
1031  // I don't think this can happen. It should have been cleared
1032  // by the stalling load.
1033  if (isStalled() &&
1034  storeQueue.back().instruction()->seqNum == stallingStoreIsn) {
1035  panic("Is stalled should have been cleared by stalling load!\n");
1036  stalled = false;
1037  stallingStoreIsn = 0;
1038  }
1039 
1040  // Clear the smart pointer to make sure it is decremented.
1041  storeQueue.back().instruction()->setSquashed();
1042 
1043  // Must delete request now that it wasn't handed off to
1044  // memory. This is quite ugly. @todo: Figure out the proper
1045  // place to really handle request deletes.
1046  storeQueue.back().clear();
1047  --stores;
1048 
1049  storeQueue.pop_back();
1051  }
1052 }
1053 
1054 uint64_t
1056 {
1057  const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
1058  return htm_cpt->getHtmUid();
1059 }
1060 
1061 void
1063 {
1064  if (isStalled() &&
1065  storeWBIt->instruction()->seqNum == stallingStoreIsn) {
1066  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1067  "load idx:%i\n",
1069  stalled = false;
1070  stallingStoreIsn = 0;
1072  }
1073 
1074  if (!storeWBIt->instruction()->isStoreConditional()) {
1075  // The store is basically completed at this time. This
1076  // only works so long as the checker doesn't try to
1077  // verify the value in memory for stores.
1078  storeWBIt->instruction()->setCompleted();
1079 
1080  if (cpu->checker) {
1081  cpu->checker->verify(storeWBIt->instruction());
1082  }
1083  }
1084 
1085  if (needsTSO) {
1086  storeInFlight = true;
1087  }
1088 
1089  storeWBIt++;
1090 }
1091 
1092 void
1094 {
1095  iewStage->wakeCPU();
1096 
1097  // Squashed instructions do not need to complete their access.
1098  if (inst->isSquashed()) {
1099  assert (!inst->isStore() || inst->isStoreConditional());
1101  return;
1102  }
1103 
1104  if (!inst->isExecuted()) {
1105  inst->setExecuted();
1106 
1107  if (inst->fault == NoFault) {
1108  // Complete access to copy data to proper place.
1109  inst->completeAcc(pkt);
1110  } else {
1111  // If the instruction has an outstanding fault, we cannot complete
1112  // the access as this discards the current fault.
1113 
1114  // If we have an outstanding fault, the fault should only be of
1115  // type ReExec or - in case of a SplitRequest - a partial
1116  // translation fault
1117 
1118  // Unless it's a hardware transactional memory fault
1119  auto htm_fault = std::dynamic_pointer_cast<
1120  GenericHtmFailureFault>(inst->fault);
1121 
1122  if (!htm_fault) {
1123  assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
1124  inst->savedReq->isPartialFault());
1125 
1126  } else if (!pkt->htmTransactionFailedInCache()) {
1127  // Situation in which the instruction has a hardware
1128  // transactional memory fault but not the packet itself. This
1129  // can occur with ldp_uop microops since access is spread over
1130  // multiple packets.
1131  DPRINTF(HtmCpu,
1132  "%s writeback with HTM failure fault, "
1133  "however, completing packet is not aware of "
1134  "transaction failure. cause=%s htmUid=%u\n",
1135  inst->staticInst->getName(),
1136  htmFailureToStr(htm_fault->getHtmFailureFaultCause()),
1137  htm_fault->getHtmUid());
1138  }
1139 
1140  DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
1141  "due to pending fault.\n", inst->seqNum);
1142  }
1143  }
1144 
1145  // Need to insert instruction into queue to commit
1146  iewStage->instToCommit(inst);
1147 
1149 
1150  // see if this load changed the PC
1152 }
1153 
// Marks the SQ entry at store_idx as complete and retires completed
// stores from the head of the store queue; also clears any
// partial-forwarding stall keyed to this store and reports the store
// to the checker (store conditionals excepted).
// NOTE(review): this doxygen extract is missing the hyperlinked source
// lines (orig. 1164, 1172, 1195, 1198) -- among them, presumably, the
// pop of the queue head inside the do/while below -- consult the
// original lsq_unit.cc before editing.
1154 void
1155 LSQUnit::completeStore(typename StoreQueue::iterator store_idx)
1156 {
1157  assert(store_idx->valid());
1158  store_idx->completed() = true;
1159  --storesToWB;
1160  // A bit conservative because a store completion may not free up entries,
1161  // but hopefully avoids two store completions in one cycle from making
1162  // the CPU tick twice.
1163  cpu->wakeCPU();
1165 
1166  /* We 'need' a copy here because we may clear the entry from the
1167  * store queue. */
1168  DynInstPtr store_inst = store_idx->instruction();
1169  if (store_idx == storeQueue.begin()) {
// Retire every already-completed store at the head of the queue.
// NOTE(review): the loop condition reads storeQueue.front() before
// testing empty(); confirm the queue cannot be empty at that point
// after the (elided) removal of the head entry.
1170  do {
1171  storeQueue.front().clear();
1173  --stores;
1174  } while (storeQueue.front().completed() &&
1175  !storeQueue.empty());
1176 
1177  iewStage->updateLSQNextCycle = true;
1178  }
1179 
1180  DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
1181  "idx:%i\n",
1182  store_inst->seqNum, store_idx.idx() - 1, storeQueue.head() - 1);
1183 
1184 #if TRACING_ON
1185  if (debug::O3PipeView) {
1186  store_inst->storeTick =
1187  curTick() - store_inst->fetchTick;
1188  }
1189 #endif
1190 
// If a load was stalled waiting on this store (partial store-to-load
// forwarding), release the stall now that the store has completed.
1191  if (isStalled() &&
1192  store_inst->seqNum == stallingStoreIsn) {
1193  DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1194  "load idx:%i\n",
1196  stalled = false;
1197  stallingStoreIsn = 0;
1199  }
1200 
1201  store_inst->setCompleted();
1202 
// Under TSO only one store may be in flight at a time; this one is done.
1203  if (needsTSO) {
1204  storeInFlight = false;
1205  }
1206 
1207  // Tell the checker we've completed this instruction. Some stores
1208  // may get reported twice to the checker, but the checker can
1209  // handle that case.
1210  // Store conditionals cannot be sent to the checker yet, they have
1211  // to update the misc registers first which should take place
1212  // when they commit
1213  if (cpu->checker && !store_inst->isStoreConditional()) {
1214  cpu->checker->verify(store_inst);
1215  }
1216 }
1217 
// Attempts to hand data_pkt to the D-cache port. Returns true if the
// packet was accepted. On success the cache port is charged for this
// cycle (lsq->cachePortBusy) and the owning request is told the packet
// went out; on failure the request is told to retry, and a store
// additionally sets isStoreBlocked so the writeback is replayed when
// the port sends a retry.
// NOTE(review): orig. line 1246 (inside the cache_got_blocked branch,
// presumably a blocked-by-cache statistics bump) is missing from this
// doxygen extract.
1218 bool
1219 LSQUnit::trySendPacket(bool isLoad, PacketPtr data_pkt)
1220 {
1221  bool ret = true;
1222  bool cache_got_blocked = false;
1223 
1224  auto state = dynamic_cast<LSQSenderState*>(data_pkt->senderState);
1225 
// Only try the port when the cache is not already blocked and a
// load/store port slot is available this cycle.
1226  if (!lsq->cacheBlocked() &&
1227  lsq->cachePortAvailable(isLoad)) {
1228  if (!dcachePort->sendTimingReq(data_pkt)) {
1229  ret = false;
1230  cache_got_blocked = true;
1231  }
1232  } else {
1233  ret = false;
1234  }
1235 
1236  if (ret) {
1237  if (!isLoad) {
1238  isStoreBlocked = false;
1239  }
1240  lsq->cachePortBusy(isLoad);
1241  state->outstanding++;
1242  state->request()->packetSent();
1243  } else {
// Distinguish "cache rejected the packet" (mark the whole LSQ blocked)
// from "no port available this cycle" (just retry later).
1244  if (cache_got_blocked) {
1245  lsq->cacheBlocked(true);
1247  }
1248  if (!isLoad) {
1249  assert(state->request() == storeWBIt->request());
1250  isStoreBlocked = true;
1251  }
1252  state->request()->packetNotSent();
1253  }
1254  DPRINTF(LSQUnit, "Memory request (pkt: %s) from inst [sn:%llu] was"
1255  " %ssent (cache is blocked: %d, cache_got_blocked: %d)\n",
1256  data_pkt->print(), state->inst->seqNum,
1257  ret ? "": "not ", lsq->cacheBlocked(), cache_got_blocked);
1258  return ret;
1259 }
1260 
1261 void
1263 {
1264  if (isStoreBlocked) {
1265  DPRINTF(LSQUnit, "Receiving retry: blocked store\n");
1267  }
1268 }
1269 
1270 void
1272 {
1273  cprintf("Load store queue: Dumping instructions.\n");
1274  cprintf("Load queue size: %i\n", loads);
1275  cprintf("Load queue: ");
1276 
1277  for (const auto& e: loadQueue) {
1278  const DynInstPtr &inst(e.instruction());
1279  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1280  }
1281  cprintf("\n");
1282 
1283  cprintf("Store queue size: %i\n", stores);
1284  cprintf("Store queue: ");
1285 
1286  for (const auto& e: storeQueue) {
1287  const DynInstPtr &inst(e.instruction());
1288  cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
1289  }
1290 
1291  cprintf("\n");
1292 }
1293 
1294 void LSQUnit::schedule(Event& ev, Tick when) { cpu->schedule(ev, when); }
1295 
1297 
1298 unsigned int
1300 {
1301  return cpu->cacheLineSize();
1302 }
1303 
// Executes the load at load_idx using the already-translated request
// req. Handles, in order: strictly-ordered loads not yet at commit
// (rescheduled, request discarded, panic fault returned), LLSC
// bookkeeping, local (device) accesses completed via a writeback
// event, HTM start/commit commands that must not leave the core,
// store-to-load forwarding from older stores still in the SQ, and
// finally a normal timing access to the D-cache.
// NOTE(review): several doxygen-hyperlinked lines are absent from this
// extract (orig. 1327, 1434, 1446-1447, 1466, 1546, 1570), among them
// the declaration and updates of `coverage` -- consult the original
// lsq_unit.cc before editing.
1304 Fault
1305 LSQUnit::read(LSQRequest *req, int load_idx)
1306 {
1307  LQEntry& load_req = loadQueue[load_idx];
1308  const DynInstPtr& load_inst = load_req.instruction();
1309 
1310  load_req.setRequest(req);
1311  assert(load_inst);
1312 
1313  assert(!load_inst->isExecuted());
1314 
1315  // Make sure this isn't a strictly ordered load
1316  // A bit of a hackish way to get strictly ordered accesses to work
1317  // only if they're at the head of the LSQ and are ready to commit
1318  // (at the head of the ROB too).
1319 
1320  if (req->mainRequest()->isStrictlyOrdered() &&
1321  (load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
1322  // Tell IQ/mem dep unit that this instruction will need to be
1323  // rescheduled eventually
1324  iewStage->rescheduleMemInst(load_inst);
1325  load_inst->clearIssued();
1326  load_inst->effAddrValid(false);
// NOTE(review): orig. line 1327 (likely a rescheduled-loads stat bump)
// is missing from this extract.
1328  DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
1329  load_inst->seqNum, load_inst->pcState());
1330 
1331  // Must delete request now that it wasn't handed off to
1332  // memory. This is quite ugly. @todo: Figure out the proper
1333  // place to really handle request deletes.
1334  load_req.setRequest(nullptr);
1335  req->discard();
1336  return std::make_shared<GenericISA::M5PanicFault>(
1337  "Strictly ordered load [sn:%llx] PC %s\n",
1338  load_inst->seqNum, load_inst->pcState());
1339  }
1340 
1341  DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
1342  "storeHead: %i addr: %#x%s\n",
1343  load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
1344  req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
1345 
1346  if (req->mainRequest()->isLLSC()) {
1347  // Disable recording the result temporarily. Writing to misc
1348  // regs normally updates the result, but this is not the
1349  // desired behavior when handling store conditionals.
1350  load_inst->recordResult(false);
1351  TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
1352  load_inst->recordResult(true);
1353  }
1354 
// Local accesses (e.g. device registers) are completed directly via
// the request's accessor and a scheduled writeback event; they never
// go out to the cache.
1355  if (req->mainRequest()->isLocalAccess()) {
1356  assert(!load_inst->memData);
1357  assert(!load_inst->inHtmTransactionalState());
1358  load_inst->memData = new uint8_t[MaxDataBytes];
1359 
1360  gem5::ThreadContext *thread = cpu->tcBase(lsqID);
1361  PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
1362 
1363  main_pkt->dataStatic(load_inst->memData);
1364 
1365  Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
1366 
1367  WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
1368  cpu->schedule(wb, cpu->clockEdge(delay));
1369  return NoFault;
1370  }
1371 
1372  // hardware transactional memory
1373  if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
1374  {
1375  // don't want to send nested transactionStarts and
1376  // transactionStops outside of core, e.g. to Ruby
1377  if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
1378  Cycles delay(0);
1379  PacketPtr data_pkt =
1380  new Packet(req->mainRequest(), MemCmd::ReadReq);
1381 
1382  // Allocate memory if this is the first time a load is issued.
1383  if (!load_inst->memData) {
1384  load_inst->memData =
1385  new uint8_t[req->mainRequest()->getSize()];
1386  // sanity checks expect zero in request's data
1387  memset(load_inst->memData, 0, req->mainRequest()->getSize());
1388  }
1389 
1390  data_pkt->dataStatic(load_inst->memData);
1391  if (load_inst->inHtmTransactionalState()) {
1392  data_pkt->setHtmTransactional(
1393  load_inst->getHtmTransactionUid());
1394  }
1395  data_pkt->makeResponse();
1396 
1397  WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
1398  cpu->schedule(wb, cpu->clockEdge(delay));
1399  return NoFault;
1400  }
1401  }
1402 
1403  // Check the SQ for any previous stores that might lead to forwarding
1404  auto store_it = load_inst->sqIt;
1405  assert (store_it >= storeWBIt);
1406  // End once we've reached the top of the LSQ
// Walk from this load's SQ position back toward the oldest
// not-yet-written-back store, looking for an older store that overlaps
// this load's address range.
1407  while (store_it != storeWBIt && !load_inst->isDataPrefetch()) {
1408  // Move the index to one younger
1409  store_it--;
1410  assert(store_it->valid());
1411  assert(store_it->instruction()->seqNum < load_inst->seqNum);
1412  int store_size = store_it->size();
1413 
1414  // Cache maintenance instructions go down via the store
1415  // path but they carry no data and they shouldn't be
1416  // considered for forwarding
1417  if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
1418  !(store_it->request()->mainRequest() &&
1419  store_it->request()->mainRequest()->isCacheMaintenance())) {
1420  assert(store_it->instruction()->effAddrValid());
1421 
1422  // Check if the store data is within the lower and upper bounds of
1423  // addresses that the request needs.
1424  auto req_s = req->mainRequest()->getVaddr();
1425  auto req_e = req_s + req->mainRequest()->getSize();
1426  auto st_s = store_it->instruction()->effAddr;
1427  auto st_e = st_s + store_size;
1428 
1429  bool store_has_lower_limit = req_s >= st_s;
1430  bool store_has_upper_limit = req_e <= st_e;
1431  bool lower_load_has_store_part = req_s < st_e;
1432  bool upper_load_has_store_part = req_e > st_s;
1433 
// NOTE(review): orig. line 1434, which declares and initializes
// `coverage` (an AddrRangeCoverage value), is missing from this
// extract.
1435 
1436  // If the store entry is not atomic (atomic does not have valid
1437  // data), the store has all of the data needed, and
1438  // the load is not LLSC, then
1439  // we can forward data from the store to the load
1440  if (!store_it->instruction()->isAtomic() &&
1441  store_has_lower_limit && store_has_upper_limit &&
1442  !req->mainRequest()->isLLSC()) {
1443 
1444  const auto& store_req = store_it->request()->mainRequest();
1445  coverage = store_req->isMasked() ?
// NOTE(review): orig. lines 1446-1447 (the partial-vs-full coverage
// choice for masked stores) are missing from this extract.
1448  } else if (
1449  // This is the partial store-load forwarding case where a store
1450  // has only part of the load's data and the load isn't LLSC
1451  (!req->mainRequest()->isLLSC() &&
1452  ((store_has_lower_limit && lower_load_has_store_part) ||
1453  (store_has_upper_limit && upper_load_has_store_part) ||
1454  (lower_load_has_store_part && upper_load_has_store_part))) ||
1455  // The load is LLSC, and the store has all or part of the
1456  // load's data
1457  (req->mainRequest()->isLLSC() &&
1458  ((store_has_lower_limit || upper_load_has_store_part) &&
1459  (store_has_upper_limit || lower_load_has_store_part))) ||
1460  // The store entry is atomic and has all or part of the load's
1461  // data
1462  (store_it->instruction()->isAtomic() &&
1463  ((store_has_lower_limit || upper_load_has_store_part) &&
1464  (store_has_upper_limit || lower_load_has_store_part)))) {
1465 
// NOTE(review): orig. line 1466 (presumably assigning partial
// coverage here) is missing from this extract -- confirm against the
// original source.
1467  }
1468 
// Full coverage: satisfy the load entirely from the SQ entry's data
// and complete it via a writeback event (no cache access).
1469  if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
1470  // Get shift amount for offset into the store's data.
1471  int shift_amt = req->mainRequest()->getVaddr() -
1472  store_it->instruction()->effAddr;
1473 
1474  // Allocate memory if this is the first time a load is issued.
1475  if (!load_inst->memData) {
1476  load_inst->memData =
1477  new uint8_t[req->mainRequest()->getSize()];
1478  }
1479  if (store_it->isAllZeros())
1480  memset(load_inst->memData, 0,
1481  req->mainRequest()->getSize());
1482  else
1483  memcpy(load_inst->memData,
1484  store_it->data() + shift_amt,
1485  req->mainRequest()->getSize());
1486 
1487  DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
1488  "addr %#x\n", store_it._idx,
1489  req->mainRequest()->getVaddr());
1490 
1491  PacketPtr data_pkt = new Packet(req->mainRequest(),
1492  MemCmd::ReadReq);
1493  data_pkt->dataStatic(load_inst->memData);
1494 
1495  // hardware transactional memory
1496  // Store to load forwarding within a transaction
1497  // This should be okay because the store will be sent to
1498  // the memory subsystem and subsequently get added to the
1499  // write set of the transaction. The write set has a stronger
1500  // property than the read set, so the load doesn't necessarily
1501  // have to be there.
1502  assert(!req->mainRequest()->isHTMCmd());
1503  if (load_inst->inHtmTransactionalState()) {
1504  assert (!storeQueue[store_it._idx].completed());
1505  assert (
1506  storeQueue[store_it._idx].instruction()->
1507  inHtmTransactionalState());
1508  assert (
1509  load_inst->getHtmTransactionUid() ==
1510  storeQueue[store_it._idx].instruction()->
1511  getHtmTransactionUid());
1512  data_pkt->setHtmTransactional(
1513  load_inst->getHtmTransactionUid());
1514  DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
1515  "pc=0x%lx - vaddr=0x%lx - "
1516  "paddr=0x%lx - htmUid=%u\n",
1517  load_inst->instAddr(),
1518  data_pkt->req->hasVaddr() ?
1519  data_pkt->req->getVaddr() : 0lu,
1520  data_pkt->getAddr(),
1521  load_inst->getHtmTransactionUid());
1522  }
1523 
1524  if (req->isAnyOutstandingRequest()) {
1525  assert(req->_numOutstandingPackets > 0);
1526  // There are memory requests packets in flight already.
1527  // This may happen if the store was not complete the
1528  // first time this load got executed. Signal the senderState
1529  // that response packets should be discarded.
1530  req->discardSenderState();
1531  }
1532 
1533  WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
1534  this);
1535 
1536  // We'll say this has a 1 cycle load-store forwarding latency
1537  // for now.
1538  // @todo: Need to make this a parameter.
1539  cpu->schedule(wb, curTick());
1540 
1541  // Don't need to do anything special for split loads.
1542  ++stats.forwLoads;
1543 
1544  return NoFault;
1545  } else if (
// NOTE(review): orig. line 1546 (the condition of this else-if,
// presumably testing for partial coverage) is missing from this
// extract.
1547 
1548  // If it's already been written back, then don't worry about
1549  // stalling on it.
1549  if (store_it->completed()) {
1550  panic("Should not check one of these");
1551  continue;
1552  }
1553 
1554  // Must stall load and force it to retry, so long as it's the
1555  // oldest load that needs to do so.
1556  if (!stalled ||
1557  (stalled &&
1558  load_inst->seqNum <
1559  loadQueue[stallingLoadIdx].instruction()->seqNum)) {
1560  stalled = true;
1561  stallingStoreIsn = store_it->instruction()->seqNum;
1562  stallingLoadIdx = load_idx;
1563  }
1564 
1565  // Tell IQ/mem dep unit that this instruction will need to be
1566  // rescheduled eventually
1567  iewStage->rescheduleMemInst(load_inst);
1568  load_inst->clearIssued();
1569  load_inst->effAddrValid(false);
// NOTE(review): orig. line 1570 (likely a rescheduled-loads stat
// bump) is missing from this extract.
1571 
1572  // Do not generate a writeback event as this instruction is not
1573  // complete.
1574  DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
1575  "Store idx %i to load addr %#x\n",
1576  store_it._idx, req->mainRequest()->getVaddr());
1577 
1578  // Must discard the request.
1579  req->discard();
1580  load_req.setRequest(nullptr);
1581  return NoFault;
1582  }
1583  }
1584  }
1585 
1586  // If there's no forwarding case, then go access memory
1587  DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
1588  load_inst->seqNum, load_inst->pcState());
1589 
1590  // Allocate memory if this is the first time a load is issued.
1591  if (!load_inst->memData) {
1592  load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
1593  }
1594 
1595 
1596  // hardware transactional memory
1597  if (req->mainRequest()->isHTMCmd()) {
1598  // this is a simple sanity check
1599  // the Ruby cache controller will set
1600  // memData to 0x0ul if successful.
1601  *load_inst->memData = (uint64_t) 0x1ull;
1602  }
1603 
1604  // For now, load throughput is constrained by the number of
1605  // load FUs only, and loads do not consume a cache port (only
1606  // stores do).
1607  // @todo We should account for cache port contention
1608  // and arbitrate between loads and stores.
1609 
1610  // If the cache is not blocked, do the cache access
1611  if (req->senderState() == nullptr) {
1612  LQSenderState *state = new LQSenderState(
1613  loadQueue.getIterator(load_idx));
1614  state->isLoad = true;
1615  state->inst = load_inst;
1616  state->isSplit = req->isSplit();
1617  req->senderState(state);
1618  }
1619  req->buildPackets();
1620  req->sendPacketToCache();
1621  if (!req->isSent())
1622  iewStage->blockMemInst(load_inst);
1623 
1624  return NoFault;
1625 }
1626 
1627 Fault
1628 LSQUnit::write(LSQRequest *req, uint8_t *data, int store_idx)
1629 {
1630  assert(storeQueue[store_idx].valid());
1631 
1632  DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
1633  "[sn:%llu]\n",
1634  store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
1635  storeQueue[store_idx].instruction()->seqNum);
1636 
1637  storeQueue[store_idx].setRequest(req);
1638  unsigned size = req->_size;
1639  storeQueue[store_idx].size() = size;
1640  bool store_no_data =
1641  req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
1642  storeQueue[store_idx].isAllZeros() = store_no_data;
1643  assert(size <= SQEntry::DataSize || store_no_data);
1644 
1645  // copy data into the storeQueue only if the store request has valid data
1646  if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
1647  !req->request()->isCacheMaintenance() &&
1648  !req->request()->isAtomic())
1649  memcpy(storeQueue[store_idx].data(), data, size);
1650 
1651  // This function only writes the data to the store queue, so no fault
1652  // can happen here.
1653  return NoFault;
1654 }
1655 
1656 InstSeqNum
1658 {
1659  if (loadQueue.front().valid())
1660  return loadQueue.front().instruction()->seqNum;
1661  else
1662  return 0;
1663 }
1664 
1665 InstSeqNum
1667 {
1668  if (storeQueue.front().valid())
1669  return storeQueue.front().instruction()->seqNum;
1670  else
1671  return 0;
1672 }
1673 
1674 } // namespace o3
1675 } // namespace gem5
gem5::o3::LSQUnit::storePostSend
void storePostSend()
Handles completing the send of a store to memory.
Definition: lsq_unit.cc:1062
gem5::o3::LSQUnit::LSQUnitStats::squashedLoads
statistics::Scalar squashedLoads
Total number of squashed loads.
Definition: lsq_unit.hh:560
gem5::o3::LSQ::LSQSenderState::inst
DynInstPtr inst
Instruction which initiated the access to memory.
Definition: lsq.hh:91
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
gem5::CircularQueue::back
reference back()
Definition: circular_queue.hh:446
gem5::HtmCacheFailure::FAIL_OTHER
@ FAIL_OTHER
gem5::o3::LSQUnit::WritebackEvent::process
void process()
Processes the writeback event.
Definition: lsq_unit.cc:76
gem5::o3::CPU::ppDataAccessComplete
ProbePointArg< std::pair< DynInstPtr, PacketPtr > > * ppDataAccessComplete
Definition: cpu.hh:174
gem5::o3::LSQUnit::lastRetiredHtmUid
uint64_t lastRetiredHtmUid
Definition: lsq_unit.hh:511
gem5::o3::LSQ::LSQRequest
Memory operation metadata.
Definition: lsq.hh:231
gem5::NoFault
constexpr decltype(nullptr) NoFault
Definition: types.hh:260
gem5::BaseCPU::switchedOut
bool switchedOut() const
Determine if the CPU is switched out.
Definition: base.hh:357
gem5::o3::LSQUnit::stallingStoreIsn
InstSeqNum stallingStoreIsn
The store that causes the stall due to partial store to load forwarding.
Definition: lsq_unit.hh:529
gem5::RequestPort::sendTimingReq
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition: port.hh:495
gem5::o3::LSQ::LSQSenderState::complete
virtual void complete()=0
gem5::o3::LSQUnit::insertStore
void insertStore(const DynInstPtr &store_inst)
Inserts a store instruction.
Definition: lsq_unit.cc:392
gem5::o3::LSQUnit::storeInFlight
bool storeInFlight
Whether or not a store is in flight.
Definition: lsq_unit.hh:540
gem5::o3::LSQ::cacheBlocked
bool cacheBlocked() const
Is D-cache blocked?
Definition: lsq.cc:194
gem5::o3::LSQUnit::WritebackEvent::description
const char * description() const
Returns the description of this event.
Definition: lsq_unit.cc:88
gem5::o3::LSQUnit::isStalled
bool isStalled()
Returns whether or not the LSQ unit is stalled.
Definition: lsq_unit.hh:602
gem5::o3::LSQUnit::checkLoads
bool checkLoads
Should loads be checked for dependency issues.
Definition: lsq_unit.hh:497
gem5::cprintf
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:155
gem5::CircularQueue::advance_tail
void advance_tail()
Increases the tail by one.
Definition: circular_queue.hh:515
gem5::o3::CPU::mmu
BaseMMU * mmu
Definition: cpu.hh:112
gem5::o3::LSQUnit::LSQUnitStats::memOrderViolation
statistics::Scalar memOrderViolation
Total number of memory ordering violations.
Definition: lsq_unit.hh:567
gem5::GenericHtmFailureFault
Definition: faults.hh:140
data
const char data[]
Definition: circlebuf.test.cc:48
gem5::o3::LSQUnit::recvTimingResp
bool recvTimingResp(PacketPtr pkt)
Handles writing back and completing the load or store that has returned from memory.
Definition: lsq_unit.cc:94
gem5::HtmFailureFaultCause
HtmFailureFaultCause
Definition: htm.hh:47
gem5::o3::LSQUnit::LSQEntry::setRequest
void setRequest(LSQRequest *r)
Definition: lsq_unit.hh:141
gem5::o3::LSQUnit::iewStage
IEW * iewStage
Pointer to the IEW stage.
Definition: lsq_unit.hh:401
gem5::o3::LSQUnit::memDepViolator
DynInstPtr memDepViolator
The oldest load that caused a memory ordering violation.
Definition: lsq_unit.hh:543
gem5::o3::LSQUnit::stallingLoadIdx
int stallingLoadIdx
The index of the above store.
Definition: lsq_unit.hh:531
gem5::HtmFailureFaultCause::MEMORY
@ MEMORY
gem5::BaseCPU::numContexts
unsigned numContexts()
Get the number of thread contexts available.
Definition: base.hh:293
gem5::o3::IEW::replayMemInst
void replayMemInst(const DynInstPtr &inst)
Re-executes all rescheduled memory instructions.
Definition: iew.cc:551
gem5::o3::LSQUnit::LSQUnit
LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
Constructs an LSQ unit.
Definition: lsq_unit.cc:203
gem5::o3::LSQUnit::read
Fault read(LSQRequest *req, int load_idx)
Executes the load at the given index.
Definition: lsq_unit.cc:1305
gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:366
gem5::o3::LSQ::LSQSenderState::needWB
bool needWB
Whether or not the instruction will need to writeback.
Definition: lsq.hh:101
gem5::BaseCPU::cacheLineSize
unsigned int cacheLineSize() const
Get the cache line size of the system.
Definition: base.hh:381
gem5::o3::LSQUnit::numFreeStoreEntries
unsigned numFreeStoreEntries()
Returns the number of free SQ entries.
Definition: lsq_unit.cc:433
gem5::o3::LSQUnit::init
void init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params, LSQ *lsq_ptr, unsigned id)
Initializes the LSQ unit with the specified number of entries.
Definition: lsq_unit.cc:214
gem5::Request::CACHE_BLOCK_ZERO
@ CACHE_BLOCK_ZERO
This is a write that is targeted and zeroing an entire cache block.
Definition: request.hh:143
gem5::o3::LSQUnit::LSQUnitStats::ignoredResponses
statistics::Scalar ignoredResponses
Total number of responses from the memory system that are ignored due to the instruction already bein...
Definition: lsq_unit.hh:564
gem5::o3::LSQ::LSQRequest::isCacheBlockHit
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask)=0
Test if the request accesses a particular cache line.
gem5::statistics::nozero
const FlagsType nozero
Don't print if this is zero.
Definition: info.hh:68
gem5::o3::IEW::checkMisprediction
void checkMisprediction(const DynInstPtr &inst)
Check misprediction
Definition: iew.cc:1593
sc_dt::to_string
const std::string to_string(sc_enc enc)
Definition: sc_fxdefs.cc:91
gem5::o3::LSQ::LSQRequest::buildPackets
virtual void buildPackets()=0
gem5::HtmCacheFailure
HtmCacheFailure
Definition: htm.hh:59
gem5::o3::LSQUnit::schedule
void schedule(Event &ev, Tick when)
Schedule event for the cpu.
Definition: lsq_unit.cc:1294
gem5::o3::LSQ::cachePortAvailable
bool cachePortAvailable(bool is_load) const
Is any store port available to use?
Definition: lsq.cc:206
gem5::ArmISA::e
Bitfield< 9 > e
Definition: misc_types.hh:64
gem5::Packet::setHtmTransactional
void setHtmTransactional(uint64_t val)
Stipulates that this packet/request originates in the CPU executing in transactional mode,...
Definition: packet.cc:521
gem5::o3::LSQUnit::htmStops
int htmStops
Definition: lsq_unit.hh:509
gem5::o3::LSQUnit::MaxDataBytes
static constexpr auto MaxDataBytes
Definition: lsq_unit.hh:93
gem5::o3::LSQ
Definition: lsq.hh:75
gem5::Packet::isWrite
bool isWrite() const
Definition: packet.hh:583
gem5::o3::LSQUnit::storesToWB
int storesToWB
The number of store instructions in the SQ waiting to writeback.
Definition: lsq_unit.hh:504
gem5::o3::LSQUnit::cacheBlockMask
Addr cacheBlockMask
Address Mask for a cache block (e.g.
Definition: lsq_unit.hh:519
gem5::o3::LSQ::LSQRequest::sendPacketToCache
virtual void sendPacketToCache()=0
gem5::o3::LSQUnit::setDcachePort
void setDcachePort(RequestPort *dcache_port)
Sets the pointer to the dcache port.
Definition: lsq_unit.cc:292
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
gem5::csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
dyn_inst.hh
gem5::o3::LSQ::LSQRequest::mainPacket
virtual PacketPtr mainPacket()
Definition: lsq.hh:422
gem5::RefCountingPtr::get
T * get() const
Directly access the pointer itself without taking a reference.
Definition: refcnt.hh:227
gem5::CircularQueue::full
bool full() const
Is the queue full? A queue is full if the head is the 0^{th} element and the tail is the (size-1)^{th...
Definition: circular_queue.hh:558
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:66
gem5::o3::LSQUnit::LSQUnitStats::loadToUse
statistics::Distribution loadToUse
Distribution of cycle latency between the first time a load is issued and its completion.
Definition: lsq_unit.hh:580
gem5::o3::LSQUnit::depCheckShift
unsigned depCheckShift
The number of places to shift addresses in the LSQ before checking for dependency violations.
Definition: lsq_unit.hh:494
request.hh
gem5::HtmFailureFaultCause::OTHER
@ OTHER
gem5::o3::LSQUnit::AddrRangeCoverage::PartialAddrRangeCoverage
@ PartialAddrRangeCoverage
gem5::statistics::DistBase::sample
void sample(const U &v, int n=1)
Add a value to the distribtion n times.
Definition: statistics.hh:1325
gem5::o3::LSQUnit::numFreeLoadEntries
unsigned numFreeLoadEntries()
Returns the number of free LQ entries.
Definition: lsq_unit.cc:423
gem5::o3::IEW::wakeCPU
void wakeCPU()
Tells the CPU to wakeup if it has descheduled itself due to no activity.
Definition: iew.cc:805
gem5::HtmCacheFailure::FAIL_SELF
@ FAIL_SELF
gem5::RefCountingPtr< DynInst >
gem5::BaseMMU
Definition: mmu.hh:50
gem5::o3::IEW::rescheduleMemInst
void rescheduleMemInst(const DynInstPtr &inst)
Tells memory dependence unit that a memory instruction needs to be rescheduled.
Definition: iew.cc:545
packet.hh
gem5::o3::CPU::checker
gem5::Checker< DynInstPtr > * checker
Pointer to the checker, which can dynamically verify instruction results at run time.
Definition: cpu.hh:602
gem5::RequestPort
A RequestPort is a specialisation of a Port, which implements the default protocol for the three diff...
Definition: port.hh:77
gem5::Cycles
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:78
gem5::o3::LSQUnit::getLatestHtmUid
uint64_t getLatestHtmUid() const
Definition: lsq_unit.cc:1055
gem5::ArmISA::handleLockedWrite
bool handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
Definition: locked_mem.hh:113
str.hh
gem5::o3::LSQUnit::completeStore
void completeStore(typename StoreQueue::iterator store_idx)
Completes the store at the specified index.
Definition: lsq_unit.cc:1155
gem5::o3::LSQUnit::needsTSO
bool needsTSO
Flag for memory model.
Definition: lsq_unit.hh:546
gem5::o3::LSQUnit::LSQUnitStats::forwLoads
statistics::Scalar forwLoads
Total number of loads forwaded from LSQ stores.
Definition: lsq_unit.hh:557
gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1134
gem5::o3::LSQUnit::LSQUnitStats::blockedByCache
statistics::Scalar blockedByCache
Number of times the LSQ is blocked due to the cache.
Definition: lsq_unit.hh:576
gem5::o3::LSQUnit::WritebackEvent
Writeback event, specifically for when stores forward data to loads.
Definition: lsq_unit.hh:447
gem5::o3::LSQ::LSQRequest::_size
const uint32_t _size
Definition: lsq.hh:297
gem5::o3::LSQ::LSQRequest::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)=0
gem5::CircularQueue::tail
size_t tail() const
Definition: circular_queue.hh:456
gem5::o3::LSQUnit
Class that implements the actual LQ and SQ for each specific thread.
Definition: lsq_unit.hh:90
gem5::HtmCacheFailure::FAIL_REMOTE
@ FAIL_REMOTE
gem5::CircularQueue::size
size_t size() const
Definition: circular_queue.hh:466
gem5::o3::CPU
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition: cpu.hh:95
gem5::o3::LSQ::LSQRequest::packetSent
void packetSent()
Update the status to reflect that a packet was sent.
Definition: lsq.hh:497
gem5::o3::LSQ::LSQRequest::setStateToFault
void setStateToFault()
Definition: lsq.hh:560
gem5::o3::LSQUnit::stats
gem5::o3::LSQUnit::LSQUnitStats stats
gem5::o3::LSQUnit::commitLoad
void commitLoad()
Commits the head load.
Definition: lsq_unit.cc:721
gem5::statistics::Distribution::init
Distribution & init(Counter min, Counter max, Counter bkt)
Set the parameters of this distribution.
Definition: statistics.hh:2110
gem5::ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU.
Definition: thread_context.hh:93
gem5::o3::LSQUnit::LSQEntry
Definition: lsq_unit.hh:98
gem5::o3::LSQUnit::LSQUnitStats::rescheduledLoads
statistics::Scalar rescheduledLoads
Number of loads that were rescheduled.
Definition: lsq_unit.hh:573
gem5::o3::LSQ::LSQRequest::discard
void discard()
The request is discarded (e.g.
Definition: lsq.hh:578
gem5::o3::LSQUnit::trySendPacket
bool trySendPacket(bool isLoad, PacketPtr data_pkt)
Attempts to send a packet to the cache.
Definition: lsq_unit.cc:1219
gem5::Fault
std::shared_ptr< FaultBase > Fault
Definition: types.hh:255
gem5::Packet::print
void print(std::ostream &o, int verbosity=0, const std::string &prefix="") const
Definition: packet.cc:373
gem5::o3::LSQ::LSQSenderState::isSplit
bool isSplit
Whether or not this access is split in two.
Definition: lsq.hh:103
gem5::o3::LSQ::LSQRequest::discardSenderState
void discardSenderState()
Mark senderState as discarded.
Definition: lsq.hh:456
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::Event
Definition: eventq.hh:251
ADD_STAT
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition: group.hh:75
gem5::o3::LSQUnit::LQSenderState
Particularisation of the LSQSenderState to the LQ.
Definition: lsq_unit.hh:410
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:283
gem5::o3::LSQUnit::loads
int loads
The number of load instructions in the LQ.
Definition: lsq_unit.hh:500
gem5::o3::LSQUnit::isStoreBlocked
bool isStoreBlocked
Whether or not a store is blocked due to the memory system.
Definition: lsq_unit.hh:537
gem5::Packet::htmTransactionFailedInCache
bool htmTransactionFailedInCache() const
Returns whether or not this packet/request has returned from the cache hierarchy in a failed transact...
Definition: packet.cc:508
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
gem5::Tick
uint64_t Tick
Tick count type.
Definition: types.hh:58
gem5::o3::LSQUnit::WritebackEvent::WritebackEvent
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr)
Constructs a writeback event.
Definition: lsq_unit.cc:66
gem5::o3::IEW
IEW handles both single threaded and SMT IEW (issue/execute/writeback).
Definition: iew.hh:87
cpu.hh
gem5::o3::LSQUnit::htmStarts
int htmStarts
Definition: lsq_unit.hh:508
gem5::o3::LSQUnit::cpu
CPU * cpu
Pointer to the CPU.
Definition: lsq_unit.hh:398
gem5::o3::LSQUnit::storeWBIt
StoreQueue::iterator storeWBIt
The index of the first instruction that may be ready to be written back, and has not yet been written...
Definition: lsq_unit.hh:516
gem5::o3::LSQUnit::lsqID
ThreadID lsqID
The LSQUnit thread id.
Definition: lsq_unit.hh:482
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:86
gem5::o3::LSQ::LSQRequest::packet
PacketPtr packet(int idx=0)
Definition: lsq.hh:419
gem5::o3::LSQ::LSQRequest::_numOutstandingPackets
uint32_t _numOutstandingPackets
Definition: lsq.hh:300
gem5::ReExec
Definition: faults.hh:92
gem5::o3::CPU::tcBase
gem5::ThreadContext * tcBase(ThreadID tid)
Returns a pointer to a thread context.
Definition: cpu.hh:590
gem5::o3::LSQUnit::write
Fault write(LSQRequest *req, uint8_t *data, int store_idx)
Executes the store at the given index.
Definition: lsq_unit.cc:1628
gem5::Packet::getHtmTransactionUid
uint64_t getHtmTransactionUid() const
If a packet/request originates in a CPU executing in transactional mode, i.e.
Definition: packet.cc:534
gem5::o3::CPU::thread
std::vector< ThreadState * > thread
Pointers to all of the threads in the CPU.
Definition: cpu.hh:608
gem5::o3::LSQUnit::commitLoads
void commitLoads(InstSeqNum &youngest_inst)
Commits loads older than a specific sequence number.
Definition: lsq_unit.cc:746
gem5::o3::IEW::activityThisCycle
void activityThisCycle()
Reports to the CPU that there is activity this cycle.
Definition: iew.cc:811
gem5::CircularQueue::begin
iterator begin()
Iterators.
Definition: circular_queue.hh:565
gem5::ArmISA::handleLockedSnoopHit
void handleLockedSnoopHit(XC *xc)
Definition: locked_mem.hh:103
gem5::o3::LSQUnit::completeDataAccess
void completeDataAccess(PacketPtr pkt)
Completes the data access that has been returned from the memory system.
Definition: lsq_unit.cc:111
gem5::o3::LSQ::cachePortBusy
void cachePortBusy(bool is_load)
Another store port is in use.
Definition: lsq.cc:218
gem5::o3::CPU::wakeCPU
void wakeCPU()
Wakes the CPU, rescheduling the CPU if it's not already active.
Definition: cpu.cc:1594
gem5::HtmFailureFaultCause::INVALID
@ INVALID
ss
std::stringstream ss
Definition: trace.test.cc:45
gem5::statistics::Group::addStatGroup
void addStatGroup(const char *name, Group *block)
Add a stat block as a child of this block.
Definition: group.cc:117
gem5::CircularQueue::front
reference front()
Definition: circular_queue.hh:441
gem5::Packet::getHtmTransactionFailedInCacheRC
HtmCacheFailure getHtmTransactionFailedInCacheRC() const
If a packet/request has returned from the cache hierarchy in a failed transaction,...
Definition: packet.cc:514
gem5::o3::LSQUnit::takeOverFrom
void takeOverFrom()
Takes over from another CPU's thread.
Definition: lsq_unit.cc:308
gem5::o3::LSQ::LSQRequest::request
RequestPtr request(int idx=0)
Definition: lsq.hh:408
gem5::o3::LSQUnit::retryPkt
PacketPtr retryPkt
The packet that needs to be retried.
Definition: lsq_unit.hh:534
gem5::o3::LSQUnit::checkViolations
Fault checkViolations(typename LoadQueue::iterator &loadIt, const DynInstPtr &inst)
Check for ordering violations in the LSQ.
Definition: lsq_unit.cc:524
gem5::ThreadContext::getHtmCheckpointPtr
virtual BaseHTMCheckpointPtr & getHtmCheckpointPtr()=0
gem5::o3::LSQ::LSQRequest::senderState
void senderState(LSQSenderState *st)
Definition: lsq.hh:436
gem5::BaseCPU::getContext
virtual ThreadContext * getContext(int tn)
Given a thread num get the thread context for it.
Definition: base.hh:290
gem5::o3::LSQUnit::stores
int stores
The number of store instructions in the SQ.
Definition: lsq_unit.hh:502
gem5::o3::LSQUnit::writeback
void writeback(const DynInstPtr &inst, PacketPtr pkt)
Writes back the instruction, sending it to IEW.
Definition: lsq_unit.cc:1093
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::o3::LSQUnit::AddrRangeCoverage::FullAddrRangeCoverage
@ FullAddrRangeCoverage
gem5::o3::LSQUnit::squash
void squash(const InstSeqNum &squashed_num)
Squashes all instructions younger than a specific sequence number.
Definition: lsq_unit.cc:938
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:534
gem5::o3::LSQUnit::LSQUnitStats::squashedStores
statistics::Scalar squashedStores
Total number of squashed stores.
Definition: lsq_unit.hh:570
gem5::o3::LSQ::LSQRequest::complete
void complete()
Definition: lsq.hh:621
gem5::Clocked::clockEdge
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
Definition: clocked_object.hh:177
gem5::o3::LSQUnit::LSQEntry::instruction
const DynInstPtr & instruction() const
Definition: lsq_unit.hh:148
gem5::o3::LSQ::LSQRequest::isSent
bool isSent()
Definition: lsq.hh:541
gem5::o3::LSQUnit::executeStore
Fault executeStore(const DynInstPtr &inst)
Executes a store instruction.
Definition: lsq_unit.cc:670
gem5::Request::STORE_NO_DATA
static const FlagsType STORE_NO_DATA
Definition: request.hh:244
gem5::o3::MaxThreads
static constexpr int MaxThreads
Definition: limits.hh:38
gem5::o3::LSQUnit::writebackBlockedStore
void writebackBlockedStore()
Try to finish a previously blocked write back attempt.
Definition: lsq_unit.cc:783
gem5::o3::CPU::activityThisCycle
void activityThisCycle()
Records that there was time buffer activity this cycle.
Definition: cpu.hh:569
gem5::HtmFailureFaultCause::SIZE
@ SIZE
gem5::o3::LSQ::LSQSenderState::isLoad
bool isLoad
Whether or not it is a load.
Definition: lsq.hh:99
gem5::CircularQueue::pop_front
void pop_front(size_t num_elem=1)
Circularly increase the head pointer.
Definition: circular_queue.hh:477
gem5::o3::IEW::instToCommit
void instToCommit(const DynInstPtr &inst)
Sends an instruction to commit through the time buffer.
Definition: iew.cc:569
gem5::RiscvISA::x
Bitfield< 3 > x
Definition: pagetable.hh:73
gem5::o3::LSQUnit::insert
void insert(const DynInstPtr &inst)
Inserts an instruction.
Definition: lsq_unit.cc:314
gem5::o3::LSQUnit::dcachePort
RequestPort * dcachePort
Pointer to the dcache port.
Definition: lsq_unit.hh:407
gem5::o3::LSQ::LSQSenderState
Derived class to hold any sender state the LSQ needs.
Definition: lsq.hh:80
gem5::CircularQueue::pop_back
void pop_back()
Circularly decrease the tail pointer.
Definition: circular_queue.hh:490
gem5::CircularQueue::empty
bool empty() const
Is the queue empty?
Definition: circular_queue.hh:548
gem5::o3::LSQUnit::stalled
bool stalled
Whether or not the LSQ is stalled.
Definition: lsq_unit.hh:525
gem5::htmFailureToStr
std::string htmFailureToStr(HtmFailureFaultCause cause)
Convert enum into string to be used for debug purposes.
Definition: htm.cc:44
gem5::Request::LLSC
@ LLSC
The request is a Load locked/store conditional.
Definition: request.hh:156
gem5::o3::IEW::blockMemInst
void blockMemInst(const DynInstPtr &inst)
Moves memory instruction onto the list of cache blocked instructions.
Definition: iew.cc:557
gem5::o3::LSQ::LSQRequest::mainRequest
virtual RequestPtr mainRequest()
Definition: lsq.hh:429
gem5::RefCountingPtr::data
T * data
The stored pointer.
Definition: refcnt.hh:146
gem5::CircularQueue::head
size_t head() const
Definition: circular_queue.hh:451
gem5::o3::LSQUnit::SQEntry::DataSize
static constexpr size_t DataSize
Definition: lsq_unit.hh:170
gem5::o3::LSQUnit::getMMUPtr
BaseMMU * getMMUPtr()
Definition: lsq_unit.cc:1296
gem5::o3::LSQUnit::lsq
LSQ * lsq
Pointer to the LSQ.
Definition: lsq_unit.hh:404
gem5::CircularQueue::end
iterator end()
Definition: circular_queue.hh:580
gem5::Packet::makeResponse
void makeResponse()
Take a request packet and modify it in place to be suitable for returning as a response to that reque...
Definition: packet.hh:1031
gem5::o3::LSQUnit::executeLoad
Fault executeLoad(const DynInstPtr &inst)
Executes a load instruction.
Definition: lsq_unit.cc:604
gem5::o3::LSQUnit::recvRetry
void recvRetry()
Handles doing the retry.
Definition: lsq_unit.cc:1262
gem5::MemCmd::WriteReq
@ WriteReq
Definition: packet.hh:89
lsq_unit.hh
gem5::o3::LSQ::LSQRequest::isAnyOutstandingRequest
bool isAnyOutstandingRequest()
Test if there is any in-flight translation or mem access request.
Definition: lsq.hh:466
gem5::ArmISA::handleLockedRead
void handleLockedRead(XC *xc, const RequestPtr &req)
Definition: locked_mem.hh:93
gem5::statistics::Group
Statistics container.
Definition: group.hh:93
gem5::Request::NO_ACCESS
@ NO_ACCESS
The request should not cause a memory access.
Definition: request.hh:146
gem5::ArmISA::id
Bitfield< 33 > id
Definition: misc_types.hh:250
gem5::o3::IEW::updateLSQNextCycle
bool updateLSQNextCycle
Records if the LSQ needs to be updated on the next cycle, so that IEW knows if there will be activity...
Definition: iew.hh:365
gem5::InstSeqNum
uint64_t InstSeqNum
Definition: inst_seq.hh:40
gem5::CircularQueue::capacity
size_t capacity() const
Definition: circular_queue.hh:461
gem5::o3::LSQUnit::drainSanityCheck
void drainSanityCheck() const
Perform sanity checks after a drain.
Definition: lsq_unit.cc:298
gem5::o3::LSQUnit::commitStores
void commitStores(InstSeqNum &youngest_inst)
Commits stores older than a specific sequence number.
Definition: lsq_unit.cc:757
gem5::o3::LSQUnit::resetState
void resetState()
Reset the LSQ state.
Definition: lsq_unit.cc:237
gem5::o3::LSQUnit::SQSenderState
Particularisation of the LSQSenderState to the SQ.
Definition: lsq_unit.hh:429
gem5::Packet::setHtmTransactionFailedInCache
void setHtmTransactionFailedInCache(const HtmCacheFailure ret_code)
Stipulates that this packet/request has returned from the cache hierarchy in a failed transaction.
Definition: packet.cc:498
gem5::ArmISA::handleLockedSnoop
void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
Definition: locked_mem.hh:64
gem5::statistics::DataWrap::flags
Derived & flags(Flags _flags)
Set the flags and marks this stat to print at the end of simulation.
Definition: statistics.hh:355
debugfaults.hh
gem5::Clocked::ticksToCycles
Cycles ticksToCycles(Tick t) const
Definition: clocked_object.hh:222
gem5::CircularQueue::getIterator
iterator getIterator(size_t idx)
Return an iterator to an index in the queue.
Definition: circular_queue.hh:592
gem5::Packet::isHtmTransactional
bool isHtmTransactional() const
Returns whether or not this packet/request originates in the CPU executing in transactional mode,...
Definition: packet.cc:528
gem5::o3::LSQUnit::getMemDepViolator
DynInstPtr getMemDepViolator()
Returns the memory ordering violator.
Definition: lsq_unit.cc:413
gem5::Packet::getAddr
Addr getAddr() const
Definition: packet.hh:781
gem5::o3::LSQUnit::getStoreHeadSeqNum
InstSeqNum getStoreHeadSeqNum()
Returns the sequence number of the head store instruction.
Definition: lsq_unit.cc:1666
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: decoder.cc:40
gem5::o3::LSQUnit::loadQueue
LoadQueue loadQueue
The load queue.
Definition: lsq_unit.hh:488
gem5::o3::LSQUnit::name
std::string name() const
Returns the name of the LSQ unit.
Definition: lsq_unit.cc:256
lsq.hh
gem5::o3::LSQUnit::AddrRangeCoverage::NoAddrRangeCoverage
@ NoAddrRangeCoverage
limits.hh
gem5::o3::LSQUnit::dumpInsts
void dumpInsts() const
Debugging function to dump instructions in the LSQ.
Definition: lsq_unit.cc:1271
gem5::o3::LSQUnit::insertLoad
void insertLoad(const DynInstPtr &load_inst)
Inserts a load instruction.
Definition: lsq_unit.cc:330
gem5::o3::LSQUnit::cacheLineSize
unsigned int cacheLineSize()
Definition: lsq_unit.cc:1299
gem5::o3::LSQUnit::storeQueue
CircularQueue< SQEntry > storeQueue
The store queue.
Definition: lsq_unit.hh:485
gem5::o3::LSQ::LSQSenderState::request
LSQRequest * request()
Definition: lsq.hh:117
gem5::o3::LSQUnit::getLoadHeadSeqNum
InstSeqNum getLoadHeadSeqNum()
Returns the sequence number of the head load instruction.
Definition: lsq_unit.cc:1657
gem5::o3::LSQUnit::writebackStores
void writebackStores()
Writes back stores.
Definition: lsq_unit.cc:793
gem5::o3::LSQ::LSQRequest::isSplit
bool isSplit() const
Definition: lsq.hh:475
gem5::o3::LSQUnit::checkSnoop
void checkSnoop(PacketPtr pkt)
Check if an incoming invalidate hits in the lsq on a load that might have issued out of order wrt ano...
Definition: lsq_unit.cc:444
gem5::o3::IEW::name
std::string name() const
Returns the name of the IEW stage.
Definition: iew.cc:119
gem5::Packet::isInvalidate
bool isInvalidate() const
Definition: packet.hh:598
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:177
gem5::o3::LSQUnit::LSQUnitStats::LSQUnitStats
LSQUnitStats(statistics::Group *parent)
Definition: lsq_unit.cc:265

Generated on Tue Sep 21 2021 12:25:05 for gem5 by doxygen 1.8.17