gem5 [DEVELOP-FOR-25.0]
GPUCoalescer.cc
1/*
2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include "mem/ruby/system/GPUCoalescer.hh"
33
34#include "base/compiler.hh"
35#include "base/logging.hh"
36#include "base/str.hh"
38#include "debug/GPUCoalescer.hh"
39#include "debug/MemoryAccess.hh"
40#include "debug/ProtocolTrace.hh"
41#include "debug/RubyHitMiss.hh"
42#include "debug/RubyPort.hh"
43#include "debug/RubyStats.hh"
44#include "gpu-compute/shader.hh"
45#include "mem/packet.hh"
54#include "params/RubyGPUCoalescer.hh"
55
56namespace gem5
57{
58
59namespace ruby
60{
61
62UncoalescedTable::UncoalescedTable(GPUCoalescer *gc)
63 : coalescer(gc)
64{
65}
66
67void
68UncoalescedTable::insertPacket(PacketPtr pkt)
69{
70 uint64_t seqNum = pkt->req->getReqInstSeqNum();
71
72 instMap[seqNum].push_back(pkt);
73 DPRINTF(GPUCoalescer, "Adding 0x%X seqNum %d to map. (map %d vec %d)\n",
74 pkt->getAddr(), seqNum, instMap.size(), instMap[seqNum].size());
75}
76
77void
78UncoalescedTable::insertReqType(PacketPtr pkt, RubyRequestType type)
79{
80 uint64_t seqNum = pkt->req->getReqInstSeqNum();
81
82 reqTypeMap[seqNum] = type;
83}
84
85bool
86UncoalescedTable::packetAvailable()
87{
88 return !instMap.empty();
89}
90
91void
92UncoalescedTable::initPacketsRemaining(InstSeqNum seqNum, int count)
93{
94 if (!instPktsRemaining.count(seqNum)) {
95 instPktsRemaining[seqNum] = count;
96 }
97}
98
99int
100UncoalescedTable::getPacketsRemaining(InstSeqNum seqNum)
101{
102 return instPktsRemaining[seqNum];
103}
104
105void
106UncoalescedTable::setPacketsRemaining(InstSeqNum seqNum, int count)
107{
108 instPktsRemaining[seqNum] = count;
109}
110
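// Returns the pending packet list for the instruction at position 'offset'
// in the table (instructions are ordered by sequence number), or nullptr if
// fewer instructions are pending. completeIssue() uses this to walk its
// per-cycle coalescing window.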
111PerInstPackets*
112UncoalescedTable::getInstPackets(int offset)
113{
114 if (offset >= instMap.size()) {
115 return nullptr;
116 }
117
118 auto instMapIter = instMap.begin();
119 std::advance(instMapIter, offset);
120
121 return &(instMapIter->second);
122}
123
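// Cleans up instructions whose packets have all been coalesced: removes them
// from both bookkeeping maps and, outside of warmup/cooldown, returns a
// GMTokenPort token for every completed non-FLUSH instruction.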
124void
125UncoalescedTable::updateResources()
126{
127 for (auto iter = instMap.begin(); iter != instMap.end(); ) {
128 InstSeqNum seq_num = iter->first;
129 DPRINTF(GPUCoalescer, "%s checking remaining pkts for %d\n",
130 coalescer->name().c_str(), seq_num);
131 assert(instPktsRemaining.count(seq_num));
132
133 if (instPktsRemaining[seq_num] == 0) {
134 assert(iter->second.empty());
135
136 // Remove from both maps
137 instMap.erase(iter++);
138 instPktsRemaining.erase(seq_num);
139
140 // Release the token if the Ruby system is not in cooldown
141 // or warmup phases. When in these phases, the RubyPorts
142 // are accessed directly using the makeRequest() command
143 // instead of accessing through the port. This makes
144 // sending tokens through the port unnecessary
145 if (!coalescer->getRubySystem()->getWarmupEnabled() &&
146 !coalescer->getRubySystem()->getCooldownEnabled()) {
147 if (reqTypeMap[seq_num] != RubyRequestType_FLUSH) {
149 "Returning token seqNum %d\n", seq_num);
150 coalescer->getGMTokenPort().sendTokens(1);
151 }
152 }
153
154 reqTypeMap.erase(seq_num);
155 } else {
156 ++iter;
157 }
158 }
159}
160
161bool
162UncoalescedTable::areRequestsDone(const uint64_t instSeqNum) {
163 // iterate the instructions held in UncoalescedTable to see whether there
164 // are more requests to issue; if yes, not yet done; otherwise, done
165 for (auto& inst : instMap) {
166 DPRINTF(GPUCoalescer, "instSeqNum= %d, pending packets=%d\n"
167 ,inst.first, inst.second.size());
168 if (inst.first == instSeqNum) { return false; }
169 }
170
171 return true;
172}
173
174void
175UncoalescedTable::printRequestTable(std::stringstream& ss)
176{
177 ss << "Listing pending packets from " << instMap.size() << " instructions";
178
179 for (auto& inst : instMap) {
180 ss << "\tAddr: " << coalescer->printAddress(inst.first) << " with "
181 << inst.second.size() << " pending packets" << std::endl;
182 }
183}
184
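// Scans every pending packet and panics if any has been waiting longer than
// 'threshold' ticks, dumping the request tables to aid debugging.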
185void
186UncoalescedTable::checkDeadlock(Tick threshold)
187{
188 Tick current_time = curTick();
189
190 for (auto &it : instMap) {
191 for (auto &pkt : it.second) {
192 if (current_time - pkt->req->time() > threshold) {
193 std::stringstream ss;
194 coalescer->printRequestTable(ss);
195
196 panic("Possible Deadlock detected. Aborting!\n"
197 "version: %d request.paddr: 0x%x uncoalescedTable: %d "
198 "current time: %u issue_time: %d difference: %d\n"
199 "Request Tables:\n\n%s", coalescer->getId(),
200 pkt->getAddr(), instMap.size(), current_time,
201 pkt->req->time(), current_time - pkt->req->time(),
202 ss.str());
203 }
204 }
205 }
206}
207
208GPUCoalescer::GPUCoalescer(const Params &p)
209 : RubyPort(p),
210 issueEvent([this]{ completeIssue(); }, "Issue coalesced request",
211 false, Event::Progress_Event_Pri),
212 uncoalescedTable(this),
213 deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check"),
214 stats(this),
215 gmTokenPort(name() + ".gmTokenPort")
216{
217 m_store_waiting_on_load_cycles = 0;
218 m_store_waiting_on_store_cycles = 0;
219 m_load_waiting_on_store_cycles = 0;
220 m_load_waiting_on_load_cycles = 0;
221
222 m_outstanding_count = 0;
223
224 coalescingWindow = p.max_coalesces_per_cycle;
225
226 m_max_outstanding_requests = 0;
227 m_instCache_ptr = nullptr;
228 m_dataCache_ptr = nullptr;
229
230 m_instCache_ptr = p.icache;
231 m_dataCache_ptr = p.dcache;
232 m_max_outstanding_requests = p.max_outstanding_requests;
233 m_deadlock_threshold = p.deadlock_threshold;
234
235 assert(m_max_outstanding_requests > 0);
236 assert(m_deadlock_threshold > 0);
237 assert(m_instCache_ptr);
238 assert(m_dataCache_ptr);
239
240 m_runningGarnetStandalone = p.garnet_standalone;
241
242
243 // These statistical variables are not for display.
244 // The profiler will collate these across different
245 // coalescers and display those collated statistics.
246 m_outstandReqHist.init(10);
247 m_latencyHist.init(10);
248 m_missLatencyHist.init(10);
249
250 for (int i = 0; i < RubyRequestType_NUM; i++) {
251 m_typeLatencyHist.push_back(new statistics::Histogram());
252 m_typeLatencyHist[i]->init(10);
253
254 m_missTypeLatencyHist.push_back(new statistics::Histogram());
255 m_missTypeLatencyHist[i]->init(10);
256 }
257
258 for (int i = 0; i < MachineType_NUM; i++) {
259 m_missMachLatencyHist.push_back(new statistics::Histogram());
260 m_missMachLatencyHist[i]->init(10);
261
262 m_IssueToInitialDelayHist.push_back(new statistics::Histogram());
263 m_IssueToInitialDelayHist[i]->init(10);
264
265 m_InitialToForwardDelayHist.push_back(new statistics::Histogram());
266 m_InitialToForwardDelayHist[i]->init(10);
267
268 m_ForwardToFirstResponseDelayHist.push_back(
269 new statistics::Histogram());
270 m_ForwardToFirstResponseDelayHist[i]->init(10);
271
272 m_FirstResponseToCompletionDelayHist.push_back(
273 new statistics::Histogram());
274 m_FirstResponseToCompletionDelayHist[i]->init(10);
275 }
276
277 for (int i = 0; i < RubyRequestType_NUM; i++) {
278 m_missTypeMachLatencyHist.push_back(
279 std::vector<statistics::Histogram *>());
280
281 for (int j = 0; j < MachineType_NUM; j++) {
282 m_missTypeMachLatencyHist[i].push_back(
283 new statistics::Histogram());
284 m_missTypeMachLatencyHist[i][j]->init(10);
285 }
286 }
287
288}
289
290GPUCoalescer::~GPUCoalescer()
291{
292}
293
294Port &
295GPUCoalescer::getPort(const std::string &if_name, PortID idx)
296{
297 if (if_name == "gmTokenPort") {
298 return gmTokenPort;
299 }
300
301 // delegate to RubyPort otherwise
302 return RubyPort::getPort(if_name, idx);
303}
304
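// Deadlock-check event handler: scans the coalesced and uncoalesced tables
// for requests older than the deadlock threshold and reschedules itself
// while requests remain outstanding.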
305void
306GPUCoalescer::wakeup()
307{
308 Cycles current_time = curCycle();
309 for (auto& requestList : coalescedTable) {
310 for (auto& req : requestList.second) {
311 if (current_time - req->getIssueTime() > m_deadlock_threshold) {
312 std::stringstream ss;
313 printRequestTable(ss);
314 warn("GPUCoalescer %d Possible deadlock detected!\n%s\n",
315 m_version, ss.str());
316 panic("Aborting due to deadlock!\n");
317 }
318 }
319 }
320
321 Tick tick_threshold = cyclesToTicks(m_deadlock_threshold);
322 uncoalescedTable.checkDeadlock(tick_threshold);
323
324 if (m_outstanding_count > 0) {
325 schedule(deadlockCheckEvent,
326 m_deadlock_threshold * clockPeriod() +
327 curTick());
328 }
329}
330
331void
332GPUCoalescer::printRequestTable(std::stringstream& ss)
333{
334 ss << "Printing out " << coalescedTable.size()
335 << " outstanding requests in the coalesced table\n";
336
337 for (auto& requestList : coalescedTable) {
338 for (auto& request : requestList.second) {
339 ss << "\tAddr: " << printAddress(requestList.first) << "\n"
340 << "\tInstruction sequence number: "
341 << request->getSeqNum() << "\n"
342 << "\t\tType: "
343 << RubyRequestType_to_string(request->getRubyType()) << "\n"
344 << "\t\tNumber of associated packets: "
345 << request->getPackets().size() << "\n"
346 << "\t\tIssue time: "
347 << request->getIssueTime() * clockPeriod() << "\n"
348 << "\t\tDifference from current tick: "
349 << (curCycle() - request->getIssueTime()) * clockPeriod()
350 << "\n";
351 }
352 }
353
354 // print out packets waiting to be issued in uncoalesced table
355 uncoalescedTable.printRequestTable(ss);
356}
357
358void
359GPUCoalescer::resetStats()
360{
361 m_latencyHist.reset();
362 m_missLatencyHist.reset();
363 for (int i = 0; i < RubyRequestType_NUM; i++) {
364 m_typeLatencyHist[i]->reset();
365 m_missTypeLatencyHist[i]->reset();
366 for (int j = 0; j < MachineType_NUM; j++) {
367 m_missTypeMachLatencyHist[i][j]->reset();
368 }
369 }
370
371 for (int i = 0; i < MachineType_NUM; i++) {
372 m_missMachLatencyHist[i]->reset();
373
374 m_IssueToInitialDelayHist[i]->reset();
375 m_InitialToForwardDelayHist[i]->reset();
376 m_ForwardToFirstResponseDelayHist[i]->reset();
377 m_FirstResponseToCompletionDelayHist[i]->reset();
378 }
379}
380
381void
382GPUCoalescer::printProgress(std::ostream& out) const
383{
384}
385
386// sets the kernelEndList
387void
388GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
389{
390 // Don't know if this will happen or is possible
391 // but I just want to be careful and not have it become
392 // a simulator hang in the future
393 DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndlist\n", wavefront_id);
394 assert(kernelEndList.count(wavefront_id) == 0);
395
396 kernelEndList[wavefront_id] = pkt;
397 DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
398 kernelEndList.size());
399}
400
401void
402GPUCoalescer::writeCallback(Addr address, DataBlock& data)
403{
404 writeCallback(address, MachineType_NULL, data);
405}
406
407void
408GPUCoalescer::writeCallback(Addr address,
409 MachineType mach,
410 DataBlock& data)
411{
412 writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
413}
414
415void
416GPUCoalescer::writeCallback(Addr address,
417 MachineType mach,
418 DataBlock& data,
419 Cycles initialRequestTime,
420 Cycles forwardRequestTime,
421 Cycles firstResponseTime)
422{
423 writeCallback(address, mach, data,
424 initialRequestTime, forwardRequestTime, firstResponseTime,
425 false);
426}
427
428void
429GPUCoalescer::writeCallback(Addr address,
430 MachineType mach,
431 DataBlock& data,
432 Cycles initialRequestTime,
433 Cycles forwardRequestTime,
434 Cycles firstResponseTime,
435 bool isRegion)
436{
437 assert(address == makeLineAddress(address));
438 assert(coalescedTable.count(address));
439
440 auto crequest = coalescedTable.at(address).front();
441
442 hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
443 forwardRequestTime, firstResponseTime, isRegion, false, false);
444
445 // remove this crequest in coalescedTable
446 delete crequest;
447 coalescedTable.at(address).pop_front();
448
449 if (coalescedTable.at(address).empty()) {
450 coalescedTable.erase(address);
451 } else {
452 auto nextRequest = coalescedTable.at(address).front();
453 issueRequest(nextRequest);
454 }
455}
456
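// Invoked when the memory system reports that a write has fully completed.
// Once an instruction has received all of its write-complete acks and all of
// its requests have been issued, the pending write entry is retired and the
// requesting CU is acknowledged.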
457void
458GPUCoalescer::writeCompleteCallback(Addr address,
459 uint64_t instSeqNum,
460 MachineType mach)
461{
462 DPRINTF(GPUCoalescer, "writeCompleteCallback for address 0x%x"
463 " instSeqNum = %d\n", address, instSeqNum);
464
465 assert(pendingWriteInsts.count(instSeqNum) == 1);
466 PendingWriteInst& inst = pendingWriteInsts[instSeqNum];
467
468 // check the uncoalescedTable to see whether all requests for the inst
469 // have been issued or not
470 bool reqsAllIssued = uncoalescedTable.areRequestsDone(instSeqNum);
471 DPRINTF(GPUCoalescer, "instSeqNum = %d, pendingStores=%d, "
472 "reqsAllIssued=%d\n", reqsAllIssued,
473 inst.getNumPendingStores()-1, reqsAllIssued);
474
475 if (inst.receiveWriteCompleteAck() && reqsAllIssued) {
476 // if the pending write instruction has received all write completion
477 // callbacks for its issued Ruby requests, we can now respond to
478 // the requesting CU in one response packet.
479 inst.ackWriteCompletion(m_usingRubyTester);
480
481 DPRINTF(GPUCoalescer, "write inst %d completed at coalescer\n",
482 instSeqNum);
483 pendingWriteInsts.erase(instSeqNum);
484 }
485}
486
487void
488GPUCoalescer::readCallback(Addr address, DataBlock& data)
489{
490 readCallback(address, MachineType_NULL, data, false);
491}
492
493void
494GPUCoalescer::readCallback(Addr address,
495 MachineType mach,
496 DataBlock& data,
497 bool externalHit = false)
498{
499 readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0), externalHit);
500}
501
502void
503GPUCoalescer::readCallback(Addr address,
504 MachineType mach,
505 DataBlock& data,
506 Cycles initialRequestTime,
507 Cycles forwardRequestTime,
508 Cycles firstResponseTime,
509 bool externalHit = false)
510{
511
512 readCallback(address, mach, data,
513 initialRequestTime, forwardRequestTime, firstResponseTime,
514 false, externalHit);
515}
516
517void
518GPUCoalescer::readCallback(Addr address,
519 MachineType mach,
520 DataBlock& data,
521 Cycles initialRequestTime,
522 Cycles forwardRequestTime,
523 Cycles firstResponseTime,
524 bool isRegion,
525 bool externalHit = false)
526{
527 assert(address == makeLineAddress(address));
528 assert(coalescedTable.count(address));
529
530 auto crequest = coalescedTable.at(address).front();
531 fatal_if(crequest->getRubyType() != RubyRequestType_LD,
532 "readCallback received non-read type response\n");
533
534 bool mshr_hit_under_miss = false;
535 // Iterate over the coalesced requests to respond to as many loads as
536 // possible until another request type is seen. Models MSHR for
537 // Coalescer. Do not respond to pending loads that have SLC/GLC flags
538 // set; issue them instead
539 while (crequest->getRubyType() == RubyRequestType_LD) {
540 hitCallback(crequest, mach, data, true,
541 crequest->getIssueTime(), forwardRequestTime, firstResponseTime,
542 isRegion, externalHit, mshr_hit_under_miss);
543
544 delete crequest;
545 coalescedTable.at(address).pop_front();
546 if (coalescedTable.at(address).empty()) {
547 break;
548 }
549
550 crequest = coalescedTable.at(address).front();
551
552 PacketPtr pkt = crequest->getFirstPkt();
553 bool is_request_local = !pkt->isGLCSet() && !pkt->isSLCSet();
554 if (!is_request_local) {
555 break;
556 }
557
558 mshr_hit_under_miss = true;
559 }
560
561 if (coalescedTable.at(address).empty()) {
562 coalescedTable.erase(address);
563 } else {
564 auto nextRequest = coalescedTable.at(address).front();
565 issueRequest(nextRequest);
566 }
567}
568
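// Common completion path for loads, stores, and atomics: records statistics,
// copies response data into each coalesced packet (atomic results are taken
// from the data block's atomic log), and hands the packets back to their
// response ports via completeHitCallback().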
569void
570GPUCoalescer::hitCallback(CoalescedRequest* crequest,
571 MachineType mach,
572 DataBlock& data,
573 bool success,
574 Cycles initialRequestTime,
575 Cycles forwardRequestTime,
576 Cycles firstResponseTime,
577 bool isRegion,
578 bool externalHit = false,
579 bool mshrHitUnderMiss = false)
580{
581 PacketPtr pkt = crequest->getFirstPkt();
582 Addr request_address = pkt->getAddr();
583 [[maybe_unused]] Addr request_line_address =
584 makeLineAddress(request_address);
585
586 RubyRequestType type = crequest->getRubyType();
587
588 DPRINTF(GPUCoalescer, "Got hitCallback for 0x%X\n", request_line_address);
589
590 DPRINTF(RubyHitMiss, "GPU TCP Cache %s at %#x\n",
591 externalHit ? "hit" : "miss",
592 printAddress(request_address));
593
594 recordStats(crequest, mach,
595 initialRequestTime,
596 forwardRequestTime,
597 firstResponseTime,
598 isRegion,
599 mshrHitUnderMiss);
600 // update the data
601 //
602 // MUST ADD DOING THIS FOR EACH REQUEST IN COALESCER
603 std::vector<PacketPtr> pktList = crequest->getPackets();
604
605 uint8_t* log = nullptr;
606 DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
607 pktList.size(), request_line_address);
608 uint32_t offset;
609 int pkt_size;
610 for (auto& pkt : pktList) {
611 offset = getOffset(pkt->getAddr());
612 pkt_size = pkt->getSize();
613 request_address = pkt->getAddr();
614
615 // When the Ruby system is in cooldown phase, the requests come from
616 // the cache recorder. These requests do not get coalesced and
617 // do not return valid data.
618 if (m_ruby_system->getCooldownEnabled())
619 continue;
620
621 if (pkt->getPtr<uint8_t>()) {
622 switch(type) {
623 // Store and AtomicNoReturns follow the same path, as the
624 // data response is not needed.
625 case RubyRequestType_ATOMIC_NO_RETURN:
626 assert(pkt->isAtomicOp());
627 break;
628 case RubyRequestType_ST:
629 break;
630 case RubyRequestType_LD:
631 pkt->setData(data.getData(offset, pkt_size));
632 break;
633 case RubyRequestType_ATOMIC_RETURN:
634 assert(pkt->isAtomicOp());
635 // Atomic operations are performed by the WriteMask
636 // in packet order, set by the crequest. Thus, when
637 // unpacking the changes from the log, we read from
638 // the front of the log to correctly map response
639 // data into the packets.
640
641 // Log entry contains the old value before the current
642 // atomic operation occurred.
643 log = data.popAtomicLogEntryFront();
644 pkt->setData(&log[offset]);
645 delete [] log;
646 log = nullptr;
647 break;
648 default:
649 panic("Unsupported ruby packet type:%s\n",
650 RubyRequestType_to_string(type));
651 break;
652 }
653 } else {
654 DPRINTF(MemoryAccess,
655 "WARNING. Data not transfered from Ruby to M5 for type " \
656 "%s\n",
657 RubyRequestType_to_string(type));
658 }
659 }
660 assert(data.numAtomicLogEntries() == 0);
661
662 m_outstanding_count--;
663 assert(m_outstanding_count >= 0);
664
665 completeHitCallback(pktList);
666}
667
668bool
669GPUCoalescer::empty() const
670{
671 return coalescedTable.empty();
672}
673
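// Translates a gem5 packet into the Ruby request type used by the GPU
// protocol. LL/SC, locked RMW, and instruction fetches are not expected
// through this path.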
674RubyRequestType
675GPUCoalescer::getRequestType(PacketPtr pkt)
676{
677 RubyRequestType req_type = RubyRequestType_NULL;
678
679 // These types are not supported or not used in GPU caches.
680 assert(!pkt->req->isLLSC());
681 assert(!pkt->req->isLockedRMW());
682 assert(!pkt->req->isInstFetch());
683
684 if (pkt->req->isAtomicReturn()) {
685 req_type = RubyRequestType_ATOMIC_RETURN;
686 } else if (pkt->req->isAtomicNoReturn()) {
687 req_type = RubyRequestType_ATOMIC_NO_RETURN;
688 } else if (pkt->isRead()) {
689 req_type = RubyRequestType_LD;
690 } else if (pkt->isWrite()) {
691 req_type = RubyRequestType_ST;
692 } else if (pkt->isFlush()) {
693 req_type = RubyRequestType_FLUSH;
694 } else {
695 panic("Unsupported ruby packet type\n");
696 }
697
698 return req_type;
699}
700
701// Places an uncoalesced packet in uncoalescedTable. If the packet is a
702// special type (MemFence, scoping, etc), it is issued immediately.
703RequestStatus
704GPUCoalescer::makeRequest(PacketPtr pkt)
705{
706 if (pkt->cmd == MemCmd::MemSyncReq) {
707 // issue mem_sync requests immediately to the cache system without
708 // going through uncoalescedTable like normal LD/ST/Atomic requests
709 issueMemSyncRequest(pkt);
710 } else {
711 // all packets must have valid instruction sequence numbers
712 assert(pkt->req->hasInstSeqNum());
713
714 // otherwise, this must be either read or write command
715 assert(pkt->isRead() || pkt->isWrite() || pkt->isFlush());
716
717 InstSeqNum seq_num = pkt->req->getReqInstSeqNum();
718
719 // in the case of protocol tester, there is one packet per sequence
720 // number. The number of packets during simulation depends on the
721 // number of lanes active for that vmem request (i.e., the popcnt
722 // of the exec_mask).
723 int num_packets = 1;
724
725 // When Ruby is in warmup or cooldown phase, the requests come from
726 // the cache recorder. There is no dynamic instruction associated
727 // with these requests either
728 if (!m_ruby_system->getWarmupEnabled()
729 && !m_ruby_system->getCooldownEnabled()) {
730 if (!m_usingRubyTester) {
731 num_packets = 0;
732 for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) {
733 num_packets += getDynInst(pkt)->getLaneStatus(i);
734 }
735 }
736 }
737
738 // the pkt is temporarily stored in the uncoalesced table until
739 // it's picked for the coalescing process later in this cycle or in a
740 // future cycle. Packets remaining is set to the number of expected
741 // requests from the instruction based on its exec_mask.
742 uncoalescedTable.insertPacket(pkt);
743 uncoalescedTable.insertReqType(pkt, getRequestType(pkt));
744 uncoalescedTable.initPacketsRemaining(seq_num, num_packets);
745 DPRINTF(GPUCoalescer, "Put pkt with addr 0x%X to uncoalescedTable\n",
746 pkt->getAddr());
747
748 // we schedule an issue event here to process the uncoalesced table
749 // and try to issue Ruby request to cache system
750 if (!issueEvent.scheduled()) {
751 DPRINTF(GPUCoalescer, "Scheduled issueEvent for seqNum %d\n",
752 seq_num);
753 schedule(issueEvent, curTick());
754 }
755 }
756
757 // we always return RequestStatus_Issued in this coalescer
758 // because the coalescer's resources were checked earlier and the
759 // coalescer is queueing up aliased requests in its coalesced table
760 return RequestStatus_Issued;
761}
762
763template <class KEY, class VALUE>
764std::ostream &
765operator<<(std::ostream &out, const std::unordered_map<KEY, VALUE> &map)
766{
767 out << "[";
768 for (auto i = map.begin(); i != map.end(); ++i)
769 out << " " << i->first << "=" << i->second;
770 out << " ]";
771
772 return out;
773}
774
775void
776GPUCoalescer::print(std::ostream& out) const
777{
778 out << "[GPUCoalescer: " << m_version
779 << ", outstanding requests: " << m_outstanding_count
780 << "]";
781}
782
795
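// Tries to fold a packet into an existing coalesced request with the same
// line address and instruction sequence number; otherwise creates a new
// CoalescedRequest, subject to the outstanding-request limit. Returns false
// if the packet cannot be accepted yet.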
796bool
797GPUCoalescer::coalescePacket(PacketPtr pkt)
798{
799 uint64_t seqNum = pkt->req->getReqInstSeqNum();
800 Addr line_addr = makeLineAddress(pkt->getAddr());
801
802 // If the packet has the same line address as a request already in the
803 // coalescedTable and has the same sequence number, it can be coalesced.
804 if (coalescedTable.count(line_addr)) {
805 // Search for a previous coalesced request with the same seqNum.
806 auto& creqQueue = coalescedTable.at(line_addr);
807 auto citer = std::find_if(creqQueue.begin(), creqQueue.end(),
808 [&](CoalescedRequest* c) { return c->getSeqNum() == seqNum; }
809 );
810 if (citer != creqQueue.end()) {
811 (*citer)->insertPacket(pkt);
812 return true;
813 }
814 }
815
816 if (m_outstanding_count < m_max_outstanding_requests) {
817 // This is an "aliased" or new request. Create a RubyRequest and
818 // append it to the list of "targets" in the coalescing table.
819 DPRINTF(GPUCoalescer, "Creating new or aliased request for 0x%X\n",
820 line_addr);
821
822 CoalescedRequest *creq = new CoalescedRequest(seqNum);
823 creq->insertPacket(pkt);
824 creq->setRubyType(getRequestType(pkt));
825 creq->setIssueTime(curCycle());
826
827 if (!coalescedTable.count(line_addr)) {
828 // If there is no outstanding request for this line address,
829 // create a new coalesced request and issue it immediately.
830 auto reqList = std::deque<CoalescedRequest*> { creq };
831 coalescedTable.insert(std::make_pair(line_addr, reqList));
832 if (!coalescedReqs.count(seqNum)) {
833 coalescedReqs.insert(std::make_pair(seqNum, reqList));
834 } else {
835 coalescedReqs.at(seqNum).push_back(creq);
836 }
837 } else {
838 // The request is for a line address that is already outstanding
839 // but for a different instruction. Add it as a new request to be
840 // issued when the current outstanding request is completed.
841 coalescedTable.at(line_addr).push_back(creq);
842 DPRINTF(GPUCoalescer, "found address 0x%X with new seqNum %d\n",
843 line_addr, seqNum);
844 }
845
846 // In both cases, requests are added to the coalescing table and will
847 // be counted as outstanding requests.
848 m_outstanding_count++;
849
850 // We track all issued or to-be-issued Ruby requests associated with
851 // write instructions. An instruction may have multiple Ruby
852 // requests.
853 if (pkt->cmd == MemCmd::WriteReq) {
854 DPRINTF(GPUCoalescer, "adding write inst %d at line 0x%x to"
855 " the pending write instruction list\n", seqNum,
856 line_addr);
857
858 RubyPort::SenderState* ss =
859 safe_cast<RubyPort::SenderState*>(pkt->senderState);
860
861 // we need to save this port because it will be used to call
862 // back the requesting CU when we receive write
863 // complete callbacks for all issued Ruby requests of this
864 // instruction.
865 RubyPort::MemResponsePort* mem_response_port = ss->port;
866
867 GPUDynInstPtr gpuDynInst = nullptr;
868
869 if (!m_usingRubyTester) {
870 // If this coalescer is connected to a real CU, we need
871 // to save the corresponding gpu dynamic instruction.
872 // CU will use that instruction to decrement wait counters
873 // in the issuing wavefront.
874 // For Ruby tester, gpuDynInst == nullptr
875 gpuDynInst = getDynInst(pkt);
876 }
877
878 PendingWriteInst& inst = pendingWriteInsts[seqNum];
879 inst.addPendingReq(mem_response_port, gpuDynInst,
880 m_usingRubyTester);
881 }
882
883 return true;
884 }
885
886 // The maximum number of outstanding requests has been issued.
887 return false;
888}
889
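// Issue-event handler scheduled by makeRequest() and completeHitCallback().
// Examines up to coalescingWindow instructions per cycle, issues any newly
// formed coalesced requests, and releases resources for instructions whose
// packets have all been coalesced.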
890void
891GPUCoalescer::completeIssue()
892{
893 // Iterate over the maximum number of instructions we can coalesce
894 // per cycle (coalescingWindow).
895 for (int instIdx = 0; instIdx < coalescingWindow; ++instIdx) {
896 PerInstPackets *pkt_list =
897 uncoalescedTable.getInstPackets(instIdx);
898
899 // getInstPackets will return nullptr if no instruction
900 // exists at the current offset.
901 if (!pkt_list) {
902 break;
903 } else if (pkt_list->empty()) {
904 // Found something, but it has not been cleaned up by update
905 // resources yet. See if there is anything else to coalesce.
906 // Assume we can't check anymore if the coalescing window is 1.
907 continue;
908 } else {
909 // All packets in the list have the same seqNum, use first.
910 InstSeqNum seq_num = pkt_list->front()->req->getReqInstSeqNum();
911
912 // The difference in list size before and after tells us the
913 // number of packets which were coalesced.
914 size_t pkt_list_size = pkt_list->size();
915
916 // Since we have a pointer to the list of packets in the inst,
917 // erase them from the list if coalescing is successful and
918 // leave them in the list otherwise. This aggressively attempts
919 // to coalesce as many packets as possible from the current inst.
920 pkt_list->remove_if(
921 [&](PacketPtr pkt) { return coalescePacket(pkt); }
922 );
923
924 if (coalescedReqs.count(seq_num)) {
925 auto& creqs = coalescedReqs.at(seq_num);
926 for (auto creq : creqs) {
927 DPRINTF(GPUCoalescer, "Issued req type %s seqNum %d\n",
928 RubyRequestType_to_string(creq->getRubyType()),
929 seq_num);
930 issueRequest(creq);
931 }
932 coalescedReqs.erase(seq_num);
933 }
934
935 assert(pkt_list_size >= pkt_list->size());
936 size_t pkt_list_diff = pkt_list_size - pkt_list->size();
937
938 int num_remaining = uncoalescedTable.getPacketsRemaining(seq_num);
939 num_remaining -= pkt_list_diff;
940 assert(num_remaining >= 0);
941
942 uncoalescedTable.setPacketsRemaining(seq_num, num_remaining);
944 "Coalesced %d pkts for seqNum %d, %d remaining\n",
945 pkt_list_diff, seq_num, num_remaining);
946 }
947 }
948
949 // Clean up any instructions in the uncoalesced table that have had
950 // all of their packets coalesced and return a token for that column.
951 uncoalescedTable.updateResources();
952
953 // have Kernel End releases been issued this cycle
954 int len = newKernelEnds.size();
955 for (int i = 0; i < len; i++) {
956 kernelCallback(newKernelEnds[i]);
957 }
958 newKernelEnds.clear();
959}
960
961void
962GPUCoalescer::evictionCallback(Addr address)
963{
964 ruby_eviction_callback(address);
965}
966
967void
968GPUCoalescer::kernelCallback(int wavefront_id)
969{
970 assert(kernelEndList.count(wavefront_id));
971
972 ruby_hit_callback(kernelEndList[wavefront_id]);
973
974 kernelEndList.erase(wavefront_id);
975}
976
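// Response path for atomic operations. Unlike readCallback(), only the
// request at the head of the coalesced list is completed per callback.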
977void
978GPUCoalescer::atomicCallback(Addr address,
979 MachineType mach,
980 const DataBlock& data)
981{
982 assert(address == makeLineAddress(address));
983 assert(coalescedTable.count(address));
984
985 auto crequest = coalescedTable.at(address).front();
986
987 fatal_if((crequest->getRubyType() != RubyRequestType_ATOMIC &&
988 crequest->getRubyType() != RubyRequestType_ATOMIC_RETURN &&
989 crequest->getRubyType() != RubyRequestType_ATOMIC_NO_RETURN),
990 "atomicCallback saw non-atomic type response\n");
991
992 hitCallback(crequest, mach, (DataBlock&)data, true,
993 crequest->getIssueTime(), Cycles(0), Cycles(0), false, false);
994
995 delete crequest;
996 coalescedTable.at(address).pop_front();
997
998 if (coalescedTable.at(address).empty()) {
999 coalescedTable.erase(address);
1000 } else {
1001 auto nextRequest = coalescedTable.at(address).front();
1002 issueRequest(nextRequest);
1003 }
1004}
1005
1006void
1007GPUCoalescer::completeHitCallback(std::vector<PacketPtr>& mylist)
1008{
1009 for (auto& pkt : mylist) {
1010 // When Ruby is in warmup or cooldown phase, the requests come
1011 // from the cache recorder. They do not track which port to use
1012 // and do not need to send the response back
1013 if (!m_ruby_system->getWarmupEnabled()
1014 && !m_ruby_system->getCooldownEnabled()) {
1015 RubyPort::SenderState *ss =
1016 safe_cast<RubyPort::SenderState *>(pkt->senderState);
1017 MemResponsePort *port = ss->port;
1018 assert(port != NULL);
1019
1020 pkt->senderState = ss->predecessor;
1021
1022 if (pkt->cmd != MemCmd::WriteReq) {
1023 // for WriteReq, we keep the original senderState until
1024 // writeCompleteCallback
1025 delete ss;
1026 }
1027
1028 port->hitCallback(pkt);
1029 trySendRetries();
1030 }
1031 }
1032
1033 // We schedule an event in the same tick as hitCallback (similar to
1034 // makeRequest) rather than calling completeIssue directly to reduce
1035 // function calls to complete issue. This can only happen if the max
1036 // outstanding requests is less than the number of slots in the
1037 // uncoalesced table and makeRequest is not called again.
1038 if (uncoalescedTable.packetAvailable() && !issueEvent.scheduled()) {
1039 schedule(issueEvent, curTick());
1040 }
1041
1042 RubySystem *rs = m_ruby_system;
1043 if (m_ruby_system->getWarmupEnabled()) {
1044 rs->m_cache_recorder->enqueueNextFetchRequest();
1045 } else if (m_ruby_system->getCooldownEnabled()) {
1046 rs->m_cache_recorder->enqueueNextFlushRequest();
1047 } else {
1048 testDrainComplete();
1049 }
1050}
1051
1052void
1053GPUCoalescer::recordStats(CoalescedRequest* crequest,
1054 MachineType mach,
1055 Cycles initialRequestTime,
1056 Cycles forwardRequestTime,
1057 Cycles firstResponseTime,
1058 bool isRegion, bool mshrHitUnderMiss)
1059{
1060 RubyRequestType type = crequest->getRubyType();
1061
1062 if (mshrHitUnderMiss) {
1063 // Add the number of mshr hits under misses to the
1064 // TCP demand hits stat.
1065 // We don't need to profile misses since they will be
1066 // profiled at the TCP. Only the MSHR hits under misses
1067 // need to be profiled here
1068 PacketPtr pkt = crequest->getFirstPkt();
1069 if (!pkt->isGLCSet() &&
1070 !pkt->isSLCSet()) {
1071 m_dataCache_ptr->profileDemandHit();
1072 }
1073
1074 // Since the request hit in the mshr, update mshr stats
1075 if (type == RubyRequestType_LD) {
1076 stats.m_mshr_ld_hits_under_miss++;
1077 }
1078 } else {
1079 if (type == RubyRequestType_LD) {
1080 stats.m_mshr_ld_misses++;
1081 } else {
1082 stats.m_mshr_st_misses++;
1083 }
1084 }
1085}
1086
1087GPUCoalescer::GPUCoalescerStats::GPUCoalescerStats(statistics::Group *parent)
1088 : statistics::Group(parent),
1089 ADD_STAT(m_mshr_ld_hits_under_miss,
1090 "Number of load requests that hit in the coalescer MSHR"),
1091 ADD_STAT(m_mshr_ld_misses,
1092 "Number of load requests that miss in the coalescer MSHR"),
1093 ADD_STAT(m_mshr_st_misses,
1094 "Number of store requests that miss in the coalescer MSHR"),
1096 "Number of mshr accesses",
1099{
1100}
1101
1102} // namespace ruby
1103} // namespace gem5