gem5 v24.1.0.1
Loading...
Searching...
No Matches
GPUCoalescer.hh
Go to the documentation of this file.
1/*
2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
33#define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
34
35#include <iostream>
36#include <unordered_map>
37
38#include "base/statistics.hh"
40#include "gpu-compute/misc.hh"
41#include "mem/request.hh"
44#include "mem/ruby/protocol/PrefetchBit.hh"
45#include "mem/ruby/protocol/RubyAccessMode.hh"
46#include "mem/ruby/protocol/RubyRequestType.hh"
47#include "mem/ruby/protocol/SequencerRequestType.hh"
49#include "mem/token_port.hh"
50
51namespace gem5
52{
53
54struct RubyGPUCoalescerParams;
55
56namespace ruby
57{
58
59class DataBlock;
60class CacheMsg;
61struct MachineID;
62class CacheMemory;
63
64// List of packets that belong to a specific instruction.
66
68{
69 public:
72
73 void insertPacket(PacketPtr pkt);
74 void insertReqType(PacketPtr pkt, RubyRequestType type);
75 bool packetAvailable();
76 void printRequestTable(std::stringstream& ss);
77
78 // Modify packets remaining map. Init sets value iff the seqNum has not
79 // yet been seen before. get/set act as a regular getter/setter.
80 void initPacketsRemaining(InstSeqNum seqNum, int count);
82 void setPacketsRemaining(InstSeqNum seqNum, int count);
83
84 // Returns a pointer to the list of packets corresponding to an
85 // instruction in the instruction map or nullptr if there are no
86 // instructions at the offset.
88 void updateResources();
89 bool areRequestsDone(const InstSeqNum instSeqNum);
90
91 // Check if a packet hasn't been removed from instMap in too long.
92 // Panics if a deadlock is detected and returns nothing otherwise.
93 void checkDeadlock(Tick threshold);
94
95 private:
97
98 // Maps an instruction's unique sequence number to a queue of packets
99 // which need responses. This data structure assumes the sequence number
100 // is monotonically increasing (which is true for CU class) in order to
101 // issue packets in age order.
102 std::map<InstSeqNum, PerInstPackets> instMap;
103
104 std::map<InstSeqNum, int> instPktsRemaining;
105
106 std::map<InstSeqNum, RubyRequestType> reqTypeMap;
107};
108
110{
111 public:
112 CoalescedRequest(uint64_t _seqNum)
113 : seqNum(_seqNum), issueTime(Cycles(0)),
114 rubyType(RubyRequestType_NULL)
115 {}
117
118 void insertPacket(PacketPtr pkt) { pkts.push_back(pkt); }
119 void setSeqNum(uint64_t _seqNum) { seqNum = _seqNum; }
120 void setIssueTime(Cycles _issueTime) { issueTime = _issueTime; }
121 void setRubyType(RubyRequestType type) { rubyType = type; }
122
123 uint64_t getSeqNum() const { return seqNum; }
124 PacketPtr getFirstPkt() const { return pkts[0]; }
125 Cycles getIssueTime() const { return issueTime; }
126 RubyRequestType getRubyType() const { return rubyType; }
128
129 private:
130 uint64_t seqNum;
132 RubyRequestType rubyType;
134};
135
136// PendingWriteInst tracks the number of outstanding Ruby requests
137// per write instruction. Once all requests associated with one instruction
138// are completely done in Ruby, we call back the requestor to mark
139// that this instruction is complete.
141{
142 public:
144 : numPendingStores(0),
145 originalPort(nullptr),
146 gpuDynInstPtr(nullptr)
147 {}
148
151
152 void
154 bool usingRubyTester)
155 {
156 assert(port);
157 originalPort = port;
158
159 if (!usingRubyTester) {
160 gpuDynInstPtr = inst;
161 }
162
164 }
165
166 // return true if no more ack is expected
167 bool
169 {
170 assert(numPendingStores > 0);
172 return (numPendingStores == 0) ? true : false;
173 }
174
175 // ack the original requestor that this write instruction is complete
176 void
177 ackWriteCompletion(bool usingRubyTester)
178 {
179 assert(numPendingStores == 0);
180
181 // make a response packet
182 PacketPtr pkt = new Packet(std::make_shared<Request>(),
184
185 if (!usingRubyTester) {
186 assert(gpuDynInstPtr);
189 (gpuDynInstPtr, 0, nullptr);
190 pkt->senderState = ss;
191 }
192
193 // send the ack response to the requestor
195 }
196
197 int
201
202 private:
203 // the number of stores waiting for writeCompleteCallback
205 // The original port that sent one of the packets associated with this
206 // write instruction. We may have more than one packet per instruction,
207 // which implies multiple ports per instruction. However, we need
208 // only 1 of the ports to call back the CU. Therefore, here we keep
209 // track of the port that sent the first packet of this instruction.
211 // similar to the originalPort, this gpuDynInstPtr is set only for
212 // the first packet of this instruction.
214};
215
216class GPUCoalescer : public RubyPort
217{
218 public:
220 {
221 public:
222 GMTokenPort(const std::string& name,
225 { }
227
228 protected:
231 bool recvTimingReq(PacketPtr) { return false; }
233 {
234 AddrRangeList ranges;
235 return ranges;
236 }
237 };
238
239 typedef RubyGPUCoalescerParams Params;
240 GPUCoalescer(const Params &);
242
243 Port &getPort(const std::string &if_name,
244 PortID idx = InvalidPortID) override;
245
246 // Public Methods
247 void wakeup(); // Used only for deadlock detection
248 void printRequestTable(std::stringstream& ss);
249
250 void printProgress(std::ostream& out) const;
251 void resetStats() override;
253
254 // each store request needs two callbacks:
255 // (1) writeCallback is called when the store is received and processed
256 // by TCP. This writeCallback does not guarantee the store is actually
257 // completed at its destination cache or memory. writeCallback helps
258 // release hardware resources (e.g., its entry in coalescedTable)
259 // allocated for the store so that subsequent requests will not be
260 // blocked unnecessarily due to hardware resource constraints.
261 // (2) writeCompleteCallback is called when the store is fully completed
262 // at its destination cache or memory. writeCompleteCallback
263 // guarantees that the store is fully completed. This callback
264 // will decrement hardware counters in CU
265 void writeCallback(Addr address, DataBlock& data);
266
267 void writeCallback(Addr address,
268 MachineType mach,
269 DataBlock& data);
270
271 void writeCallback(Addr address,
272 MachineType mach,
274 Cycles initialRequestTime,
275 Cycles forwardRequestTime,
276 Cycles firstResponseTime,
277 bool isRegion);
278
279 void writeCallback(Addr address,
280 MachineType mach,
282 Cycles initialRequestTime,
283 Cycles forwardRequestTime,
284 Cycles firstResponseTime);
285
286 void writeCompleteCallback(Addr address,
287 uint64_t instSeqNum,
288 MachineType mach);
289
290 void readCallback(Addr address, DataBlock& data);
291
292 void readCallback(Addr address,
293 MachineType mach,
295 bool externalHit);
296
297 void readCallback(Addr address,
298 MachineType mach,
300 Cycles initialRequestTime,
301 Cycles forwardRequestTime,
302 Cycles firstResponseTime,
303 bool externalHit);
304
305 void readCallback(Addr address,
306 MachineType mach,
308 Cycles initialRequestTime,
309 Cycles forwardRequestTime,
310 Cycles firstResponseTime,
311 bool isRegion,
312 bool externalHit);
313
314 /* atomics need their own callback because the data
315 might be const coming from SLICC */
316 virtual void atomicCallback(Addr address,
317 MachineType mach,
318 const DataBlock& data);
319
320 RequestStatus makeRequest(PacketPtr pkt) override;
321 int outstandingCount() const override { return m_outstanding_count; }
322
323 bool
325 {
327 }
328
329 void
334
335 bool empty() const;
336
337 void print(std::ostream& out) const;
338
339 void evictionCallback(Addr address);
340 void completeIssue();
341
342 void insertKernel(int wavefront_id, PacketPtr pkt);
343
345
347
349
353
358
361
363 getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
364 { return *m_missTypeMachLatencyHist[r][t]; }
365
368
370 getInitialToForwardDelayHist(const MachineType t) const
371 { return *m_InitialToForwardDelayHist[t]; }
372
376
380
381 protected:
382 bool tryCacheAccess(Addr addr, RubyRequestType type,
383 Addr pc, RubyAccessMode access_mode,
384 int size, DataBlock*& data_ptr);
385
386 // since the two following issue functions are protocol-specific,
387 // they must be implemented in a derived coalescer
388 virtual void issueRequest(CoalescedRequest* crequest) = 0;
389 virtual void issueMemSyncRequest(PacketPtr pkt) {}
390
391 void kernelCallback(int wavefront_id);
392
393 void hitCallback(CoalescedRequest* crequest,
394 MachineType mach,
396 bool success,
397 Cycles initialRequestTime,
398 Cycles forwardRequestTime,
399 Cycles firstResponseTime,
400 bool isRegion,
401 bool externalHit);
402 void recordMissLatency(CoalescedRequest* crequest,
403 MachineType mach,
404 Cycles initialRequestTime,
405 Cycles forwardRequestTime,
406 Cycles firstResponseTime,
407 bool success, bool isRegion);
409
410 virtual RubyRequestType getRequestType(PacketPtr pkt);
411
413
414 // Attempt to remove a packet from the uncoalescedTable and coalesce
415 // with a previous request from the same instruction. If there is no
416 // previous instruction and the max number of outstanding requests has
417 // not been reached, a new coalesced request is created and added to the
418 // "target" list of the coalescedTable.
419 bool coalescePacket(PacketPtr pkt);
420
422
423 protected:
426
429
430 // coalescingWindow is the maximum number of instructions that are
431 // allowed to be coalesced in a single cycle.
433
434 // The uncoalescedTable contains several "columns" which hold memory
435 // request packets for an instruction. The maximum size is the number of
436 // columns * the wavefront size.
438
439 // An MSHR-like struct for holding coalesced requests. The requests in
440 // this table may or may not be outstanding in the memory hierarchy. The
441 // maximum size is equal to the maximum outstanding requests for a CU
442 // (typically the number of blocks in TCP). If there are duplicates of
443 // an address, they are serviced in age order.
444 std::map<Addr, std::deque<CoalescedRequest*>> coalescedTable;
445 // Map of instruction sequence number to coalesced requests that get
446 // created in coalescePacket, used in completeIssue to send the fully
447 // coalesced request
448 std::unordered_map<uint64_t, std::deque<CoalescedRequest*>> coalescedReqs;
449
450 // a map between an instruction sequence number and PendingWriteInst
451 // this is used to do a final call back for each write when it is
452 // completely done in the memory system
453 std::unordered_map<uint64_t, PendingWriteInst> pendingWriteInsts;
454
455 // Global outstanding request count, across all request tables
458 std::unordered_map<int, PacketPtr> kernelEndList;
460
465
467
470
471// TODO - Need to update the following stats once the VIPER protocol
472// is re-integrated.
473// // m5 style stats for TCP hit/miss counts
474// statistics::Scalar GPU_TCPLdHits;
475// statistics::Scalar GPU_TCPLdTransfers;
476// statistics::Scalar GPU_TCCLdHits;
477// statistics::Scalar GPU_LdMiss;
478//
479// statistics::Scalar GPU_TCPStHits;
480// statistics::Scalar GPU_TCPStTransfers;
481// statistics::Scalar GPU_TCCStHits;
482// statistics::Scalar GPU_StMiss;
483//
484// statistics::Scalar CP_TCPLdHits;
485// statistics::Scalar CP_TCPLdTransfers;
486// statistics::Scalar CP_TCCLdHits;
487// statistics::Scalar CP_LdMiss;
488//
489// statistics::Scalar CP_TCPStHits;
490// statistics::Scalar CP_TCPStTransfers;
491// statistics::Scalar CP_TCCStHits;
492// statistics::Scalar CP_StMiss;
493
496
500
505
511
517
518// TODO - Need to update the following stats once the VIPER protocol
519// is re-integrated.
520// statistics::Distribution numHopDelays;
521// statistics::Distribution tcpToTccDelay;
522// statistics::Distribution tccToSdDelay;
523// statistics::Distribution sdToSdDelay;
524// statistics::Distribution sdToTccDelay;
525// statistics::Distribution tccToTcpDelay;
526//
527// statistics::Average avgTcpToTcc;
528// statistics::Average avgTccToSd;
529// statistics::Average avgSdToSd;
530// statistics::Average avgSdToTcc;
531// statistics::Average avgTccToTcp;
532
533 private:
534 // Token port is used to send/receive tokens to/from GPU's global memory
535 // pipeline across the port boundary. There is one per <wave size> data
536 // ports in the CU.
538
539 // Private copy constructor and assignment operator
542};
543
544inline std::ostream&
545operator<<(std::ostream& out, const GPUCoalescer& obj)
546{
547 obj.print(out);
548 out << std::flush;
549 return out;
550}
551
552} // namespace ruby
553} // namespace gem5
554
555#endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
const char data[]
Cycles is a wrapper class for representing cycle counts, i.e.
Definition types.hh:79
@ WriteCompleteResp
Definition packet.hh:92
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
SenderState * senderState
This packet's sender state.
Definition packet.hh:545
Ports are used to interface objects to each other.
Definition port.hh:62
const PortID id
A numeric identifier to distinguish ports in a vector, and set to InvalidPortID in case this port is ...
Definition port.hh:79
const std::string name() const
Return port name (for DPRINTF).
Definition port.hh:111
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the request port by calling its corresponding receive function.
Definition port.hh:454
CoalescedRequest(uint64_t _seqNum)
std::vector< PacketPtr > pkts
void setSeqNum(uint64_t _seqNum)
void setIssueTime(Cycles _issueTime)
void insertPacket(PacketPtr pkt)
void setRubyType(RubyRequestType type)
PacketPtr getFirstPkt() const
RubyRequestType getRubyType() const
std::vector< PacketPtr > & getPackets()
GMTokenPort(const std::string &name, PortID id=InvalidPortID)
Tick recvAtomic(PacketPtr)
Receive an atomic request packet from the peer.
void recvFunctional(PacketPtr)
Receive a functional request packet from the peer.
AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
bool recvTimingReq(PacketPtr)
Receive a timing request from the peer.
virtual RubyRequestType getRequestType(PacketPtr pkt)
void writeCompleteCallback(Addr address, uint64_t instSeqNum, MachineType mach)
void writeCallback(Addr address, DataBlock &data)
statistics::Histogram & getFirstResponseToCompletionDelayHist(const MachineType t) const
std::vector< statistics::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
GPUCoalescer & operator=(const GPUCoalescer &obj)
void evictionCallback(Addr address)
void kernelCallback(int wavefront_id)
statistics::Histogram & getInitialToForwardDelayHist(const MachineType t) const
virtual void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
RubySystem * getRubySystem()
virtual void issueMemSyncRequest(PacketPtr pkt)
void printRequestTable(std::stringstream &ss)
GMTokenPort & getGMTokenPort()
std::vector< statistics::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
statistics::Histogram & getIssueToInitialDelayHist(uint32_t t) const
statistics::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
void resetStats() override
Callback to reset stats.
statistics::Histogram & getOutstandReqHist()
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
statistics::Histogram & getForwardRequestToFirstResponseHist(const MachineType t) const
RubyGPUCoalescerParams Params
void printProgress(std::ostream &out) const
std::unordered_map< uint64_t, std::deque< CoalescedRequest * > > coalescedReqs
UncoalescedTable uncoalescedTable
void hitCallback(CoalescedRequest *crequest, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion, bool externalHit)
void insertKernel(int wavefront_id, PacketPtr pkt)
statistics::Histogram & getTypeLatencyHist(uint32_t t)
std::unordered_map< int, PacketPtr > kernelEndList
virtual void issueRequest(CoalescedRequest *crequest)=0
statistics::Histogram & getMissLatencyHist()
bool tryCacheAccess(Addr addr, RubyRequestType type, Addr pc, RubyAccessMode access_mode, int size, DataBlock *&data_ptr)
bool isDeadlockEventScheduled() const override
statistics::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
statistics::Histogram & getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
bool coalescePacket(PacketPtr pkt)
std::vector< statistics::Histogram * > m_InitialToForwardDelayHist
std::vector< statistics::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< statistics::Histogram * > m_ForwardToFirstResponseDelayHist
RequestStatus makeRequest(PacketPtr pkt) override
void readCallback(Addr address, DataBlock &data)
void completeHitCallback(std::vector< PacketPtr > &mylist)
void recordMissLatency(CoalescedRequest *crequest, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool success, bool isRegion)
std::unordered_map< uint64_t, PendingWriteInst > pendingWriteInsts
std::vector< statistics::Histogram * > m_typeLatencyHist
void print(std::ostream &out) const
statistics::Histogram & getMissMachLatencyHist(uint32_t t) const
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
std::vector< int > newKernelEnds
std::vector< statistics::Histogram * > m_missTypeLatencyHist
std::vector< std::vector< statistics::Histogram * > > m_missTypeMachLatencyHist
int outstandingCount() const override
statistics::Histogram & getMissTypeLatencyHist(uint32_t t)
CacheMemory * m_instCache_ptr
statistics::Histogram & getLatencyHist()
CacheMemory * m_dataCache_ptr
statistics::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
void descheduleDeadlockEvent() override
EventFunctionWrapper issueEvent
GPUDynInstPtr getDynInst(PacketPtr pkt) const
GPUCoalescer(const GPUCoalescer &obj)
EventFunctionWrapper deadlockCheckEvent
void addPendingReq(RubyPort::MemResponsePort *port, GPUDynInstPtr inst, bool usingRubyTester)
void ackWriteCompletion(bool usingRubyTester)
RubyPort::MemResponsePort * originalPort
RubySystem * m_ruby_system
Definition RubyPort.hh:207
void setPacketsRemaining(InstSeqNum seqNum, int count)
std::map< InstSeqNum, RubyRequestType > reqTypeMap
void insertPacket(PacketPtr pkt)
void printRequestTable(std::stringstream &ss)
bool areRequestsDone(const InstSeqNum instSeqNum)
void insertReqType(PacketPtr pkt, RubyRequestType type)
std::map< InstSeqNum, PerInstPackets > instMap
void initPacketsRemaining(InstSeqNum seqNum, int count)
int getPacketsRemaining(InstSeqNum seqNum)
void checkDeadlock(Tick threshold)
PerInstPackets * getInstPackets(int offset)
std::map< InstSeqNum, int > instPktsRemaining
A simple histogram stat.
STL list class.
Definition stl.hh:51
STL vector class.
Definition stl.hh:37
void deschedule(Event &event)
Definition eventq.hh:1021
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
Bitfield< 5 > t
Definition misc_types.hh:71
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 21 > ss
Definition misc_types.hh:60
Bitfield< 4 > pc
Bitfield< 3 > addr
Definition types.hh:84
std::list< PacketPtr > PerInstPackets
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
const PortID InvalidPortID
Definition types.hh:246
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition types.hh:245
uint64_t Tick
Tick count type.
Definition types.hh:58
std::ostream & operator<<(std::ostream &os, const BaseSemihosting::InPlaceArg &ipa)
uint64_t InstSeqNum
Definition inst_seq.hh:40
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
Declaration of Statistics objects.

Generated on Mon Jan 13 2025 04:28:41 for gem5 by doxygen 1.9.8