38#include "debug/GPUCoalescer.hh" 
   39#include "debug/MemoryAccess.hh" 
   40#include "debug/ProtocolTrace.hh" 
   41#include "debug/RubyPort.hh" 
   42#include "debug/RubyStats.hh" 
   53#include "params/RubyGPUCoalescer.hh" 
   69    uint64_t seqNum = pkt->
req->getReqInstSeqNum();
 
  109    auto instMapIter = 
instMap.begin();
 
  110    std::advance(instMapIter, 
offset);
 
  112    return &(instMapIter->second);
 
  125            assert(iter->second.empty());
 
  146            ,inst.first, inst.second.size());
 
  147        if (inst.first == instSeqNum) { 
return false; }
 
  156    ss << 
"Listing pending packets from " << 
instMap.size() << 
" instructions";
 
  160           << inst.second.size() << 
" pending packets" << std::endl;
 
  170        for (
auto &pkt : it.second) {
 
  171            if (current_time - pkt->req->time() > threshold) {
 
  172                std::stringstream 
ss;
 
  175                panic(
"Possible Deadlock detected. Aborting!\n" 
  176                     "version: %d request.paddr: 0x%x uncoalescedTable: %d " 
  177                     "current time: %u issue_time: %d difference: %d\n" 
  179                      pkt->getAddr(), 
instMap.size(), current_time,
 
  180                      pkt->req->time(), current_time - pkt->req->time(),
 
  189      issueEvent([this]{ 
completeIssue(); }, 
"Issue coalesced request",
 
  191      uncoalescedTable(
this),
 
  192      deadlockCheckEvent([
this]{ wakeup(); }, 
"GPUCoalescer deadlock check"),
 
  193      gmTokenPort(
name() + 
".gmTokenPort")
 
  195    m_store_waiting_on_load_cycles = 0;
 
  196    m_store_waiting_on_store_cycles = 0;
 
  197    m_load_waiting_on_store_cycles = 0;
 
  198    m_load_waiting_on_load_cycles = 0;
 
  200    m_outstanding_count = 0;
 
  202    coalescingWindow = 
p.max_coalesces_per_cycle;
 
  204    m_max_outstanding_requests = 0;
 
  205    m_instCache_ptr = 
nullptr;
 
  206    m_dataCache_ptr = 
nullptr;
 
  208    m_instCache_ptr = 
p.icache;
 
  209    m_dataCache_ptr = 
p.dcache;
 
  210    m_max_outstanding_requests = 
p.max_outstanding_requests;
 
  211    m_deadlock_threshold = 
p.deadlock_threshold;
 
  213    assert(m_max_outstanding_requests > 0);
 
  214    assert(m_deadlock_threshold > 0);
 
  215    assert(m_instCache_ptr);
 
  216    assert(m_dataCache_ptr);
 
  218    m_runningGarnetStandalone = 
p.garnet_standalone;
 
  224    m_outstandReqHist.init(10);
 
  225    m_latencyHist.init(10);
 
  226    m_missLatencyHist.init(10);
 
  228    for (
int i = 0; 
i < RubyRequestType_NUM; 
i++) {
 
  230        m_typeLatencyHist[
i]->init(10);
 
  233        m_missTypeLatencyHist[
i]->init(10);
 
  236    for (
int i = 0; 
i < MachineType_NUM; 
i++) {
 
  237        m_missMachLatencyHist.push_back(
new statistics::Histogram());
 
  238        m_missMachLatencyHist[
i]->init(10);
 
  240        m_IssueToInitialDelayHist.push_back(
new statistics::Histogram());
 
  241        m_IssueToInitialDelayHist[
i]->init(10);
 
  243        m_InitialToForwardDelayHist.push_back(
new statistics::Histogram());
 
  244        m_InitialToForwardDelayHist[
i]->init(10);
 
  246        m_ForwardToFirstResponseDelayHist.push_back(
 
  247            new statistics::Histogram());
 
  248        m_ForwardToFirstResponseDelayHist[
i]->init(10);
 
  250        m_FirstResponseToCompletionDelayHist.push_back(
 
  251            new statistics::Histogram());
 
  252        m_FirstResponseToCompletionDelayHist[
i]->init(10);
 
  255    for (
int i = 0; 
i < RubyRequestType_NUM; 
i++) {
 
  256        m_missTypeMachLatencyHist.push_back(
 
  259        for (
int j = 0; 
j < MachineType_NUM; 
j++) {
 
  260            m_missTypeMachLatencyHist[
i].push_back(
 
  261                new statistics::Histogram());
 
  262            m_missTypeMachLatencyHist[
i][
j]->init(10);
 
  275    if (if_name == 
"gmTokenPort") {
 
  288        for (
auto& req : requestList.second) {
 
  290                std::stringstream 
ss;
 
  292                warn(
"GPUCoalescer %d Possible deadlock detected!\n%s\n",
 
  294                panic(
"Aborting due to deadlock!\n");
 
  313       << 
" outstanding requests in the coalesced table\n";
 
  316        for (
auto& request : requestList.second) {
 
  318               << 
"\tInstruction sequence number: " 
  319               << request->getSeqNum() << 
"\n" 
  321               << RubyRequestType_to_string(request->getRubyType()) << 
"\n" 
  322               << 
"\t\tNumber of associated packets: " 
  323               << request->getPackets().size() << 
"\n" 
  324               << 
"\t\tIssue time: " 
  326               << 
"\t\tDifference from current tick: " 
  340    for (
int i = 0; 
i < RubyRequestType_NUM; 
i++) {
 
  343        for (
int j = 0; 
j < MachineType_NUM; 
j++) {
 
  348    for (
int i = 0; 
i < MachineType_NUM; 
i++) {
 
  396                         Cycles initialRequestTime,
 
  397                         Cycles forwardRequestTime,
 
  401                  initialRequestTime, forwardRequestTime, firstResponseTime,
 
  409                         Cycles initialRequestTime,
 
  410                         Cycles forwardRequestTime,
 
  420                forwardRequestTime, firstResponseTime, isRegion);
 
  440            " instSeqNum = %d\n", address, instSeqNum);
 
  449                    "reqsAllIssued=%d\n", reqsAllIssued,
 
  482                        Cycles initialRequestTime,
 
  483                        Cycles forwardRequestTime,
 
  488                 initialRequestTime, forwardRequestTime, firstResponseTime,
 
  496                        Cycles initialRequestTime,
 
  497                        Cycles forwardRequestTime,
 
  505    fatal_if(crequest->getRubyType() != RubyRequestType_LD,
 
  506             "readCallback received non-read type response\n");
 
  510    while (crequest->getRubyType() == RubyRequestType_LD) {
 
  512                    forwardRequestTime, firstResponseTime, isRegion);
 
  536                       Cycles initialRequestTime,
 
  537                       Cycles forwardRequestTime,
 
  543    [[maybe_unused]] 
Addr request_line_address =
 
  560            pktList.size(), request_line_address);
 
  561    for (
auto& pkt : pktList) {
 
  562        request_address = pkt->
getAddr();
 
  563        if (pkt->
getPtr<uint8_t>()) {
 
  564            if ((
type == RubyRequestType_LD) ||
 
  565                (
type == RubyRequestType_ATOMIC) ||
 
  566                (
type == RubyRequestType_ATOMIC_RETURN) ||
 
  567                (
type == RubyRequestType_IFETCH) ||
 
  568                (
type == RubyRequestType_RMW_Read) ||
 
  569                (
type == RubyRequestType_Locked_RMW_Read) ||
 
  570                (
type == RubyRequestType_Load_Linked)) {
 
  579                    "WARNING.  Data not transfered from Ruby to M5 for type " \
 
  581                    RubyRequestType_to_string(
type));
 
  600    RubyRequestType req_type = RubyRequestType_NULL;
 
  603    assert(!pkt->
req->isLLSC());
 
  604    assert(!pkt->
req->isLockedRMW());
 
  605    assert(!pkt->
req->isInstFetch());
 
  608    if (pkt->
req->isAtomicReturn()) {
 
  609        req_type = RubyRequestType_ATOMIC_RETURN;
 
  610    } 
else if (pkt->
req->isAtomicNoReturn()) {
 
  611        req_type = RubyRequestType_ATOMIC_NO_RETURN;
 
  612    } 
else if (pkt->
isRead()) {
 
  613        req_type = RubyRequestType_LD;
 
  615        req_type = RubyRequestType_ST;
 
  617        panic(
"Unsupported ruby packet type\n");
 
  629    assert(pkt->
req->hasInstSeqNum());
 
  648            for (
int i = 0; 
i < TheGpuISA::NumVecElemPerVecReg; 
i++) {
 
  674    return RequestStatus_Issued;
 
  677template <
class KEY, 
class VALUE>
 
  679operator<<(std::ostream &out, 
const std::unordered_map<KEY, VALUE> &map)
 
  682    for (
auto i = map.begin(); 
i != map.end(); ++
i)
 
  683        out << 
" " << 
i->first << 
"=" << 
i->second;
 
  701            safe_cast<RubyPort::SenderState*>(pkt->
senderState);
 
  704        safe_cast<ComputeUnit::DataPort::SenderState*>
 
  713    uint64_t seqNum = pkt->
req->getReqInstSeqNum();
 
  721        auto citer = std::find_if(creqQueue.begin(), creqQueue.end(),
 
  724        if (citer != creqQueue.end()) {
 
  725            (*citer)->insertPacket(pkt);
 
  769                    " the pending write instruction list\n", seqNum,
 
  773                    safe_cast<RubyPort::SenderState*>(pkt->
senderState);
 
  817        } 
else if (pkt_list->empty()) {
 
  824            InstSeqNum seq_num = pkt_list->front()->req->getReqInstSeqNum();
 
  828            size_t pkt_list_size = pkt_list->size();
 
  840                for (
auto creq : creqs) {
 
  842                            RubyRequestType_to_string(creq->getRubyType()),
 
  849            assert(pkt_list_size >= pkt_list->size());
 
  850            size_t pkt_list_diff = pkt_list_size - pkt_list->size();
 
  853            num_remaining -= pkt_list_diff;
 
  854            assert(num_remaining >= 0);
 
  858                    "Coalesced %d pkts for seqNum %d, %d remaining\n",
 
  859                    pkt_list_diff, seq_num, num_remaining);
 
  869    for (
int i = 0; 
i < 
len; 
i++) {
 
  901    fatal_if((crequest->getRubyType() != RubyRequestType_ATOMIC &&
 
  902              crequest->getRubyType() != RubyRequestType_ATOMIC_RETURN &&
 
  903              crequest->getRubyType() != RubyRequestType_ATOMIC_NO_RETURN),
 
  904             "atomicCallback saw non-atomic type response\n");
 
  923    for (
auto& pkt : mylist) {
 
  925            safe_cast<RubyPort::SenderState *>(pkt->senderState);
 
  927        assert(port != NULL);
 
  929        pkt->senderState = 
ss->predecessor;
 
  956                                Cycles initialRequestTime,
 
  957                                Cycles forwardRequestTime,
 
  959                                bool success, 
bool isRegion)
 
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e.
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
SenderState * senderState
This packet's sender state.
T * getPtr()
get a pointer to the data ptr.
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
RequestPtr req
A pointer to the original request.
MemCmd cmd
The command field of the packet.
Ports are used to interface objects to each other.
void sendTokens(int num_tokens)
Return num_tokens tokens back to the request port.
void setIssueTime(Cycles _issueTime)
void insertPacket(PacketPtr pkt)
void setRubyType(RubyRequestType type)
PacketPtr getFirstPkt() const
RubyRequestType getRubyType() const
std::vector< PacketPtr > & getPackets()
virtual RubyRequestType getRequestType(PacketPtr pkt)
void writeCompleteCallback(Addr address, uint64_t instSeqNum, MachineType mach)
void writeCallback(Addr address, DataBlock &data)
std::vector< statistics::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
void evictionCallback(Addr address)
void kernelCallback(int wavefront_id)
virtual void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
virtual void issueMemSyncRequest(PacketPtr pkt)
void printRequestTable(std::stringstream &ss)
int m_max_outstanding_requests
GMTokenPort & getGMTokenPort()
std::vector< statistics::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
statistics::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
void resetStats() override
Callback to reset stats.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
RubyGPUCoalescerParams Params
void printProgress(std::ostream &out) const
void hitCallback(CoalescedRequest *crequest, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion)
std::unordered_map< uint64_t, std::deque< CoalescedRequest * > > coalescedReqs
UncoalescedTable uncoalescedTable
void insertKernel(int wavefront_id, PacketPtr pkt)
std::unordered_map< int, PacketPtr > kernelEndList
virtual void issueRequest(CoalescedRequest *crequest)=0
statistics::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
bool coalescePacket(PacketPtr pkt)
std::vector< statistics::Histogram * > m_InitialToForwardDelayHist
Cycles m_deadlock_threshold
std::vector< statistics::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< statistics::Histogram * > m_ForwardToFirstResponseDelayHist
RequestStatus makeRequest(PacketPtr pkt) override
void readCallback(Addr address, DataBlock &data)
void completeHitCallback(std::vector< PacketPtr > &mylist)
void recordMissLatency(CoalescedRequest *crequest, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool success, bool isRegion)
std::unordered_map< uint64_t, PendingWriteInst > pendingWriteInsts
std::vector< statistics::Histogram * > m_typeLatencyHist
GPUCoalescer(const Params &)
void print(std::ostream &out) const
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
std::vector< int > newKernelEnds
std::vector< statistics::Histogram * > m_missTypeLatencyHist
std::vector< std::vector< statistics::Histogram * > > m_missTypeMachLatencyHist
EventFunctionWrapper issueEvent
GPUDynInstPtr getDynInst(PacketPtr pkt) const
EventFunctionWrapper deadlockCheckEvent
void addPendingReq(RubyPort::MemResponsePort *port, GPUDynInstPtr inst, bool usingRubyTester)
void ackWriteCompletion(bool usingRubyTester)
int getNumPendingStores()
bool receiveWriteCompleteAck()
void hitCallback(PacketPtr pkt)
void ruby_hit_callback(PacketPtr pkt)
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
void ruby_eviction_callback(Addr address)
void setPacketsRemaining(InstSeqNum seqNum, int count)
void insertPacket(PacketPtr pkt)
void printRequestTable(std::stringstream &ss)
bool areRequestsDone(const InstSeqNum instSeqNum)
std::map< InstSeqNum, PerInstPackets > instMap
UncoalescedTable(GPUCoalescer *gc)
void initPacketsRemaining(InstSeqNum seqNum, int count)
int getPacketsRemaining(InstSeqNum seqNum)
void checkDeadlock(Tick threshold)
PerInstPackets * getInstPackets(int offset)
std::map< InstSeqNum, int > instPktsRemaining
void reset()
Reset stat value to default.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority Progress_Event_Pri
Progress events come at the end.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Addr makeLineAddress(Addr addr)
Addr getOffset(Addr addr)
std::string printAddress(Addr addr)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
std::ostream & operator<<(std::ostream &os, const ArmSemihosting::InPlaceArg &ipa)
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
uint64_t Tick
Tick count type.
Declaration of the Packet class.
GPUDynInstPtr _gpuDynInst
const std::string & name()