gem5  v20.0.0.0
VIPERCoalescer.cc
Go to the documentation of this file.
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "base/logging.hh"
#include "base/str.hh"
#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"

#endif // X86_ISA

#include "mem/ruby/system/VIPERCoalescer.hh"

#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"

using namespace std;

VIPERCoalescer *
VIPERCoalescerParams::create()
{
    return new VIPERCoalescer(this);
}

VIPERCoalescer::VIPERCoalescer(const Params *p)
    : GPUCoalescer(p)
{
    m_max_wb_per_cycle = p->max_wb_per_cycle;
    m_max_inv_per_cycle = p->max_inv_per_cycle;
    m_outstanding_inv = 0;
    m_outstanding_wb = 0;
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cacheline, the request is added to both
// the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cacheline and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb || m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidations\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine:
            // barriers and kernel-end requests can coalesce.
            // If it is a kernel begin, flush the cache.
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
//        return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on kernel end
        // (isKernel + isRelease)
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        if (m_outstanding_wb == 0) {
            for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus != RequestStatus_Issued) {
        // Request not issued; enqueue a retry
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on kernel begin
        // (isKernel + isAcquire)
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache
        invwbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq:
        // invalidate the L1 cache
        invL1();
    }
    // Request was successful
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}

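The acquire/release handling above reduces to a small decision table over the packet's kernel, acquire, and release attributes. The standalone sketch below (the function name viperMemOrderAction is hypothetical, not part of gem5) distills that dispatch for a request that issues successfully, assuming the VIPER convention that acquire operations invalidate the L1 and release operations write dirty L1 data back.

#include <iostream>
#include <string>

// Illustrative only: mirrors the dispatch in VIPERCoalescer::makeRequest().
std::string
viperMemOrderAction(bool isKernel, bool isAcquire, bool isRelease)
{
    if (isKernel && isRelease)
        return "wbL1";      // kernel end: write back dirty L1 data
    if (isKernel && isAcquire)
        return "invL1";     // kernel begin: invalidate clean L1 data
    if (isAcquire && isRelease)
        return "invwbL1";   // atomic acquire-release: write back and invalidate
    if (isRelease)
        return "wbL1";      // store-release: write back
    if (isAcquire)
        return "invL1";     // load-acquire or atomic-acquire: invalidate
    return "coalesce only"; // ordinary access: no cache maintenance
}

int
main()
{
    std::cout << viperMemOrderAction(false, true, false) << std::endl; // invL1
    std::cout << viperMemOrderAction(true, false, true) << std::endl;  // wbL1
    return 0;
}
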
void
VIPERCoalescer::wbCallback(Addr addr)
{
    m_outstanding_wb--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent
    assert(((int) m_outstanding_wb) >= 0);
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

void
VIPERCoalescer::invCallback(Addr addr)
{
    m_outstanding_inv--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent.
    // This probably won't happen, since
    // we don't wait on cache invalidations.
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

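Note the asymmetry between the two callbacks: both test m_outstanding_wb after decrementing their own counter, so completed writebacks are what release deferred kernel-end packets, while a completed invalidation only matters if no writebacks are pending. A minimal standalone model of that bookkeeping follows (the struct name OutstandingCounters is hypothetical, not gem5 code).

#include <cassert>
#include <cstdint>
#include <iostream>

// Illustrative only: mirrors the gating in wbCallback()/invCallback().
struct OutstandingCounters
{
    uint64_t wb = 0;   // outstanding writebacks (m_outstanding_wb)
    uint64_t inv = 0;  // outstanding invalidations (m_outstanding_inv)

    // A writeback completed; returns true when deferred kernel-end
    // work may now be issued.
    bool writebackDone() { assert(wb > 0); --wb; return wb == 0; }

    // An invalidation completed; issue is still gated on the writeback
    // count, not the invalidation count.
    bool invalidateDone() { assert(inv > 0); --inv; return wb == 0; }
};

int
main()
{
    OutstandingCounters c;
    c.wb = 2;
    c.inv = 1;
    std::cout << c.invalidateDone() << std::endl; // 0: writebacks still pending
    std::cout << c.writebackDone() << std::endl;  // 0: one writeback remains
    std::cout << c.writebackDone() << std::endl;  // 1: last writeback completed
    return 0;
}
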
/**
  * Invalidate L1 cache (Acquire)
  */
void
VIPERCoalescer::invL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_outstanding_inv);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_outstanding_inv);
}

/**
  * Writeback L1 cache (Release)
  */
void
VIPERCoalescer::wbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding before Cache Walk\n",
            m_outstanding_wb);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding after Cache Walk\n",
            m_outstanding_wb);
}

/**
  * Invalidate and Writeback L1 cache (Acquire&Release)
  */
void
VIPERCoalescer::invwbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
}
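
The three walk routines above repeat the same per-block enqueue and differ only in the RubyRequestType issued and the counter incremented. A possible consolidation is sketched below; enqueueCacheWalk is a hypothetical helper name, it is not part of VIPERCoalescer, and it would also need a declaration in the class header. It relies only on members already exercised above.

// Illustrative refactoring sketch, not gem5 code.
void
VIPERCoalescer::enqueueCacheWalk(RubyRequestType request_type,
                                 uint64_t &outstanding_counter)
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache, issuing one request per block
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        outstanding_counter++;
    }
}

// With such a helper, invwbL1() would reduce to two calls:
//     enqueueCacheWalk(RubyRequestType_REPLACEMENT, m_outstanding_inv);
//     enqueueCacheWalk(RubyRequestType_FLUSH, m_outstanding_wb);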
