38#include "debug/GPUCoalescer.hh"
39#include "debug/MemoryAccess.hh"
40#include "debug/ProtocolTrace.hh"
41#include "debug/RubyPort.hh"
42#include "debug/RubyStats.hh"
53#include "params/RubyGPUCoalescer.hh"
69 uint64_t seqNum = pkt->
req->getReqInstSeqNum();
79 uint64_t seqNum = pkt->
req->getReqInstSeqNum();
117 auto instMapIter =
instMap.begin();
118 std::advance(instMapIter,
offset);
120 return &(instMapIter->second);
133 assert(iter->second.empty());
146 if (
reqTypeMap[seq_num] != RubyRequestType_FLUSH) {
148 "Returning token seqNum %d\n", seq_num);
166 ,inst.first, inst.second.size());
167 if (inst.first == instSeqNum) {
return false; }
176 ss <<
"Listing pending packets from " <<
instMap.size() <<
" instructions";
180 << inst.second.size() <<
" pending packets" << std::endl;
190 for (
auto &pkt : it.second) {
191 if (current_time - pkt->req->time() > threshold) {
192 std::stringstream
ss;
195 panic(
"Possible Deadlock detected. Aborting!\n"
196 "version: %d request.paddr: 0x%x uncoalescedTable: %d "
197 "current time: %u issue_time: %d difference: %d\n"
199 pkt->getAddr(),
instMap.size(), current_time,
200 pkt->req->time(), current_time - pkt->req->time(),
209 issueEvent([this]{
completeIssue(); },
"Issue coalesced request",
211 uncoalescedTable(
this),
212 deadlockCheckEvent([
this]{ wakeup(); },
"GPUCoalescer deadlock check"),
213 gmTokenPort(
name() +
".gmTokenPort")
215 m_store_waiting_on_load_cycles = 0;
216 m_store_waiting_on_store_cycles = 0;
217 m_load_waiting_on_store_cycles = 0;
218 m_load_waiting_on_load_cycles = 0;
220 m_outstanding_count = 0;
222 coalescingWindow =
p.max_coalesces_per_cycle;
224 m_max_outstanding_requests = 0;
225 m_instCache_ptr =
nullptr;
226 m_dataCache_ptr =
nullptr;
228 m_instCache_ptr =
p.icache;
229 m_dataCache_ptr =
p.dcache;
230 m_max_outstanding_requests =
p.max_outstanding_requests;
231 m_deadlock_threshold =
p.deadlock_threshold;
233 assert(m_max_outstanding_requests > 0);
234 assert(m_deadlock_threshold > 0);
235 assert(m_instCache_ptr);
236 assert(m_dataCache_ptr);
238 m_runningGarnetStandalone =
p.garnet_standalone;
244 m_outstandReqHist.init(10);
245 m_latencyHist.init(10);
246 m_missLatencyHist.init(10);
248 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
250 m_typeLatencyHist[
i]->init(10);
253 m_missTypeLatencyHist[
i]->init(10);
256 for (
int i = 0;
i < MachineType_NUM;
i++) {
257 m_missMachLatencyHist.push_back(
new statistics::Histogram());
258 m_missMachLatencyHist[
i]->init(10);
260 m_IssueToInitialDelayHist.push_back(
new statistics::Histogram());
261 m_IssueToInitialDelayHist[
i]->init(10);
263 m_InitialToForwardDelayHist.push_back(
new statistics::Histogram());
264 m_InitialToForwardDelayHist[
i]->init(10);
266 m_ForwardToFirstResponseDelayHist.push_back(
267 new statistics::Histogram());
268 m_ForwardToFirstResponseDelayHist[
i]->init(10);
270 m_FirstResponseToCompletionDelayHist.push_back(
271 new statistics::Histogram());
272 m_FirstResponseToCompletionDelayHist[
i]->init(10);
275 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
276 m_missTypeMachLatencyHist.push_back(
279 for (
int j = 0; j < MachineType_NUM; j++) {
280 m_missTypeMachLatencyHist[
i].push_back(
281 new statistics::Histogram());
282 m_missTypeMachLatencyHist[
i][j]->init(10);
295 if (if_name ==
"gmTokenPort") {
308 for (
auto& req : requestList.second) {
310 std::stringstream
ss;
312 warn(
"GPUCoalescer %d Possible deadlock detected!\n%s\n",
314 panic(
"Aborting due to deadlock!\n");
333 <<
" outstanding requests in the coalesced table\n";
336 for (
auto& request : requestList.second) {
338 <<
"\tInstruction sequence number: "
339 << request->getSeqNum() <<
"\n"
341 << RubyRequestType_to_string(request->getRubyType()) <<
"\n"
342 <<
"\t\tNumber of associated packets: "
343 << request->getPackets().size() <<
"\n"
344 <<
"\t\tIssue time: "
346 <<
"\t\tDifference from current tick: "
361 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
364 for (
int j = 0; j < MachineType_NUM; j++) {
369 for (
int i = 0;
i < MachineType_NUM;
i++) {
417 Cycles initialRequestTime,
418 Cycles forwardRequestTime,
422 initialRequestTime, forwardRequestTime, firstResponseTime,
430 Cycles initialRequestTime,
431 Cycles forwardRequestTime,
441 forwardRequestTime, firstResponseTime, isRegion);
461 " instSeqNum = %d\n", address, instSeqNum);
470 "reqsAllIssued=%d\n", reqsAllIssued,
503 Cycles initialRequestTime,
504 Cycles forwardRequestTime,
509 initialRequestTime, forwardRequestTime, firstResponseTime,
517 Cycles initialRequestTime,
518 Cycles forwardRequestTime,
526 fatal_if(crequest->getRubyType() != RubyRequestType_LD,
527 "readCallback received non-read type response\n");
530 forwardRequestTime, firstResponseTime, isRegion);
547 Cycles initialRequestTime,
548 Cycles forwardRequestTime,
554 [[maybe_unused]]
Addr request_line_address =
571 uint8_t* log =
nullptr;
573 pktList.size(), request_line_address);
576 for (
auto& pkt : pktList) {
579 request_address = pkt->
getAddr();
587 if (pkt->
getPtr<uint8_t>()) {
591 case RubyRequestType_ATOMIC_NO_RETURN:
594 case RubyRequestType_ST:
596 case RubyRequestType_LD:
599 case RubyRequestType_ATOMIC_RETURN:
609 log =
data.popAtomicLogEntryFront();
615 panic(
"Unsupported ruby packet type:%s\n",
616 RubyRequestType_to_string(
type));
621 "WARNING. Data not transfered from Ruby to M5 for type " \
623 RubyRequestType_to_string(
type));
626 assert(
data.numAtomicLogEntries() == 0);
643 RubyRequestType req_type = RubyRequestType_NULL;
646 assert(!pkt->
req->isLLSC());
647 assert(!pkt->
req->isLockedRMW());
648 assert(!pkt->
req->isInstFetch());
650 if (pkt->
req->isAtomicReturn()) {
651 req_type = RubyRequestType_ATOMIC_RETURN;
652 }
else if (pkt->
req->isAtomicNoReturn()) {
653 req_type = RubyRequestType_ATOMIC_NO_RETURN;
654 }
else if (pkt->
isRead()) {
655 req_type = RubyRequestType_LD;
657 req_type = RubyRequestType_ST;
659 req_type = RubyRequestType_FLUSH;
661 panic(
"Unsupported ruby packet type\n");
678 assert(pkt->
req->hasInstSeqNum());
698 for (
int i = 0;
i < TheGpuISA::NumVecElemPerVecReg;
i++) {
726 return RequestStatus_Issued;
729template <
class KEY,
class VALUE>
731operator<<(std::ostream &out,
const std::unordered_map<KEY, VALUE> &map)
734 for (
auto i = map.begin();
i != map.end(); ++
i)
735 out <<
" " <<
i->first <<
"=" <<
i->second;
765 uint64_t seqNum = pkt->
req->getReqInstSeqNum();
773 auto citer = std::find_if(creqQueue.begin(), creqQueue.end(),
776 if (citer != creqQueue.end()) {
777 (*citer)->insertPacket(pkt);
821 " the pending write instruction list\n", seqNum,
869 }
else if (pkt_list->empty()) {
876 InstSeqNum seq_num = pkt_list->front()->req->getReqInstSeqNum();
880 size_t pkt_list_size = pkt_list->size();
892 for (
auto creq : creqs) {
894 RubyRequestType_to_string(creq->getRubyType()),
901 assert(pkt_list_size >= pkt_list->size());
902 size_t pkt_list_diff = pkt_list_size - pkt_list->size();
905 num_remaining -= pkt_list_diff;
906 assert(num_remaining >= 0);
910 "Coalesced %d pkts for seqNum %d, %d remaining\n",
911 pkt_list_diff, seq_num, num_remaining);
921 for (
int i = 0;
i <
len;
i++) {
953 fatal_if((crequest->getRubyType() != RubyRequestType_ATOMIC &&
954 crequest->getRubyType() != RubyRequestType_ATOMIC_RETURN &&
955 crequest->getRubyType() != RubyRequestType_ATOMIC_NO_RETURN),
956 "atomicCallback saw non-atomic type response\n");
975 for (
auto& pkt : mylist) {
984 assert(port != NULL);
986 pkt->senderState =
ss->predecessor;
1012 rs->m_cache_recorder->enqueueNextFlushRequest();
1021 Cycles initialRequestTime,
1022 Cycles forwardRequestTime,
1023 Cycles firstResponseTime,
1024 bool success,
bool isRegion)
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e.
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
SenderState * senderState
This packet's sender state.
T * getPtr()
Get a pointer to the data ptr.
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
RequestPtr req
A pointer to the original request.
MemCmd cmd
The command field of the packet.
Ports are used to interface objects to each other.
void sendTokens(int num_tokens)
Return num_tokens tokens back to the request port.
void enqueueNextFetchRequest()
Function for enqueuing the next fetch request, used for warming up the memory and the caches.
void setIssueTime(Cycles _issueTime)
void insertPacket(PacketPtr pkt)
void setRubyType(RubyRequestType type)
PacketPtr getFirstPkt() const
RubyRequestType getRubyType() const
std::vector< PacketPtr > & getPackets()
virtual RubyRequestType getRequestType(PacketPtr pkt)
void writeCompleteCallback(Addr address, uint64_t instSeqNum, MachineType mach)
void writeCallback(Addr address, DataBlock &data)
std::vector< statistics::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
void evictionCallback(Addr address)
void kernelCallback(int wavefront_id)
virtual void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
virtual void issueMemSyncRequest(PacketPtr pkt)
void printRequestTable(std::stringstream &ss)
int m_max_outstanding_requests
GMTokenPort & getGMTokenPort()
std::vector< statistics::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
statistics::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
void resetStats() override
Callback to reset stats.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
RubyGPUCoalescerParams Params
void printProgress(std::ostream &out) const
void hitCallback(CoalescedRequest *crequest, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion)
std::unordered_map< uint64_t, std::deque< CoalescedRequest * > > coalescedReqs
UncoalescedTable uncoalescedTable
void insertKernel(int wavefront_id, PacketPtr pkt)
std::unordered_map< int, PacketPtr > kernelEndList
virtual void issueRequest(CoalescedRequest *crequest)=0
statistics::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
bool coalescePacket(PacketPtr pkt)
std::vector< statistics::Histogram * > m_InitialToForwardDelayHist
Cycles m_deadlock_threshold
std::vector< statistics::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< statistics::Histogram * > m_ForwardToFirstResponseDelayHist
RequestStatus makeRequest(PacketPtr pkt) override
void readCallback(Addr address, DataBlock &data)
void completeHitCallback(std::vector< PacketPtr > &mylist)
void recordMissLatency(CoalescedRequest *crequest, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool success, bool isRegion)
std::unordered_map< uint64_t, PendingWriteInst > pendingWriteInsts
std::vector< statistics::Histogram * > m_typeLatencyHist
GPUCoalescer(const Params &)
void print(std::ostream &out) const
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
std::vector< int > newKernelEnds
std::vector< statistics::Histogram * > m_missTypeLatencyHist
std::vector< std::vector< statistics::Histogram * > > m_missTypeMachLatencyHist
EventFunctionWrapper issueEvent
GPUDynInstPtr getDynInst(PacketPtr pkt) const
EventFunctionWrapper deadlockCheckEvent
void addPendingReq(RubyPort::MemResponsePort *port, GPUDynInstPtr inst, bool usingRubyTester)
void ackWriteCompletion(bool usingRubyTester)
int getNumPendingStores()
bool receiveWriteCompleteAck()
void hitCallback(PacketPtr pkt)
void ruby_hit_callback(PacketPtr pkt)
RubySystem * m_ruby_system
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
void ruby_eviction_callback(Addr address)
static bool getWarmupEnabled()
CacheRecorder * m_cache_recorder
static bool getCooldownEnabled()
void setPacketsRemaining(InstSeqNum seqNum, int count)
std::map< InstSeqNum, RubyRequestType > reqTypeMap
void insertPacket(PacketPtr pkt)
void printRequestTable(std::stringstream &ss)
bool areRequestsDone(const InstSeqNum instSeqNum)
void insertReqType(PacketPtr pkt, RubyRequestType type)
std::map< InstSeqNum, PerInstPackets > instMap
UncoalescedTable(GPUCoalescer *gc)
void initPacketsRemaining(InstSeqNum seqNum, int count)
int getPacketsRemaining(InstSeqNum seqNum)
void checkDeadlock(Tick threshold)
PerInstPackets * getInstPackets(int offset)
std::map< InstSeqNum, int > instPktsRemaining
void reset()
Reset stat value to default.
bool scheduled() const
Determine if the current event is scheduled.
void schedule(Event &event, Tick when)
static const Priority Progress_Event_Pri
Progress events come at the end.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true.
Addr makeLineAddress(Addr addr)
Addr getOffset(Addr addr)
std::ostream & operator<<(std::ostream &os, const BoolVec &myvector)
std::string printAddress(Addr addr)
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
T safe_cast(U &&ref_or_ptr)
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
uint64_t Tick
Tick count type.
Declaration of the Packet class.
GPUDynInstPtr _gpuDynInst
const std::string & name()