38#include "debug/GPUCoalescer.hh"
39#include "debug/MemoryAccess.hh"
40#include "debug/ProtocolTrace.hh"
41#include "debug/RubyHitMiss.hh"
42#include "debug/RubyPort.hh"
43#include "debug/RubyStats.hh"
54#include "params/RubyGPUCoalescer.hh"
70 uint64_t seqNum = pkt->
req->getReqInstSeqNum();
80 uint64_t seqNum = pkt->
req->getReqInstSeqNum();
118 auto instMapIter =
instMap.begin();
119 std::advance(instMapIter,
offset);
121 return &(instMapIter->second);
134 assert(iter->second.empty());
145 if (!
coalescer->getRubySystem()->getWarmupEnabled() &&
146 !
coalescer->getRubySystem()->getCooldownEnabled()) {
147 if (
reqTypeMap[seq_num] != RubyRequestType_FLUSH) {
149 "Returning token seqNum %d\n", seq_num);
150 coalescer->getGMTokenPort().sendTokens(1);
167 ,inst.first, inst.second.size());
168 if (inst.first == instSeqNum) {
return false; }
177 ss <<
"Listing pending packets from " <<
instMap.size() <<
" instructions";
180 ss <<
"\tAddr: " <<
coalescer->printAddress(inst.first) <<
" with "
181 << inst.second.size() <<
" pending packets" << std::endl;
191 for (
auto &pkt : it.second) {
192 if (current_time - pkt->req->time() > threshold) {
193 std::stringstream
ss;
196 panic(
"Possible Deadlock detected. Aborting!\n"
197 "version: %d request.paddr: 0x%x uncoalescedTable: %d "
198 "current time: %u issue_time: %d difference: %d\n"
199 "Request Tables:\n\n%s",
coalescer->getId(),
200 pkt->getAddr(),
instMap.size(), current_time,
201 pkt->req->time(), current_time - pkt->req->time(),
212 uncoalescedTable(
this),
213 deadlockCheckEvent([
this]{ wakeup(); },
"GPUCoalescer deadlock check"),
215 gmTokenPort(
name() +
".gmTokenPort")
217 m_store_waiting_on_load_cycles = 0;
218 m_store_waiting_on_store_cycles = 0;
219 m_load_waiting_on_store_cycles = 0;
220 m_load_waiting_on_load_cycles = 0;
222 m_outstanding_count = 0;
224 coalescingWindow =
p.max_coalesces_per_cycle;
226 m_max_outstanding_requests = 0;
227 m_instCache_ptr =
nullptr;
228 m_dataCache_ptr =
nullptr;
230 m_instCache_ptr =
p.icache;
231 m_dataCache_ptr =
p.dcache;
232 m_max_outstanding_requests =
p.max_outstanding_requests;
233 m_deadlock_threshold =
p.deadlock_threshold;
235 assert(m_max_outstanding_requests > 0);
236 assert(m_deadlock_threshold > 0);
237 assert(m_instCache_ptr);
238 assert(m_dataCache_ptr);
240 m_runningGarnetStandalone =
p.garnet_standalone;
246 m_outstandReqHist.init(10);
247 m_latencyHist.init(10);
248 m_missLatencyHist.init(10);
250 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
252 m_typeLatencyHist[
i]->init(10);
255 m_missTypeLatencyHist[
i]->init(10);
258 for (
int i = 0;
i < MachineType_NUM;
i++) {
259 m_missMachLatencyHist.push_back(
new statistics::Histogram());
260 m_missMachLatencyHist[
i]->init(10);
262 m_IssueToInitialDelayHist.push_back(
new statistics::Histogram());
263 m_IssueToInitialDelayHist[
i]->init(10);
265 m_InitialToForwardDelayHist.push_back(
new statistics::Histogram());
266 m_InitialToForwardDelayHist[
i]->init(10);
268 m_ForwardToFirstResponseDelayHist.push_back(
269 new statistics::Histogram());
270 m_ForwardToFirstResponseDelayHist[
i]->init(10);
272 m_FirstResponseToCompletionDelayHist.push_back(
273 new statistics::Histogram());
274 m_FirstResponseToCompletionDelayHist[
i]->init(10);
277 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
278 m_missTypeMachLatencyHist.push_back(
279 std::vector<statistics::Histogram *>());
281 for (
int j = 0; j < MachineType_NUM; j++) {
282 m_missTypeMachLatencyHist[
i].push_back(
283 new statistics::Histogram());
284 m_missTypeMachLatencyHist[
i][j]->init(10);
297 if (if_name ==
"gmTokenPort") {
310 for (
auto& req : requestList.second) {
312 std::stringstream
ss;
314 warn(
"GPUCoalescer %d Possible deadlock detected!\n%s\n",
316 panic(
"Aborting due to deadlock!\n");
335 <<
" outstanding requests in the coalesced table\n";
338 for (
auto& request : requestList.second) {
340 <<
"\tInstruction sequence number: "
341 << request->getSeqNum() <<
"\n"
343 << RubyRequestType_to_string(request->getRubyType()) <<
"\n"
344 <<
"\t\tNumber of associated packets: "
345 << request->getPackets().size() <<
"\n"
346 <<
"\t\tIssue time: "
348 <<
"\t\tDifference from current tick: "
363 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
366 for (
int j = 0; j < MachineType_NUM; j++) {
371 for (
int i = 0;
i < MachineType_NUM;
i++) {
419 Cycles initialRequestTime,
420 Cycles forwardRequestTime,
424 initialRequestTime, forwardRequestTime, firstResponseTime,
432 Cycles initialRequestTime,
433 Cycles forwardRequestTime,
443 forwardRequestTime, firstResponseTime, isRegion,
false,
false);
463 " instSeqNum = %d\n", address, instSeqNum);
472 "reqsAllIssued=%d\n", reqsAllIssued,
497 bool externalHit =
false)
506 Cycles initialRequestTime,
507 Cycles forwardRequestTime,
509 bool externalHit =
false)
513 initialRequestTime, forwardRequestTime, firstResponseTime,
521 Cycles initialRequestTime,
522 Cycles forwardRequestTime,
525 bool externalHit =
false)
531 fatal_if(crequest->getRubyType() != RubyRequestType_LD,
532 "readCallback received non-read type response\n");
534 bool mshr_hit_under_miss =
false;
539 while (crequest->getRubyType() == RubyRequestType_LD) {
541 crequest->getIssueTime(), forwardRequestTime, firstResponseTime,
542 isRegion, externalHit, mshr_hit_under_miss);
554 if (!is_request_local) {
558 mshr_hit_under_miss =
true;
574 Cycles initialRequestTime,
575 Cycles forwardRequestTime,
578 bool externalHit =
false,
579 bool mshrHitUnderMiss =
false)
583 [[maybe_unused]]
Addr request_line_address =
590 DPRINTF(RubyHitMiss,
"GPU TCP Cache %s at %#x\n",
591 externalHit ?
"hit" :
"miss",
605 uint8_t* log =
nullptr;
607 pktList.size(), request_line_address);
610 for (
auto& pkt : pktList) {
613 request_address = pkt->
getAddr();
621 if (pkt->
getPtr<uint8_t>()) {
625 case RubyRequestType_ATOMIC_NO_RETURN:
628 case RubyRequestType_ST:
630 case RubyRequestType_LD:
633 case RubyRequestType_ATOMIC_RETURN:
643 log =
data.popAtomicLogEntryFront();
649 panic(
"Unsupported ruby packet type:%s\n",
650 RubyRequestType_to_string(type));
655 "WARNING. Data not transfered from Ruby to M5 for type " \
657 RubyRequestType_to_string(type));
660 assert(
data.numAtomicLogEntries() == 0);
677 RubyRequestType req_type = RubyRequestType_NULL;
680 assert(!pkt->
req->isLLSC());
681 assert(!pkt->
req->isLockedRMW());
682 assert(!pkt->
req->isInstFetch());
684 if (pkt->
req->isAtomicReturn()) {
685 req_type = RubyRequestType_ATOMIC_RETURN;
686 }
else if (pkt->
req->isAtomicNoReturn()) {
687 req_type = RubyRequestType_ATOMIC_NO_RETURN;
688 }
else if (pkt->
isRead()) {
689 req_type = RubyRequestType_LD;
691 req_type = RubyRequestType_ST;
693 req_type = RubyRequestType_FLUSH;
695 panic(
"Unsupported ruby packet type\n");
712 assert(pkt->
req->hasInstSeqNum());
732 for (
int i = 0;
i < TheGpuISA::NumVecElemPerVecReg;
i++) {
760 return RequestStatus_Issued;
763template <
class KEY,
class VALUE>
765operator<<(std::ostream &out,
const std::unordered_map<KEY, VALUE> &map)
768 for (
auto i = map.begin();
i != map.end(); ++
i)
769 out <<
" " <<
i->first <<
"=" <<
i->second;
799 uint64_t seqNum = pkt->
req->getReqInstSeqNum();
807 auto citer = std::find_if(creqQueue.begin(), creqQueue.end(),
810 if (citer != creqQueue.end()) {
811 (*citer)->insertPacket(pkt);
855 " the pending write instruction list\n", seqNum,
903 }
else if (pkt_list->empty()) {
910 InstSeqNum seq_num = pkt_list->front()->req->getReqInstSeqNum();
914 size_t pkt_list_size = pkt_list->size();
926 for (
auto creq : creqs) {
928 RubyRequestType_to_string(creq->getRubyType()),
935 assert(pkt_list_size >= pkt_list->size());
936 size_t pkt_list_diff = pkt_list_size - pkt_list->size();
939 num_remaining -= pkt_list_diff;
940 assert(num_remaining >= 0);
944 "Coalesced %d pkts for seqNum %d, %d remaining\n",
945 pkt_list_diff, seq_num, num_remaining);
955 for (
int i = 0;
i <
len;
i++) {
987 fatal_if((crequest->getRubyType() != RubyRequestType_ATOMIC &&
988 crequest->getRubyType() != RubyRequestType_ATOMIC_RETURN &&
989 crequest->getRubyType() != RubyRequestType_ATOMIC_NO_RETURN),
990 "atomicCallback saw non-atomic type response\n");
993 crequest->getIssueTime(),
Cycles(0),
Cycles(0),
false,
false);
1009 for (
auto& pkt : mylist) {
1018 assert(port != NULL);
1020 pkt->senderState =
ss->predecessor;
1044 rs->m_cache_recorder->enqueueNextFetchRequest();
1046 rs->m_cache_recorder->enqueueNextFlushRequest();
1055 Cycles initialRequestTime,
1056 Cycles forwardRequestTime,
1057 Cycles firstResponseTime,
1058 bool isRegion,
bool mshrHitUnderMiss)
1062 if (mshrHitUnderMiss) {
1075 if (type == RubyRequestType_LD) {
1076 stats.m_mshr_ld_hits_under_miss++;
1079 if (type == RubyRequestType_LD) {
1080 stats.m_mshr_ld_misses++;
1082 stats.m_mshr_st_misses++;
1090 "Number of load requests that hit in the coalescer MSHR"),
1092 "Number of load requests that miss in the coalescer MSHR"),
1094 "Number of store requests that miss in the coalescer MSHR"),
1096 "Number of mshr accesses",
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
Tick cyclesToTicks(Cycles c) const
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
bool isGLCSet() const
Accessor functions for the cache bypass flags.
SenderState * senderState
This packet's sender state.
T * getPtr()
get a pointer to the data ptr.
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
RequestPtr req
A pointer to the original request.
MemCmd cmd
The command field of the packet.
Ports are used to interface objects to each other.
void setIssueTime(Cycles _issueTime)
void insertPacket(PacketPtr pkt)
void setRubyType(RubyRequestType type)
PacketPtr getFirstPkt() const
RubyRequestType getRubyType() const
std::vector< PacketPtr > & getPackets()
virtual RubyRequestType getRequestType(PacketPtr pkt)
void writeCompleteCallback(Addr address, uint64_t instSeqNum, MachineType mach)
void writeCallback(Addr address, DataBlock &data)
std::vector< statistics::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
void evictionCallback(Addr address)
void kernelCallback(int wavefront_id)
virtual void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
virtual void issueMemSyncRequest(PacketPtr pkt)
void printRequestTable(std::stringstream &ss)
int m_max_outstanding_requests
std::vector< statistics::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
statistics::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
void resetStats() override
Callback to reset stats.
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
void hitCallback(CoalescedRequest *crequest, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion, bool externalHit, bool mshrHitUnderMiss)
void recordStats(CoalescedRequest *crequest, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion, bool mshrHitUnderMiss)
RubyGPUCoalescerParams Params
void printProgress(std::ostream &out) const
std::unordered_map< uint64_t, std::deque< CoalescedRequest * > > coalescedReqs
UncoalescedTable uncoalescedTable
void insertKernel(int wavefront_id, PacketPtr pkt)
std::unordered_map< int, PacketPtr > kernelEndList
virtual void issueRequest(CoalescedRequest *crequest)=0
statistics::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this sequencer.
bool coalescePacket(PacketPtr pkt)
std::vector< statistics::Histogram * > m_InitialToForwardDelayHist
Cycles m_deadlock_threshold
std::vector< statistics::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< statistics::Histogram * > m_ForwardToFirstResponseDelayHist
RequestStatus makeRequest(PacketPtr pkt) override
void readCallback(Addr address, DataBlock &data)
void completeHitCallback(std::vector< PacketPtr > &mylist)
gem5::ruby::GPUCoalescer::GPUCoalescerStats stats
std::unordered_map< uint64_t, PendingWriteInst > pendingWriteInsts
std::vector< statistics::Histogram * > m_typeLatencyHist
GPUCoalescer(const Params &)
void print(std::ostream &out) const
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
std::vector< int > newKernelEnds
std::vector< statistics::Histogram * > m_missTypeLatencyHist
std::vector< std::vector< statistics::Histogram * > > m_missTypeMachLatencyHist
CacheMemory * m_dataCache_ptr
EventFunctionWrapper issueEvent
GPUDynInstPtr getDynInst(PacketPtr pkt) const
EventFunctionWrapper deadlockCheckEvent
void addPendingReq(RubyPort::MemResponsePort *port, GPUDynInstPtr inst, bool usingRubyTester)
void ackWriteCompletion(bool usingRubyTester)
int getNumPendingStores()
bool receiveWriteCompleteAck()
void hitCallback(PacketPtr pkt)
void ruby_hit_callback(PacketPtr pkt)
Addr makeLineAddress(Addr addr) const
std::string printAddress(Addr addr) const
RubySystem * m_ruby_system
RubyPort(const Params &p)
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
void ruby_eviction_callback(Addr address)
Addr getOffset(Addr addr) const
void setPacketsRemaining(InstSeqNum seqNum, int count)
std::map< InstSeqNum, RubyRequestType > reqTypeMap
void insertPacket(PacketPtr pkt)
void printRequestTable(std::stringstream &ss)
bool areRequestsDone(const InstSeqNum instSeqNum)
void insertReqType(PacketPtr pkt, RubyRequestType type)
std::map< InstSeqNum, PerInstPackets > instMap
UncoalescedTable(GPUCoalescer *gc)
void initPacketsRemaining(InstSeqNum seqNum, int count)
int getPacketsRemaining(InstSeqNum seqNum)
void checkDeadlock(Tick threshold)
PerInstPackets * getInstPackets(int offset)
std::map< InstSeqNum, int > instPktsRemaining
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
void schedule(Event &event, Tick when)
static const Priority Progress_Event_Pri
Progress events come at the end.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condition is true.
std::list< PacketPtr > PerInstPackets
std::ostream & operator<<(std::ostream &os, const BoolVec &myvector)
Copyright (c) 2024 Arm Limited All rights reserved.
T safe_cast(U &&ref_or_ptr)
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Tick curTick()
The universal simulation clock.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
uint64_t Tick
Tick count type.
Declaration of the Packet class.
GPUDynInstPtr _gpuDynInst
statistics::Scalar m_mshr_ld_misses
statistics::Scalar m_mshr_ld_hits_under_miss
statistics::Scalar m_mshr_st_misses
statistics::Formula m_mshr_accesses
GPUCoalescerStats(statistics::Group *parent)
const std::string & name()