#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
// ... (x86-specific includes elided)
#endif // X86_ISA

#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "debug/RubyPort.hh"
#include "debug/RubyStats.hh"
#include "params/RubyGPUCoalescer.hh"

GPUCoalescer *
RubyGPUCoalescerParams::create()
{
    return new GPUCoalescer(this);
}
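// Helpers that translate a memory request's HSA memory-scope and
// memory-segment flags into the HSAScope/HSASegment enums that Ruby
// carries on each RubyRequest. Unrecognized flags fall through to fatal().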
HSAScope
reqScopeToHSAScope(const RequestPtr &req)
{
    HSAScope accessScope = HSAScope_UNSPECIFIED;
    if (req->isScoped()) {
        if (req->isWavefrontScope()) {
            accessScope = HSAScope_WAVEFRONT;
        } else if (req->isWorkgroupScope()) {
            accessScope = HSAScope_WORKGROUP;
        } else if (req->isDeviceScope()) {
            accessScope = HSAScope_DEVICE;
        } else if (req->isSystemScope()) {
            accessScope = HSAScope_SYSTEM;
        } else {
            fatal("Bad scope type");
        }
    }
    return accessScope;
}
HSASegment
reqSegmentToHSASegment(const RequestPtr &req)
{
    HSASegment accessSegment = HSASegment_GLOBAL;

    if (req->isGlobalSegment()) {
        accessSegment = HSASegment_GLOBAL;
    } else if (req->isGroupSegment()) {
        accessSegment = HSASegment_GROUP;
    } else if (req->isPrivateSegment()) {
        accessSegment = HSASegment_PRIVATE;
    } else if (req->isKernargSegment()) {
        accessSegment = HSASegment_KERNARG;
    } else if (req->isReadonlySegment()) {
        accessSegment = HSASegment_READONLY;
    } else if (req->isSpillSegment()) {
        accessSegment = HSASegment_SPILL;
    } else if (req->isArgSegment()) {
        accessSegment = HSASegment_ARG;
    } else {
        fatal("Bad segment type");
    }

    return accessSegment;
}
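// Both helpers are consumed in issueRequest() below, where the resulting
// scope/segment pair is attached to the outgoing RubyRequest message.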
void
UncoalescedTable::insertPacket(PacketPtr pkt)
{
    uint64_t seqNum = pkt->req->getReqInstSeqNum();
    instMap[seqNum].push_back(pkt);
}
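// Every packet produced by one wavefront instruction carries the same
// getReqInstSeqNum() value, so instMap collects the per-lane accesses of
// a single instruction under one key.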
PerInstPackets*
UncoalescedTable::getInstPackets(int offset)
{
    if (offset >= instMap.size()) {
        return nullptr;
    }

    auto instMapIter = instMap.begin();
    std::advance(instMapIter, offset);

    return &(instMapIter->second);
}
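// instMap is an ordered std::map keyed by sequence number, so offset 0
// names the oldest in-flight instruction; callers can probe increasing
// offsets until nullptr is returned.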
void
UncoalescedTable::updateResources()
{
    for (auto iter = instMap.begin(); iter != instMap.end(); ) {
        if (iter->second.empty()) {
            // All packets of this instruction have been coalesced; drop
            // the entry and return a token to the compute unit.
            instMap.erase(iter++);
            coalescer->getGMTokenPort().sendTokens(1);
        } else {
            ++iter;
        }
    }
}
void
UncoalescedTable::printRequestTable(std::stringstream& ss)
{
    ss << "UncoalescedTable contains " << instMap.size()
       << " address entries." << std::endl;
    for (auto& inst : instMap) {
        ss << "Addr 0x" << std::hex << inst.first << std::dec
           << " with " << inst.second.size() << " packets"
           << std::endl;
    }
}

void
UncoalescedTable::checkDeadlock(Tick threshold)
{
    Tick current_time = curTick();

    for (auto &it : instMap) {
        for (auto &pkt : it.second) {
            if (current_time - pkt->req->time() > threshold) {
                std::stringstream ss;
                printRequestTable(ss);
                panic("Possible Deadlock detected. Aborting!\n"
                      "version: %d request.paddr: 0x%x uncoalescedTable: %d "
                      "current time: %u issue_time: %d difference: %d\n"
                      "Request Tables:\n\n%s",
                      coalescer->getId(), pkt->getAddr(), instMap.size(),
                      current_time, pkt->req->time(),
                      current_time - pkt->req->time(), ss.str());
            }
        }
    }
}
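// The same threshold-based deadlock check is repeated for the
// coalescedTable in GPUCoalescer::wakeup(), shown below.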
GPUCoalescer::GPUCoalescer(const Params *p)
    : RubyPort(p),
      issueEvent([this]{ completeIssue(); }, "Issue coalesced request",
                 false, Event::Progress_Event_Pri),
      uncoalescedTable(this),
      deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check"),
      gmTokenPort(name() + ".gmTokenPort", this)
{
    // ... (member initialization elided)
}
Port &
GPUCoalescer::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "gmTokenPort") {
        return gmTokenPort;
    }

    // delegate everything else to the base class
    return RubyPort::getPort(if_name, idx);
}
void
GPUCoalescer::wakeup()
{
    Cycles current_time = curCycle();

    for (auto& requestList : coalescedTable) {
        for (auto& req : requestList.second) {
            if (current_time - req->getIssueTime() > m_deadlock_threshold) {
                std::stringstream ss;
                printRequestTable(ss);
                panic("Possible Deadlock detected. Aborting!\n"
                      "version: %d request.paddr: 0x%x coalescedTable: %d "
                      "current time: %u issue_time: %d difference: %d\n"
                      "Request Tables:\n\n%s",
                      m_version, req->getFirstPkt()->getAddr(),
                      coalescedTable.size(), cyclesToTicks(current_time),
                      cyclesToTicks(req->getIssueTime()),
                      cyclesToTicks(current_time - req->getIssueTime()),
                      ss.str());
            }
        }
    }

    // ... (uncoalescedTable check and event rescheduling elided)
}
void
GPUCoalescer::printRequestTable(std::stringstream& ss)
{
    ss << "Coalesced Table contains " << coalescedTable.size()
       << " address entries." << std::endl;
    for (auto& requestList : coalescedTable) {
        ss << "Addr 0x" << std::hex << requestList.first << std::dec
           << " with " << requestList.second.size() << " requests"
           << std::endl;
        for (auto& request : requestList.second) {
            ss << RubyRequestType_to_string(request->getRubyType())
               << " pkts-" << request->getPackets().size()
               << " issued-" << request->getIssueTime() << " seqNum-"
               << request->getSeqNum() << "; ";
        }
        ss << std::endl;
    }
}
void
GPUCoalescer::resetStats()
{
    m_latencyHist.reset();
    m_missLatencyHist.reset();
    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_typeLatencyHist[i]->reset();
        m_missTypeLatencyHist[i]->reset();
        for (int j = 0; j < MachineType_NUM; j++) {
            m_missTypeMachLatencyHist[i][j]->reset();
        }
    }

    for (int i = 0; i < MachineType_NUM; i++) {
        m_missMachLatencyHist[i]->reset();

        m_IssueToInitialDelayHist[i]->reset();
        m_InitialToForwardDelayHist[i]->reset();
        m_ForwardToFirstResponseDelayHist[i]->reset();
        m_FirstResponseToCompletionDelayHist[i]->reset();
    }
}
void
GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
{
    // ... (duplicate-entry assertion elided)
    kernelEndList[wavefront_id] = pkt;
    DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
            kernelEndList.size());
}
void
GPUCoalescer::writeCallback(Addr address,
                            MachineType mach,
                            DataBlock& data,
                            Cycles initialRequestTime,
                            Cycles forwardRequestTime,
                            Cycles firstResponseTime)
{
    writeCallback(address, mach, data,
                  initialRequestTime, forwardRequestTime, firstResponseTime,
                  false);
}
void
GPUCoalescer::writeCallback(Addr address,
                            MachineType mach,
                            DataBlock& data,
                            Cycles initialRequestTime,
                            Cycles forwardRequestTime,
                            Cycles firstResponseTime,
                            bool isRegion)
{
    assert(address == makeLineAddress(address));
    assert(coalescedTable.count(address));

    auto crequest = coalescedTable.at(address).front();

    hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
                forwardRequestTime, firstResponseTime, isRegion);

    // ... (retire crequest and issue the next request queued for this
    // line, elided)
}
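// Since only the head of each per-line deque is ever outstanding in the
// protocol, a response always matches the front() entry looked up above.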
void
GPUCoalescer::readCallback(Addr address,
                           MachineType mach,
                           DataBlock& data,
                           Cycles initialRequestTime,
                           Cycles forwardRequestTime,
                           Cycles firstResponseTime)
{
    readCallback(address, mach, data,
                 initialRequestTime, forwardRequestTime, firstResponseTime,
                 false);
}
void
GPUCoalescer::readCallback(Addr address,
                           MachineType mach,
                           DataBlock& data,
                           Cycles initialRequestTime,
                           Cycles forwardRequestTime,
                           Cycles firstResponseTime,
                           bool isRegion)
{
    assert(address == makeLineAddress(address));
    assert(coalescedTable.count(address));

    auto crequest = coalescedTable.at(address).front();
    fatal_if(crequest->getRubyType() != RubyRequestType_LD,
             "readCallback received non-read type response\n");

    // Respond to all pending LD requests for this line until a request
    // of a different type reaches the head of the queue.
    while (crequest->getRubyType() == RubyRequestType_LD) {
        hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
                    forwardRequestTime, firstResponseTime, isRegion);
        // ... (advance to the next queued request or break, elided)
    }
    // ...
}
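// Unlike writeCallback(), a single read response can drain several
// queued LD requests back to back, since the DataBlock returned by the
// protocol is valid for each of them.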
void
GPUCoalescer::hitCallback(CoalescedRequest* crequest,
                          MachineType mach,
                          DataBlock& data,
                          bool success,
                          Cycles initialRequestTime,
                          Cycles forwardRequestTime,
                          Cycles firstResponseTime,
                          bool isRegion)
{
    PacketPtr pkt = crequest->getFirstPkt();
    Addr request_address = pkt->getAddr();
    Addr request_line_address = makeLineAddress(request_address);
    RubyRequestType type = crequest->getRubyType();

    // Copy data between the Ruby DataBlock and every coalesced packet.
    std::vector<PacketPtr> pktList = crequest->getPackets();
    DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
            pktList.size(), request_line_address);
    for (auto& pkt : pktList) {
        request_address = pkt->getAddr();
        if (pkt->getPtr<uint8_t>()) {
            if ((type == RubyRequestType_LD) ||
                (type == RubyRequestType_ATOMIC) ||
                (type == RubyRequestType_ATOMIC_RETURN) ||
                (type == RubyRequestType_IFETCH) ||
                (type == RubyRequestType_RMW_Read) ||
                (type == RubyRequestType_Locked_RMW_Read) ||
                (type == RubyRequestType_Load_Linked)) {
                // reads pull data out of the block into the packet
                pkt->setData(
                    data.getData(getOffset(request_address), pkt->getSize()));
            } else {
                // writes push the packet's data into the block
                data.setData(pkt->getPtr<uint8_t>(),
                             getOffset(request_address), pkt->getSize());
            }
        } else {
            DPRINTF(MemoryAccess,
                    "WARNING. Data not transfered from Ruby to M5 for type " \
                    "%s\n",
                    RubyRequestType_to_string(type));
        }
    }
    // ...
}
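// hitCallback() ends by retiring the request and handing the packet list
// to completeHitCallback(), shown further below (tail elided here).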
RubyRequestType
GPUCoalescer::getRequestType(PacketPtr pkt)
{
    RubyRequestType req_type = RubyRequestType_NULL;

    // These request types are not handled by the GPU coalescer.
    assert(!pkt->req->isLLSC());
    assert(!pkt->req->isLockedRMW());
    assert(!pkt->req->isInstFetch());

    if (pkt->req->isAtomicReturn()) {
        req_type = RubyRequestType_ATOMIC_RETURN;
    } else if (pkt->req->isAtomicNoReturn()) {
        req_type = RubyRequestType_ATOMIC_NO_RETURN;
    } else if (pkt->isRead()) {
        req_type = RubyRequestType_LD;
    } else if (pkt->isWrite()) {
        req_type = RubyRequestType_ST;
    } else {
        panic("Unsupported ruby packet type\n");
    }

    return req_type;
}
RequestStatus
GPUCoalescer::makeRequest(PacketPtr pkt)
{
    // Kernel begin (isKernel + isAcquire) and kernel end
    // (isKernel + isRelease) packets are handled here and are never
    // placed in the uncoalescedTable.
    if (pkt->req->isKernel()) {
        if (pkt->req->isAcquire()) {
            insertKernel(pkt->req->contextId(), pkt);
            // ...
            return RequestStatus_Issued;
        } else if (pkt->req->isRelease()) {
            int wf_id = 0;
            if (pkt->req->hasContextId()) {
                wf_id = pkt->req->contextId();
            }
            insertKernel(wf_id, pkt);
            // ...
            return RequestStatus_Issued;
        }
    } else if (// ... (fence-type check elided)
               (pkt->req->isRelease() || pkt->req->isAcquire())) {
        // Stand-alone acquire/release (memory fence) requests are also
        // completed without being coalesced.
        int wf_id = 0;
        if (pkt->req->hasContextId()) {
            wf_id = pkt->req->contextId();
        }
        // ...
        return RequestStatus_Issued;
    }

    // Ordinary loads, stores and atomics are buffered in the
    // uncoalescedTable; issueEvent is scheduled to coalesce and issue
    // them at the end of the current tick.
    // ...
    return RequestStatus_Issued;
}
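// Request lifecycle implemented by this file:
//   makeRequest()   - buffers ordinary packets in the uncoalescedTable
//   completeIssue() - drains the table; coalescePacket() merges packets
//                     that hit the same line with the same seqNum into a
//                     CoalescedRequest, which issueRequest() hands to Ruby
//   read/write/atomicCallback() - protocol responses retire entries from
//                     coalescedTable via hitCallback()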
void
GPUCoalescer::issueRequest(CoalescedRequest* crequest)
{
    PacketPtr pkt = crequest->getFirstPkt();

    int proc_id = -1;
    if (pkt != NULL && pkt->req->hasContextId()) {
        proc_id = pkt->req->contextId();
    }

    // If valid, copy the pc to the ruby request
    Addr pc = 0;
    if (pkt->req->hasPC()) {
        pc = pkt->req->getPC();
    }

    HSAScope accessScope = reqScopeToHSAScope(pkt->req);
    HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
    // Walk the coalesced packets to build the byte access mask, the
    // write data, and any atomic operations for the line-sized request.
    Addr line_addr = makeLineAddress(pkt->getAddr());
    uint32_t blockSize = RubySystem::getBlockSizeBytes();
    std::vector<bool> accessMask(blockSize, false);
    std::vector< std::pair<int, AtomicOpFunctor*> > atomicOps;
    DataBlock dataBlock;
    dataBlock.clear();

    uint32_t tableSize = crequest->getPackets().size();
    for (int i = 0; i < tableSize; i++) {
        PacketPtr tmpPkt = crequest->getPackets()[i];
        uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
        uint32_t tmpSize = tmpPkt->getSize();
        if (tmpPkt->isAtomicOp()) {
            std::pair<int, AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
                                                    tmpPkt->getAtomicOp());
            atomicOps.push_back(tmpAtomicOp);
        } else if (tmpPkt->isWrite()) {
            dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
                              tmpOffset, tmpSize);
        }
        for (int j = 0; j < tmpSize; j++) {
            accessMask[tmpOffset + j] = true;
        }
    }
    std::shared_ptr<RubyRequest> msg;
    if (pkt->isAtomicOp()) {
        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
                              pkt->getPtr<uint8_t>(),
                              pkt->getSize(), pc, crequest->getRubyType(),
                              RubyAccessMode_Supervisor, pkt,
                              PrefetchBit_No, proc_id, 100,
                              blockSize, accessMask,
                              dataBlock, atomicOps,
                              accessScope, accessSegment);
    } else {
        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
                              pkt->getPtr<uint8_t>(),
                              pkt->getSize(), pc, crequest->getRubyType(),
                              RubyAccessMode_Supervisor, pkt,
                              PrefetchBit_No, proc_id, 100,
                              blockSize, accessMask,
                              dataBlock,
                              accessScope, accessSegment);
    }
    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
             curTick(), m_version, "Coal", "Begin", "", "",
             printAddress(msg->getPhysicalAddress()),
             RubyRequestType_to_string(crequest->getRubyType()));

    fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
             "there should not be any I-Fetch requests in the GPU Coalescer");

    // ... (mandatory queue enqueue elided; see note below)
}
template <class KEY, class VALUE>
std::ostream &
operator<<(std::ostream &out, const std::unordered_map<KEY, VALUE> &map)
{
    out << "[";
    for (auto i = map.begin(); i != map.end(); ++i)
        out << " " << i->first << "=" << i->second;
    out << " ]";
    return out;
}
void
GPUCoalescer::recordRequestType(SequencerRequestType requestType)
{
    DPRINTF(RubyStats, "Recorded statistic: %s\n",
            SequencerRequestType_to_string(requestType));
}
bool
GPUCoalescer::coalescePacket(PacketPtr pkt)
{
    uint64_t seqNum = pkt->req->getReqInstSeqNum();
    Addr line_addr = makeLineAddress(pkt->getAddr());

    // Merge the packet into a pending CoalescedRequest if one exists for
    // the same line and the same instruction sequence number.
    if (coalescedTable.count(line_addr)) {
        auto& creqQueue = coalescedTable.at(line_addr);
        auto citer = std::find_if(creqQueue.begin(), creqQueue.end(),
            [&](CoalescedRequest* c) { return c->getSeqNum() == seqNum; }
        );
        if (citer != creqQueue.end()) {
            (*citer)->insertPacket(pkt);
            return true;
        }
    }

    // Otherwise create a new CoalescedRequest. If no other request is
    // outstanding for this line it is issued immediately; otherwise it
    // is queued behind the outstanding one. (Details elided.)
    CoalescedRequest* creq = new CoalescedRequest(seqNum);
    creq->insertPacket(pkt);
    creq->setRubyType(getRequestType(pkt));
    creq->setIssueTime(curCycle());
    // ...
    DPRINTF(GPUCoalescer, "Issued req type %s seqNum %d\n",
            RubyRequestType_to_string(creq->getRubyType()), seqNum);
    return true;
}
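// Coalescing thus requires both the same cache line and the same
// instruction sequence number; packets from different instructions that
// touch one line become separate CoalescedRequests queued behind one
// another in coalescedTable.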
void
GPUCoalescer::completeIssue()
{
    // ... (coalescing loop over uncoalescedTable.getInstPackets() and
    // uncoalescedTable.updateResources() elided)

    // Handle any kernel-end releases recorded by makeRequest() this cycle.
    int len = newKernelEnds.size();
    for (int i = 0; i < len; i++) {
        kernelCallback(newKernelEnds[i]);
    }
    newKernelEnds.clear();
}
void
GPUCoalescer::atomicCallback(Addr address, MachineType mach,
                             const DataBlock& data)
{
    assert(address == makeLineAddress(address));
    assert(coalescedTable.count(address));

    auto crequest = coalescedTable.at(address).front();

    fatal_if((crequest->getRubyType() != RubyRequestType_ATOMIC &&
              crequest->getRubyType() != RubyRequestType_ATOMIC_RETURN &&
              crequest->getRubyType() != RubyRequestType_ATOMIC_NO_RETURN),
             "atomicCallback saw non-atomic type response\n");

    // ...
}
void
GPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
{
    if (myMachID == senderMachID) {
        CP_TCPLdHits++;
    } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
        CP_TCPLdTransfers++;
    } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
        CP_TCCLdHits++;
    }
    // ...
}

void
GPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
{
    if (myMachID == senderMachID) {
        CP_TCPStHits++;
    } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
        CP_TCPStTransfers++;
    } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
        CP_TCCStHits++;
    }
    // ...
}
void
GPUCoalescer::completeHitCallback(std::vector<PacketPtr>& mylist)
{
    for (auto& pkt : mylist) {
        // Restore the packet's original sender state before handing the
        // response back to the port it arrived on.
        RubyPort::SenderState *ss =
            safe_cast<RubyPort::SenderState *>(pkt->senderState);
        MemSlavePort *port = ss->port;
        assert(port != NULL);

        pkt->senderState = ss->predecessor;
        delete ss;
        port->hitCallback(pkt);
    }
    // ...
}
void
GPUCoalescer::recordMissLatency(CoalescedRequest* crequest,
                                MachineType mach,
                                Cycles initialRequestTime,
                                Cycles forwardRequestTime,
                                Cycles firstResponseTime,
                                bool success, bool isRegion)
{
    RubyRequestType type = crequest->getRubyType();
    Cycles issued_time = crequest->getIssueTime();
    Cycles completion_time = curCycle();

    assert(completion_time >= issued_time);
    Cycles total_lat = completion_time - issued_time;

    // Cache-level hit/transfer/miss counters, keyed by the machine that
    // supplied the response.
    if (mach == MachineType_TCP) {
        if (type == RubyRequestType_LD) {
            GPU_TCPLdHits++;
        } else {
            GPU_TCPStHits++;
        }
    } else if (mach == MachineType_L1Cache_wCC) {
        if (type == RubyRequestType_LD) {
            GPU_TCPLdTransfers++;
        } else {
            GPU_TCPStTransfers++;
        }
    } else if (mach == MachineType_TCC) {
        if (type == RubyRequestType_LD) {
            GPU_TCCLdHits++;
        } else {
            GPU_TCCStHits++;
        }
    } else {
        if (type == RubyRequestType_LD) {
            GPU_LdMiss++;
        } else {
            GPU_StMiss++;
        }
    }

    // Profile every access latency, then miss latency for accesses with
    // non-zero latency.
    m_latencyHist.sample(total_lat);
    m_typeLatencyHist[type]->sample(total_lat);

    if (total_lat != Cycles(0)) {
        m_missLatencyHist.sample(total_lat);
        m_missTypeLatencyHist[type]->sample(total_lat);

        if (mach != MachineType_NUM) {
            m_missMachLatencyHist[mach]->sample(total_lat);
            m_missTypeMachLatencyHist[type][mach]->sample(total_lat);

            if ((issued_time <= initialRequestTime) &&
                (initialRequestTime <= forwardRequestTime) &&
                (forwardRequestTime <= firstResponseTime) &&
                (firstResponseTime <= completion_time)) {
                m_IssueToInitialDelayHist[mach]->sample(
                    initialRequestTime - issued_time);
                m_InitialToForwardDelayHist[mach]->sample(
                    forwardRequestTime - initialRequestTime);
                m_ForwardToFirstResponseDelayHist[mach]->sample(
                    firstResponseTime - forwardRequestTime);
                m_FirstResponseToCompletionDelayHist[mach]->sample(
                    completion_time - firstResponseTime);
            }
        }
    }

    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
             curTick(), m_version, "Coal",
             success ? "Done" : "SC_Failed", "", "",
             printAddress(crequest->getFirstPkt()->getAddr()), total_lat);
}
void
GPUCoalescer::regStats()
{
    RubyPort::regStats();

    // These histograms are collated by the Ruby profiler across all
    // coalescers before being displayed.
    m_latencyHist.init(10);
    m_missLatencyHist.init(10);

    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_typeLatencyHist.push_back(new Stats::Histogram());
        m_typeLatencyHist[i]->init(10);

        m_missTypeLatencyHist.push_back(new Stats::Histogram());
        m_missTypeLatencyHist[i]->init(10);
    }

    for (int i = 0; i < MachineType_NUM; i++) {
        m_missMachLatencyHist.push_back(new Stats::Histogram());
        m_missMachLatencyHist[i]->init(10);
        // ... (the per-machine delay breakdown histograms are
        // initialized the same way, elided)
    }

    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_missTypeMachLatencyHist.push_back(
            std::vector<Stats::Histogram *>());
        for (int j = 0; j < MachineType_NUM; j++) {
            m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
            m_missTypeMachLatencyHist[i][j]->init(10);
        }
    }
    GPU_TCPLdHits
        .name(name() + ".gpu_tcp_ld_hits")
        .desc("loads that hit in the TCP")
        ;
    GPU_TCPLdTransfers
        .name(name() + ".gpu_tcp_ld_transfers")
        .desc("TCP to TCP load transfers")
        ;
    GPU_TCCLdHits
        .name(name() + ".gpu_tcc_ld_hits")
        .desc("loads that hit in the TCC")
        ;
    GPU_LdMiss
        .name(name() + ".gpu_ld_misses")
        .desc("loads that miss in the GPU")
        ;

    GPU_TCPStHits
        .name(name() + ".gpu_tcp_st_hits")
        .desc("stores that hit in the TCP")
        ;
    GPU_TCPStTransfers
        .name(name() + ".gpu_tcp_st_transfers")
        .desc("TCP to TCP store transfers")
        ;
    GPU_TCCStHits
        .name(name() + ".gpu_tcc_st_hits")
        .desc("stores that hit in the TCC")
        ;
    GPU_StMiss
        .name(name() + ".gpu_st_misses")
        .desc("stores that miss in the GPU")
        ;

    CP_TCPLdHits
        .name(name() + ".cp_tcp_ld_hits")
        .desc("loads that hit in the TCP")
        ;
    CP_TCPLdTransfers
        .name(name() + ".cp_tcp_ld_transfers")
        .desc("TCP to TCP load transfers")
        ;
    CP_TCCLdHits
        .name(name() + ".cp_tcc_ld_hits")
        .desc("loads that hit in the TCC")
        ;
    CP_LdMiss
        .name(name() + ".cp_ld_misses")
        .desc("loads that miss in the GPU")
        ;

    CP_TCPStHits
        .name(name() + ".cp_tcp_st_hits")
        .desc("stores that hit in the TCP")
        ;
    CP_TCPStTransfers
        .name(name() + ".cp_tcp_st_transfers")
        .desc("TCP to TCP store transfers")
        ;
    CP_TCCStHits
        .name(name() + ".cp_tcc_st_hits")
        .desc("stores that hit in the TCC")
        ;
    CP_StMiss
        .name(name() + ".cp_st_misses")
        .desc("stores that miss in the GPU")
        ;
}