38 #include "config/the_isa.hh" 40 #if THE_ISA == X86_ISA 47 #include "debug/GPUCoalescer.hh" 48 #include "debug/MemoryAccess.hh" 49 #include "debug/ProtocolTrace.hh" 50 #include "debug/RubyPort.hh" 51 #include "debug/RubyStats.hh" 62 #include "params/RubyGPUCoalescer.hh" 67 RubyGPUCoalescerParams::create()
75 HSAScope accessScope = HSAScope_UNSPECIFIED;
76 if (req->isScoped()) {
77 if (req->isWavefrontScope()) {
78 accessScope = HSAScope_WAVEFRONT;
79 }
else if (req->isWorkgroupScope()) {
80 accessScope = HSAScope_WORKGROUP;
81 }
else if (req->isDeviceScope()) {
82 accessScope = HSAScope_DEVICE;
83 }
else if (req->isSystemScope()) {
84 accessScope = HSAScope_SYSTEM;
86 fatal(
"Bad scope type");
95 HSASegment accessSegment = HSASegment_GLOBAL;
97 if (req->isGlobalSegment()) {
98 accessSegment = HSASegment_GLOBAL;
99 }
else if (req->isGroupSegment()) {
100 accessSegment = HSASegment_GROUP;
101 }
else if (req->isPrivateSegment()) {
102 accessSegment = HSASegment_PRIVATE;
103 }
else if (req->isKernargSegment()) {
104 accessSegment = HSASegment_KERNARG;
105 }
else if (req->isReadonlySegment()) {
106 accessSegment = HSASegment_READONLY;
107 }
else if (req->isSpillSegment()) {
108 accessSegment = HSASegment_SPILL;
109 }
else if (req->isArgSegment()) {
110 accessSegment = HSASegment_ARG;
112 fatal(
"Bad segment type");
115 return accessSegment;
120 issueEvent([this]{
completeIssue(); },
"Issue coalesced request",
161 int total_outstanding = 0;
165 for (; read != read_end; ++read) {
170 panic(
"Possible Deadlock detected. Aborting!\n" 171 "version: %d request.paddr: 0x%x m_readRequestTable: %d " 172 "current time: %u issue_time: %d difference: %d\n",
m_version,
180 for (; write != write_end; ++write) {
185 panic(
"Possible Deadlock detected. Aborting!\n" 186 "version: %d request.paddr: 0x%x m_writeRequestTable: %d " 187 "current time: %u issue_time: %d difference: %d\n",
m_version,
211 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
214 for (
int j = 0;
j < MachineType_NUM;
j++) {
219 for (
int i = 0;
i < MachineType_NUM;
i++) {
240 return RequestStatus_BufferFull;
244 request_type != RubyRequestType_Locked_RMW_Write) {
245 return RequestStatus_Aliased;
248 if ((request_type == RubyRequestType_ST) ||
249 (request_type == RubyRequestType_ATOMIC) ||
250 (request_type == RubyRequestType_ATOMIC_RETURN) ||
251 (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
252 (request_type == RubyRequestType_RMW_Read) ||
253 (request_type == RubyRequestType_RMW_Write) ||
254 (request_type == RubyRequestType_Load_Linked) ||
255 (request_type == RubyRequestType_Store_Conditional) ||
256 (request_type == RubyRequestType_Locked_RMW_Read) ||
257 (request_type == RubyRequestType_Locked_RMW_Write) ||
258 (request_type == RubyRequestType_FLUSH)) {
264 return RequestStatus_Aliased;
270 return RequestStatus_Aliased;
277 return RequestStatus_Aliased;
283 return RequestStatus_Aliased;
287 return RequestStatus_Ready;
305 kernelEndList.size());
315 pkt->
req->isLockedRMW() ||
329 if ((request_type == RubyRequestType_ST) ||
330 (request_type == RubyRequestType_ATOMIC) ||
331 (request_type == RubyRequestType_ATOMIC_RETURN) ||
332 (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
333 (request_type == RubyRequestType_RMW_Read) ||
334 (request_type == RubyRequestType_RMW_Write) ||
335 (request_type == RubyRequestType_Load_Linked) ||
336 (request_type == RubyRequestType_Store_Conditional) ||
337 (request_type == RubyRequestType_Locked_RMW_Read) ||
338 (request_type == RubyRequestType_Locked_RMW_Write) ||
339 (request_type == RubyRequestType_FLUSH)) {
345 RequestTable::iterator
i = r.first;
349 "Inserting write request for paddr %#x for type %d\n",
350 pkt->
req->getPaddr(), i->second->m_type);
361 RequestTable::iterator
i = r.first;
365 "Inserting read request for paddr %#x for type %d\n",
366 pkt->
req->getPaddr(), i->second->m_type);
396 if ((srequest->
m_type == RubyRequestType_ST) ||
397 (srequest->
m_type == RubyRequestType_RMW_Read) ||
398 (srequest->
m_type == RubyRequestType_RMW_Write) ||
399 (srequest->
m_type == RubyRequestType_Load_Linked) ||
400 (srequest->
m_type == RubyRequestType_Store_Conditional) ||
401 (srequest->
m_type == RubyRequestType_Locked_RMW_Read) ||
402 (srequest->
m_type == RubyRequestType_Locked_RMW_Write)) {
420 if (request->
m_type == RubyRequestType_Store_Conditional) {
426 request->
pkt->
req->setExtraData(0);
433 request->
pkt->
req->setExtraData(1);
439 }
else if (request->
m_type == RubyRequestType_Load_Linked) {
473 Cycles initialRequestTime,
474 Cycles forwardRequestTime,
478 initialRequestTime, forwardRequestTime, firstResponseTime,
486 Cycles initialRequestTime,
487 Cycles forwardRequestTime,
503 assert((request->m_type == RubyRequestType_ST) ||
504 (request->m_type == RubyRequestType_ATOMIC) ||
505 (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
506 (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
507 (request->m_type == RubyRequestType_RMW_Read) ||
508 (request->m_type == RubyRequestType_RMW_Write) ||
509 (request->m_type == RubyRequestType_Load_Linked) ||
510 (request->m_type == RubyRequestType_Store_Conditional) ||
511 (request->m_type == RubyRequestType_Locked_RMW_Read) ||
512 (request->m_type == RubyRequestType_Locked_RMW_Write) ||
513 (request->m_type == RubyRequestType_FLUSH));
526 if (request->m_type == RubyRequestType_Locked_RMW_Read) {
528 }
else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
533 request->issue_time, forwardRequestTime, firstResponseTime,
555 Cycles initialRequestTime,
556 Cycles forwardRequestTime,
561 initialRequestTime, forwardRequestTime, firstResponseTime,
569 Cycles initialRequestTime,
570 Cycles forwardRequestTime,
585 assert((request->m_type == RubyRequestType_LD) ||
586 (request->m_type == RubyRequestType_IFETCH));
589 request->issue_time, forwardRequestTime, firstResponseTime,
598 Cycles initialRequestTime,
599 Cycles forwardRequestTime,
610 if (type == RubyRequestType_IFETCH) {
628 for (
int i = 0;
i <
len; ++
i) {
630 assert(type ==
reqCoalescer[request_line_address][
i].primaryType);
631 request_address = pkt->
getAddr();
633 if (pkt->
getPtr<uint8_t>()) {
634 if ((type == RubyRequestType_LD) ||
635 (type == RubyRequestType_ATOMIC) ||
636 (type == RubyRequestType_ATOMIC_RETURN) ||
637 (type == RubyRequestType_IFETCH) ||
638 (type == RubyRequestType_RMW_Read) ||
639 (type == RubyRequestType_Locked_RMW_Read) ||
640 (type == RubyRequestType_Load_Linked)) {
649 "WARNING. Data not transfered from Ruby to M5 for type " \
651 RubyRequestType_to_string(type));
667 mylist.push_back(pkt);
700 if (pkt->
req->isKernel()) {
701 if (pkt->
req->isAcquire()){
704 return RequestStatus_Issued;
705 }
else if (pkt->
req->isRelease()) {
712 if (pkt->
req->hasContextId()) {
713 wf_id = pkt->
req->contextId();
720 return RequestStatus_Issued;
728 return RequestStatus_BufferFull;
731 RubyRequestType primary_type = RubyRequestType_NULL;
732 RubyRequestType secondary_type = RubyRequestType_NULL;
744 primary_type = RubyRequestType_Store_Conditional;
747 primary_type = RubyRequestType_Load_Linked;
749 secondary_type = RubyRequestType_ATOMIC;
750 }
else if (pkt->
req->isLockedRMW()) {
758 primary_type = RubyRequestType_Locked_RMW_Write;
761 primary_type = RubyRequestType_Locked_RMW_Read;
763 secondary_type = RubyRequestType_ST;
768 primary_type = RubyRequestType_ATOMIC;
769 secondary_type = RubyRequestType_ATOMIC;
772 if (pkt->
req->isInstFetch()) {
773 primary_type = secondary_type = RubyRequestType_IFETCH;
775 #if THE_ISA == X86_ISA 776 uint32_t flags = pkt->
req->getFlags();
777 bool storeCheck = flags &
780 bool storeCheck =
false;
783 primary_type = RubyRequestType_RMW_Read;
784 secondary_type = RubyRequestType_ST;
786 primary_type = secondary_type = RubyRequestType_LD;
793 primary_type = secondary_type = RubyRequestType_ST;
795 primary_type = secondary_type = RubyRequestType_FLUSH;
796 }
else if (pkt->
req->isRelease() || pkt->
req->isAcquire()) {
806 if (pkt->
req->hasContextId()) {
807 wf_id = pkt->
req->contextId();
814 return RequestStatus_Issued;
818 return RequestStatus_Issued;
821 panic(
"Unsupported ruby packet type\n");
831 if (status != RequestStatus_Ready)
848 }
else if (primary_type !=
851 return RequestStatus_Aliased;
852 }
else if (pkt->
req->isLockedRMW() ||
855 return RequestStatus_Aliased;
856 }
else if (pkt->
req->hasContextId() && pkt->
req->isRelease() &&
857 pkt->
req->contextId() !=
860 return RequestStatus_Aliased;
864 reqCoalescer[line_addr].emplace_back(pkt, primary_type, secondary_type);
868 return RequestStatus_Issued;
876 if (pkt != NULL && pkt->
req->hasContextId()) {
877 proc_id = pkt->
req->contextId();
882 if (pkt->
req->hasPC()) {
883 pc = pkt->
req->getPC();
907 for (
int i = 0;
i < tableSize;
i++) {
909 uint32_t tmpOffset = (tmpPkt->
getAddr()) - line_addr;
910 uint32_t tmpSize = tmpPkt->
getSize();
914 atomicOps.push_back(tmpAtomicOp);
915 }
else if (tmpPkt->
isWrite()) {
916 dataBlock.setData(tmpPkt->
getPtr<uint8_t>(),
919 for (
int j = 0;
j < tmpSize;
j++) {
920 accessMask[tmpOffset +
j] =
true;
923 std::shared_ptr<RubyRequest> msg;
928 RubyAccessMode_Supervisor, pkt,
929 PrefetchBit_No, proc_id, 100,
930 blockSize, accessMask,
931 dataBlock, atomicOps,
932 accessScope, accessSegment);
937 RubyAccessMode_Supervisor, pkt,
938 PrefetchBit_No, proc_id, 100,
939 blockSize, accessMask,
941 accessScope, accessSegment);
943 DPRINTFR(ProtocolTrace,
"%15s %3s %10s%20s %6s>%-6s %s %s\n",
946 RubyRequestType_to_string(secondary_type));
948 fatal_if(secondary_type == RubyRequestType_IFETCH,
949 "there should not be any I-Fetch requests in the GPU Coalescer");
959 template <
class KEY,
class VALUE>
961 operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
964 for (
auto i = map.begin();
i != map.end(); ++
i)
965 out <<
" " <<
i->first <<
"=" <<
i->second;
991 DPRINTF(RubyStats,
"Recorded statistic: %s\n",
992 SequencerRequestType_to_string(requestType));
1004 for (
int i = 0;
i <
len; ++
i) {
1012 i, pkt->
req->getPaddr());
1018 panic(
"GPUCoalescer::makeRequest should never be called if the " 1019 "request is already outstanding\n");
1029 for (
int i = 0;
i <
len;
i++) {
1068 assert((srequest->m_type == RubyRequestType_ATOMIC) ||
1069 (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
1070 (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));
1076 srequest->issue_time,
Cycles(0),
Cycles(0),
true,
false);
1084 for (
int i = 0; i <
len; ++
i) {
1086 assert(srequest->m_type ==
1088 request_address = (pkt->
getAddr());
1090 if (pkt->
getPtr<uint8_t>() &&
1091 srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
1097 "WARNING. Data not transfered from Ruby to M5 for type " \
1099 RubyRequestType_to_string(srequest->m_type));
1115 mylist.push_back(pkt);
1127 if (myMachID == senderMachID) {
1141 if (myMachID == senderMachID) {
1155 for (
int i = 0;
i <
len; ++
i) {
1159 assert(port != NULL);
1163 port->hitCallback(mylist[
i]);
1176 return request->
pkt;
1182 Cycles initialRequestTime,
1183 Cycles forwardRequestTime,
1184 Cycles firstResponseTime,
1185 bool success,
bool isRegion)
1190 assert(completion_time >= issued_time);
1191 Cycles total_lat = completion_time - issued_time;
1194 if (mach == MachineType_TCP) {
1195 if (type == RubyRequestType_LD) {
1200 }
else if (mach == MachineType_L1Cache_wCC) {
1201 if (type == RubyRequestType_LD) {
1206 }
else if (mach == MachineType_TCC) {
1207 if (type == RubyRequestType_LD) {
1213 if (type == RubyRequestType_LD) {
1225 if (total_lat !=
Cycles(0)) {
1229 if (mach != MachineType_NUM) {
1233 if ((issued_time <= initialRequestTime) &&
1234 (initialRequestTime <= forwardRequestTime) &&
1235 (forwardRequestTime <= firstResponseTime) &&
1236 (firstResponseTime <= completion_time)) {
1239 initialRequestTime - issued_time);
1241 forwardRequestTime - initialRequestTime);
1243 firstResponseTime - forwardRequestTime);
1245 completion_time - firstResponseTime);
1251 DPRINTFR(ProtocolTrace,
"%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
1253 success ?
"Done" :
"SC_Failed",
"",
"",
1269 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
1277 for (
int i = 0;
i < MachineType_NUM;
i++) {
1294 for (
int i = 0;
i < RubyRequestType_NUM;
i++) {
1297 for (
int j = 0;
j < MachineType_NUM;
j++) {
1306 .
desc(
"loads that hit in the TCP")
1309 .
name(
name() +
".gpu_tcp_ld_transfers")
1310 .
desc(
"TCP to TCP load transfers")
1314 .
desc(
"loads that hit in the TCC")
1318 .
desc(
"loads that miss in the GPU")
1323 .
desc(
"stores that hit in the TCP")
1326 .
name(
name() +
".gpu_tcp_st_transfers")
1327 .
desc(
"TCP to TCP store transfers")
1331 .
desc(
"stores that hit in the TCC")
1335 .
desc(
"stores that miss in the GPU")
1341 .
desc(
"loads that hit in the TCP")
1344 .
name(
name() +
".cp_tcp_ld_transfers")
1345 .
desc(
"TCP to TCP load transfers")
1349 .
desc(
"loads that hit in the TCC")
1353 .
desc(
"loads that miss in the GPU")
1358 .
desc(
"stores that hit in the TCP")
1361 .
name(
name() +
".cp_tcp_st_transfers")
1362 .
desc(
"TCP to TCP store transfers")
1366 .
desc(
"stores that hit in the TCC")
1370 .
desc(
"stores that miss in the GPU")
#define panic(...)
This implements a cprintf based panic() function.
void recordMissLatency(GPUCoalescerRequest *request, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool success, bool isRegion)
HSASegment reqSegmentToHSASegment(const RequestPtr &req)
void insertKernel(int wavefront_id, PacketPtr pkt)
void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
static const Priority Progress_Event_Pri
Progress events come at the end.
Stats::Scalar CP_TCCStHits
EventFunctionWrapper issueEvent
const uint8_t * getData(int offset, int len) const
Stats::Scalar GPU_TCPStHits
Cycles is a wrapper class for representing cycle counts.
#define fatal(...)
This implements a cprintf based fatal() function.
AtomicOpFunctor * getAtomicOp() const
Accessor function to atomic op.
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
CoalescingTable reqCoalescer
virtual void issueRequest(PacketPtr pkt, RubyRequestType type)
GPUCoalescer(const Params *)
RequestTable m_readRequestTable
std::shared_ptr< Request > RequestPtr
Stats::Scalar GPU_TCPLdHits
AbstractController * m_controller
Stats::Scalar GPU_TCCStHits
void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
EventFunctionWrapper deadlockCheckEvent
void kernelCallback(int wavfront_id)
Stats::Scalar GPU_TCPLdTransfers
virtual void regStats()
Callback to set stat parameters.
RequestStatus getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
void clearLocked(Addr addr)
Histogram & init(size_type size)
Set the parameters of this histogram.
Overload hash function for BasicBlockRange type.
bool isLocked(Addr addr, int context)
Stats::Scalar CP_TCPLdTransfers
virtual RequestStatus makeRequest(PacketPtr pkt)
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
int m_max_outstanding_requests
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
T * getPtr()
get a pointer to the data ptr.
Stats::Scalar CP_TCPStTransfers
RequestPtr req
A pointer to the original request.
RubyGPUCoalescerParams Params
Tick cyclesToTicks(Cycles c) const
bool areNSlotsAvailable(unsigned int n, Tick curTime)
Tick curTick()
The current simulated tick.
void printProgress(std::ostream &out) const
bool scheduled() const
Determine if the current event is scheduled.
void ruby_eviction_callback(Addr address)
CacheMemory * m_dataCache_ptr
void setMRU(Addr address)
SenderState * predecessor
void setData(const uint8_t *p)
Copy data into the packet from the provided pointer.
void readCallback(Addr address, DataBlock &data)
bool insertRequest(PacketPtr pkt, RubyRequestType request_type)
uint64_t Tick
Tick count type.
RubyRequestType primaryType
void mergeFrom(const DataBlock &data)
void writeCallback(Addr address, DataBlock &data)
bool assumingRfOCoherence
std::vector< Addr > newRequests
Stats::Scalar GPU_TCCLdHits
int m_store_waiting_on_load_cycles
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
Cycles curCycle() const
Determine the current cycle, corresponding to a tick aligned to a clock edge.
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Addr getOffset(Addr addr)
void completeHitCallback(std::vector< PacketPtr > &mylist, int len)
void recordRequestType(SequencerRequestType requestType)
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
void ruby_hit_callback(PacketPtr pkt)
void checkCoherence(Addr address)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
virtual const std::string name() const
int m_load_waiting_on_load_cycles
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
void hitCallback(GPUCoalescerRequest *request, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion)
Addr makeLineAddress(Addr addr)
std::unordered_map< int, PacketPtr > kernelEndList
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
std::string printAddress(Addr addr)
void reset()
Reset stat value to default.
HSAScope reqScopeToHSAScope(const RequestPtr &req)
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Stats::Scalar CP_TCPLdHits
void blockOnQueue(Addr, MessageBuffer *)
void regStats()
Callback to set stat parameters.
std::vector< Stats::Histogram * > m_typeLatencyHist
MessageBuffer * m_mandatory_q_ptr
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
bool isTagPresent(Addr address) const
PacketPtr mapAddrToPkt(Addr address)
std::vector< int > newKernelEnds
void removeRequest(GPUCoalescerRequest *request)
Declaration of the Packet class.
RubyRequestType secondaryType
SenderState * senderState
This packet's sender state.
Stats::Scalar CP_TCPStHits
int m_load_waiting_on_store_cycles
virtual Cycles mandatoryQueueLatency(const RubyRequestType &param_type)
void setLocked(Addr addr, int context)
RequestTable m_writeRequestTable
void schedule(Event &event, Tick when)
CacheMemory * m_instCache_ptr
void resetStats()
Callback to reset stats.
void setData(const uint8_t *data, int offset, int len)
bool handleLlsc(Addr address, GPUCoalescerRequest *request)
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
static const int NumArgumentRegs M5_VAR_USED
MachineType machineIDToMachineType(MachineID machID)
void print(std::ostream &out) const
bool m_runningGarnetStandalone
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
int m_store_waiting_on_store_cycles
bool isBlocked(Addr) const
static uint32_t getBlockSizeBytes()
void enqueue(MsgPtr message, Tick curTime, Tick delta)
void evictionCallback(Addr address)
Stats::Scalar GPU_TCPStTransfers
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Stats::Scalar CP_TCCLdHits