36 #include "config/the_isa.hh" 38 #if THE_ISA == X86_ISA 45 #include "debug/GPUCoalescer.hh" 46 #include "debug/MemoryAccess.hh" 47 #include "debug/ProtocolTrace.hh" 48 #include "debug/RubyPort.hh" 49 #include "debug/RubyStats.hh" 60 #include "params/RubyGPUCoalescer.hh" 65 RubyGPUCoalescerParams::create()
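// Helper that maps a request's synchronization scope onto an HSAScope value
// (wavefront, workgroup, device or system); the signature below follows the
// declaration reqScopeToHSAScope(const RequestPtr &req) referenced by this file.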
HSAScope
reqScopeToHSAScope(const RequestPtr &req)
{
    HSAScope accessScope = HSAScope_UNSPECIFIED;
    if (req->isScoped()) {
        if (req->isWavefrontScope()) {
            accessScope = HSAScope_WAVEFRONT;
        } else if (req->isWorkgroupScope()) {
            accessScope = HSAScope_WORKGROUP;
        } else if (req->isDeviceScope()) {
            accessScope = HSAScope_DEVICE;
        } else if (req->isSystemScope()) {
            accessScope = HSAScope_SYSTEM;
        } else {
            fatal("Bad scope type");
        }
    }
    return accessScope;
}
HSASegment
reqSegmentToHSASegment(const RequestPtr &req)
{
    HSASegment accessSegment = HSASegment_GLOBAL;

    if (req->isGlobalSegment()) {
        accessSegment = HSASegment_GLOBAL;
    } else if (req->isGroupSegment()) {
        accessSegment = HSASegment_GROUP;
    } else if (req->isPrivateSegment()) {
        accessSegment = HSASegment_PRIVATE;
    } else if (req->isKernargSegment()) {
        accessSegment = HSASegment_KERNARG;
    } else if (req->isReadonlySegment()) {
        accessSegment = HSASegment_READONLY;
    } else if (req->isSpillSegment()) {
        accessSegment = HSASegment_SPILL;
    } else if (req->isArgSegment()) {
        accessSegment = HSASegment_ARG;
    } else {
        fatal("Bad segment type");
    }

    return accessSegment;
}
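// Fragment of the GPUCoalescer constructor initializer list: issueEvent is an
// EventFunctionWrapper whose callback, completeIssue(), issues the coalesced
// requests collected during the current cycle.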
    issueEvent([this]{ completeIssue(); }, "Issue coalesced request",
    int total_outstanding = 0;

    for (; read != read_end; ++read) {
        panic("Possible Deadlock detected. Aborting!\n"
              "version: %d request.paddr: 0x%x m_readRequestTable: %d "
              "current time: %u issue_time: %d difference: %d\n",
              m_version,

    for (; write != write_end; ++write) {
        panic("Possible Deadlock detected. Aborting!\n"
              "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
              "current time: %u issue_time: %d difference: %d\n",
              m_version,
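// Loop nests over RubyRequestType_NUM and MachineType_NUM; these appear to come
// from resetStats(), which resets the per-type and per-machine latency histograms.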
    for (int i = 0; i < RubyRequestType_NUM; i++) {

        for (int j = 0; j < MachineType_NUM; j++) {

    for (int i = 0; i < MachineType_NUM; i++) {
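// getRequestStatus() fragments: a request is rejected as BufferFull (presumably
// when the outstanding-request limit is reached), reported as Aliased when it
// conflicts with an entry already in the read/write request tables, and Ready
// otherwise.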
        return RequestStatus_BufferFull;

        request_type != RubyRequestType_Locked_RMW_Write) {
        return RequestStatus_Aliased;

    if ((request_type == RubyRequestType_ST) ||
        (request_type == RubyRequestType_ATOMIC) ||
        (request_type == RubyRequestType_ATOMIC_RETURN) ||
        (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
        (request_type == RubyRequestType_RMW_Read) ||
        (request_type == RubyRequestType_RMW_Write) ||
        (request_type == RubyRequestType_Load_Linked) ||
        (request_type == RubyRequestType_Store_Conditional) ||
        (request_type == RubyRequestType_Locked_RMW_Read) ||
        (request_type == RubyRequestType_Locked_RMW_Write) ||
        (request_type == RubyRequestType_FLUSH)) {

            return RequestStatus_Aliased;

            return RequestStatus_Aliased;

            return RequestStatus_Aliased;

            return RequestStatus_Aliased;

    return RequestStatus_Ready;
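// insertKernel()/insertRequest() fragments: writes, atomics, RMWs, LL/SC and
// flushes are tracked in m_writeRequestTable, everything else in
// m_readRequestTable, with a debug print recording the inserted paddr and type.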
            kernelEndList.size());

        pkt->req->isLockedRMW() ||

    int total_outstanding M5_VAR_USED =

    if ((request_type == RubyRequestType_ST) ||
        (request_type == RubyRequestType_ATOMIC) ||
        (request_type == RubyRequestType_ATOMIC_RETURN) ||
        (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
        (request_type == RubyRequestType_RMW_Read) ||
        (request_type == RubyRequestType_RMW_Write) ||
        (request_type == RubyRequestType_Load_Linked) ||
        (request_type == RubyRequestType_Store_Conditional) ||
        (request_type == RubyRequestType_Locked_RMW_Read) ||
        (request_type == RubyRequestType_Locked_RMW_Write) ||
        (request_type == RubyRequestType_FLUSH)) {

        RequestTable::iterator i = r.first;

                "Inserting write request for paddr %#x for type %d\n",
                pkt->req->getPaddr(), i->second->m_type);

        RequestTable::iterator i = r.first;

                "Inserting read request for paddr %#x for type %d\n",
                pkt->req->getPaddr(), i->second->m_type);
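// removeRequest()/handleLlsc() fragments: Store_Conditional requests signal
// failure or success to the CPU by setting the request's extra data to 0 or 1;
// Load_Linked requests are handled in the following else-if branch.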
    if ((srequest->m_type == RubyRequestType_ST) ||
        (srequest->m_type == RubyRequestType_RMW_Read) ||
        (srequest->m_type == RubyRequestType_RMW_Write) ||
        (srequest->m_type == RubyRequestType_Load_Linked) ||
        (srequest->m_type == RubyRequestType_Store_Conditional) ||
        (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
        (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {

    if (request->m_type == RubyRequestType_Store_Conditional) {

            request->pkt->req->setExtraData(0);

            request->pkt->req->setExtraData(1);

    } else if (request->m_type == RubyRequestType_Load_Linked) {
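// writeCallback()/readCallback() overload fragments: the shorter overloads
// forward to the full versions that also carry initialRequestTime,
// forwardRequestTime and firstResponseTime for latency accounting.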
                            Cycles initialRequestTime,
                            Cycles forwardRequestTime,

                  initialRequestTime, forwardRequestTime, firstResponseTime,

                            Cycles initialRequestTime,
                            Cycles forwardRequestTime,

    assert((request->m_type == RubyRequestType_ST) ||
           (request->m_type == RubyRequestType_ATOMIC) ||
           (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
           (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
           (request->m_type == RubyRequestType_RMW_Read) ||
           (request->m_type == RubyRequestType_RMW_Write) ||
           (request->m_type == RubyRequestType_Load_Linked) ||
           (request->m_type == RubyRequestType_Store_Conditional) ||
           (request->m_type == RubyRequestType_Locked_RMW_Read) ||
           (request->m_type == RubyRequestType_Locked_RMW_Write) ||
           (request->m_type == RubyRequestType_FLUSH));

    if (request->m_type == RubyRequestType_Locked_RMW_Read) {

    } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {

                request->issue_time, forwardRequestTime, firstResponseTime,

                           Cycles initialRequestTime,
                           Cycles forwardRequestTime,

                 initialRequestTime, forwardRequestTime, firstResponseTime,

                           Cycles initialRequestTime,
                           Cycles forwardRequestTime,

    assert((request->m_type == RubyRequestType_LD) ||
           (request->m_type == RubyRequestType_IFETCH));

                request->issue_time, forwardRequestTime, firstResponseTime,
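// hitCallback() fragments: for loads, atomics-with-return, ifetches and other
// reading types the returned data block is presumably copied back into each
// coalesced packet; otherwise a warning notes that no data was transfered
// from Ruby to M5.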
    if (type == RubyRequestType_IFETCH) {

    for (int i = 0; i < len; ++i) {

        assert(type == reqCoalescer[request_line_address][i].primaryType);
        request_address = pkt->getAddr();

        if (pkt->getPtr<uint8_t>()) {
            if ((type == RubyRequestType_LD) ||
                (type == RubyRequestType_ATOMIC) ||
                (type == RubyRequestType_ATOMIC_RETURN) ||
                (type == RubyRequestType_IFETCH) ||
                (type == RubyRequestType_RMW_Read) ||
                (type == RubyRequestType_Locked_RMW_Read) ||
                (type == RubyRequestType_Load_Linked)) {

                    "WARNING. Data not transfered from Ruby to M5 for type " \
                    RubyRequestType_to_string(type));

        mylist.push_back(pkt);
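// makeRequest() fragments: kernel begin/end, acquire and release packets are
// handled first, then primary_type/secondary_type are derived from the packet
// (LL/SC, locked RMW, atomics, ifetch, loads, stores, flushes) before the
// request is recorded in reqCoalescer for its cache line.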
    if (pkt->req->isKernel()) {
        if (pkt->req->isAcquire()) {

            return RequestStatus_Issued;
        } else if (pkt->req->isRelease()) {

            if (pkt->req->hasContextId()) {
                wf_id = pkt->req->contextId();

            return RequestStatus_Issued;

        return RequestStatus_BufferFull;

    RubyRequestType primary_type = RubyRequestType_NULL;
    RubyRequestType secondary_type = RubyRequestType_NULL;

            primary_type = RubyRequestType_Store_Conditional;

            primary_type = RubyRequestType_Load_Linked;

        secondary_type = RubyRequestType_ATOMIC;
    } else if (pkt->req->isLockedRMW()) {

            primary_type = RubyRequestType_Locked_RMW_Write;

            primary_type = RubyRequestType_Locked_RMW_Read;

        secondary_type = RubyRequestType_ST;

        primary_type = RubyRequestType_ATOMIC;
        secondary_type = RubyRequestType_ATOMIC;

        if (pkt->req->isInstFetch()) {
            primary_type = secondary_type = RubyRequestType_IFETCH;

#if THE_ISA == X86_ISA
            uint32_t flags = pkt->req->getFlags();
            bool storeCheck = flags &
#else
            bool storeCheck = false;
#endif // X86_ISA

                primary_type = RubyRequestType_RMW_Read;
                secondary_type = RubyRequestType_ST;

                primary_type = secondary_type = RubyRequestType_LD;

        primary_type = secondary_type = RubyRequestType_ST;

        primary_type = secondary_type = RubyRequestType_FLUSH;
    } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {

        if (pkt->req->hasContextId()) {
            wf_id = pkt->req->contextId();

            return RequestStatus_Issued;

        return RequestStatus_Issued;

        panic("Unsupported ruby packet type\n");

    if (status != RequestStatus_Ready)

    } else if (primary_type !=

        return RequestStatus_Aliased;
    } else if (pkt->req->isLockedRMW() ||

        return RequestStatus_Aliased;
    } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
               pkt->req->contextId() !=

        return RequestStatus_Aliased;

    reqCoalescer[line_addr].emplace_back(pkt, primary_type, secondary_type);

    return RequestStatus_Issued;
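// issueRequest() fragments: per-packet offsets and sizes within the cache line
// build up the access mask, data block and atomic-op list that are passed to
// the RubyRequest, together with the HSA scope and segment computed above.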
    if (pkt != NULL && pkt->req->hasContextId()) {
        proc_id = pkt->req->contextId();

    if (pkt->req->hasPC()) {
        pc = pkt->req->getPC();

    for (int i = 0; i < tableSize; i++) {

        uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
        uint32_t tmpSize = tmpPkt->getSize();

            atomicOps.push_back(tmpAtomicOp);
        } else if (tmpPkt->isWrite()) {
            dataBlock.setData(tmpPkt->getPtr<uint8_t>(),

        for (int j = 0; j < tmpSize; j++) {
            accessMask[tmpOffset + j] = true;

    std::shared_ptr<RubyRequest> msg;

                                  RubyAccessMode_Supervisor, pkt,
                                  PrefetchBit_No, proc_id, 100,
                                  blockSize, accessMask,
                                  dataBlock, atomicOps,
                                  accessScope, accessSegment);

                                  RubyAccessMode_Supervisor, pkt,
                                  PrefetchBit_No, proc_id, 100,
                                  blockSize, accessMask,
                                  accessScope, accessSegment);

    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",

             RubyRequestType_to_string(secondary_type));

    fatal_if(secondary_type == RubyRequestType_IFETCH,
             "there should not be any I-Fetch requests in the GPU Coalescer");
template <class KEY, class VALUE>
operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)

    for (auto i = map.begin(); i != map.end(); ++i)
        out << " " << i->first << "=" << i->second;

    DPRINTF(RubyStats, "Recorded statistic: %s\n",
            SequencerRequestType_to_string(requestType));

    for (int i = 0; i < len; ++i) {

                i, pkt->req->getPaddr());

            panic("GPUCoalescer::makeRequest should never be called if the "
                  "request is already outstanding\n");

    for (int i = 0; i < len; i++) {
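// atomicCallback() fragments: only ATOMIC, ATOMIC_RETURN and ATOMIC_NO_RETURN
// requests are expected here, and the returned data appears to be copied back
// only when the request is not ATOMIC_NO_RETURN.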
    assert((srequest->m_type == RubyRequestType_ATOMIC) ||
           (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
           (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));

                srequest->issue_time, Cycles(0), Cycles(0), true, false);

    for (int i = 0; i < len; ++i) {

        assert(srequest->m_type ==

        request_address = (pkt->getAddr());

        if (pkt->getPtr<uint8_t>() &&
            srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {

                    "WARNING. Data not transfered from Ruby to M5 for type " \
                    RubyRequestType_to_string(srequest->m_type));

        mylist.push_back(pkt);
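// recordCPReadCallBack()/recordCPWriteCallBack(), completeHitCallback() and
// mapAddrToPkt() fragments: CP hits are classified by comparing the local and
// sender machine IDs, and completeHitCallback() hands each packet back to its
// RubyPort via port->hitCallback().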
    if (myMachID == senderMachID) {

    if (myMachID == senderMachID) {

    for (int i = 0; i < len; ++i) {

        assert(port != NULL);

        port->hitCallback(mylist[i]);

    return request->pkt;
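// recordMissLatency() fragments: total latency is completion_time minus
// issued_time; hits are attributed to the TCP, L1Cache_wCC or TCC by machine
// type, and the issue/forward/first-response breakdown is sampled only when
// the four timestamps are monotonically ordered.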
                                Cycles initialRequestTime,
                                Cycles forwardRequestTime,
                                Cycles firstResponseTime,
                                bool success, bool isRegion)

    assert(completion_time >= issued_time);
    Cycles total_lat = completion_time - issued_time;

    if (mach == MachineType_TCP) {
        if (type == RubyRequestType_LD) {

    } else if (mach == MachineType_L1Cache_wCC) {
        if (type == RubyRequestType_LD) {

    } else if (mach == MachineType_TCC) {
        if (type == RubyRequestType_LD) {

        if (type == RubyRequestType_LD) {

    if (total_lat != Cycles(0)) {

    if (mach != MachineType_NUM) {

        if ((issued_time <= initialRequestTime) &&
            (initialRequestTime <= forwardRequestTime) &&
            (forwardRequestTime <= firstResponseTime) &&
            (firstResponseTime <= completion_time)) {

                initialRequestTime - issued_time);

                forwardRequestTime - initialRequestTime);

                firstResponseTime - forwardRequestTime);

                completion_time - firstResponseTime);

    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",

             success ? "Done" : "SC_Failed", "", "",
    for (int i = 0; i < RubyRequestType_NUM; i++) {

    for (int i = 0; i < MachineType_NUM; i++) {

    for (int i = 0; i < RubyRequestType_NUM; i++) {

        for (int j = 0; j < MachineType_NUM; j++) {

        .desc("loads that hit in the TCP")

        .name(name() + ".gpu_tcp_ld_transfers")
        .desc("TCP to TCP load transfers")

        .desc("loads that hit in the TCC")

        .desc("loads that miss in the GPU")

        .desc("stores that hit in the TCP")

        .name(name() + ".gpu_tcp_st_transfers")
        .desc("TCP to TCP store transfers")

        .desc("stores that hit in the TCC")

        .desc("stores that miss in the GPU")

        .desc("loads that hit in the TCP")

        .name(name() + ".cp_tcp_ld_transfers")
        .desc("TCP to TCP load transfers")

        .desc("loads that hit in the TCC")

        .desc("loads that miss in the GPU")

        .desc("stores that hit in the TCP")

        .name(name() + ".cp_tcp_st_transfers")
        .desc("TCP to TCP store transfers")

        .desc("stores that hit in the TCC")

        .desc("stores that miss in the GPU")