gem5  v19.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
GPUCoalescer.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Sooraj Puthoor
34  */
35 
36 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
37 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
38 
39 #include <iostream>
40 #include <unordered_map>
41 
42 #include "base/statistics.hh"
43 #include "mem/request.hh"
46 #include "mem/ruby/protocol/HSAScope.hh"
47 #include "mem/ruby/protocol/HSASegment.hh"
48 #include "mem/ruby/protocol/PrefetchBit.hh"
49 #include "mem/ruby/protocol/RubyAccessMode.hh"
50 #include "mem/ruby/protocol/RubyRequestType.hh"
51 #include "mem/ruby/protocol/SequencerRequestType.hh"
53 
54 class DataBlock;
55 class CacheMsg;
56 class MachineID;
57 class CacheMemory;
58 
59 class RubyGPUCoalescerParams;
60 // Request-to-protocol-enum helpers; both are only declared here and are defined in GPUCoalescer.cc.
61 HSAScope reqScopeToHSAScope(const RequestPtr &req);
62 HSASegment reqSegmentToHSASegment(const RequestPtr &req);
63 // Plain record bundling a packet, its Ruby request type and an issue time (a Cycles value).
64 struct GPUCoalescerRequest
65 {
66  PacketPtr pkt;
67  RubyRequestType m_type;
68  Cycles issue_time;
69 
70  GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
71  Cycles _issue_time)
72  : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
73  {}
74 };
75 // Descriptor pairing a packet with a primary and a secondary RubyRequestType; default-constructs to a null packet and RubyRequestType_NULL types.
76 class RequestDesc
77 {
78  public:
79  RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
80  : pkt(pkt), primaryType(p_type), secondaryType(s_type)
81  {
82  }
83 
84  RequestDesc() : pkt(nullptr), primaryType(RubyRequestType_NULL),
85  secondaryType(RubyRequestType_NULL)
86  {
87  }
88 
89  PacketPtr pkt;
90  RubyRequestType primaryType;
91  RubyRequestType secondaryType;
92 };
93 // Stream-insertion operator for GPUCoalescerRequest; declared here only, the definition is not in this header.
94 std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
95 
96 class GPUCoalescer : public RubyPort
97 {
98  public:
99  typedef RubyGPUCoalescerParams Params;
100  GPUCoalescer(const Params *);
101  ~GPUCoalescer();
102 
103  // Public Methods
104  void wakeup(); // Used only for deadlock detection
105 
106  void printProgress(std::ostream& out) const;
107  void resetStats();
108  void collateStats();
109  void regStats();
110 
111  void writeCallback(Addr address, DataBlock& data);
112 
113  void writeCallback(Addr address,
114  MachineType mach,
115  DataBlock& data);
116 
117  void writeCallback(Addr address,
118  MachineType mach,
119  DataBlock& data,
120  Cycles initialRequestTime,
121  Cycles forwardRequestTime,
122  Cycles firstResponseTime,
123  bool isRegion);
124 
125  void writeCallback(Addr address,
126  MachineType mach,
127  DataBlock& data,
128  Cycles initialRequestTime,
129  Cycles forwardRequestTime,
130  Cycles firstResponseTime);
131 
132  void readCallback(Addr address, DataBlock& data);
133 
134  void readCallback(Addr address,
135  MachineType mach,
136  DataBlock& data);
137 
138  void readCallback(Addr address,
139  MachineType mach,
140  DataBlock& data,
141  Cycles initialRequestTime,
142  Cycles forwardRequestTime,
143  Cycles firstResponseTime);
144 
145  void readCallback(Addr address,
146  MachineType mach,
147  DataBlock& data,
148  Cycles initialRequestTime,
149  Cycles forwardRequestTime,
150  Cycles firstResponseTime,
151  bool isRegion);
152  /* atomics need their own callback because the data
153  might be const coming from SLICC */
154  void atomicCallback(Addr address,
155  MachineType mach,
156  const DataBlock& data);
157 
158  void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
159  void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
160 
161  // Alternate implementations in VIPER Coalescer
162  virtual RequestStatus makeRequest(PacketPtr pkt);
163 
164  int outstandingCount() const { return m_outstanding_count; }
165  // Reports whether deadlockCheckEvent is currently scheduled.
166  bool
167  isDeadlockEventScheduled() const
168  {
169  return deadlockCheckEvent.scheduled();
170  }
171  // Passes deadlockCheckEvent to deschedule(); performs no scheduled() check of its own first.
172  void
173  descheduleDeadlockEvent()
174  {
175  deschedule(deadlockCheckEvent);
176  }
177 
178  bool empty() const;
179 
180  void print(std::ostream& out) const;
181  void checkCoherence(Addr address);
182 
183  void markRemoved();
184  void removeRequest(GPUCoalescerRequest* request);
185  void evictionCallback(Addr address);
186  void completeIssue();
187 
188  void insertKernel(int wavefront_id, PacketPtr pkt);
189 
190  void recordRequestType(SequencerRequestType requestType);
191  Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
192  // Latency histogram accessors. Indexed variants dereference pointers stored in vectors and do no bounds checking.
193  Stats::Histogram& getLatencyHist() { return m_latencyHist; }
194  Stats::Histogram& getTypeLatencyHist(uint32_t t)
195  { return *m_typeLatencyHist[t]; }
196  // Miss-latency histograms: overall, then indexed by t.
197  Stats::Histogram& getMissLatencyHist()
198  { return m_missLatencyHist; }
199  Stats::Histogram& getMissTypeLatencyHist(uint32_t t)
200  { return *m_missTypeLatencyHist[t]; }
201 
202  Stats::Histogram& getMissMachLatencyHist(uint32_t t) const
203  { return *m_missMachLatencyHist[t]; }
204  // Two-level lookup [r][t]; presumably r is a request type and t a machine type -- confirm against the .cc.
205  Stats::Histogram&
206  getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
207  { return *m_missTypeMachLatencyHist[r][t]; }
208  // Accessors for the histograms that record the breakdown of miss latency.
209  Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const
210  { return *m_IssueToInitialDelayHist[t]; }
211 
212  Stats::Histogram&
213  getInitialToForwardDelayHist(const MachineType t) const
214  { return *m_InitialToForwardDelayHist[t]; }
215 
216  Stats::Histogram&
217  getForwardRequestToFirstResponseHist(const MachineType t) const
218  { return *m_ForwardToFirstResponseDelayHist[t]; }
219 
220  Stats::Histogram&
221  getFirstResponseToCompletionDelayHist(const MachineType t) const
222  { return *m_FirstResponseToCompletionDelayHist[t]; }
223 
224  // Changed to protected to enable inheritance by VIPER Coalescer
225  protected:
226  bool tryCacheAccess(Addr addr, RubyRequestType type,
227  Addr pc, RubyAccessMode access_mode,
228  int size, DataBlock*& data_ptr);
229  // Alternate implementations in VIPER Coalescer
230  virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
231 
232  void kernelCallback(int wavfront_id);
233 
234  void hitCallback(GPUCoalescerRequest* request,
235  MachineType mach,
236  DataBlock& data,
237  bool success,
238  Cycles initialRequestTime,
239  Cycles forwardRequestTime,
240  Cycles firstResponseTime,
241  bool isRegion);
242  void recordMissLatency(GPUCoalescerRequest* request,
243  MachineType mach,
244  Cycles initialRequestTime,
245  Cycles forwardRequestTime,
246  Cycles firstResponseTime,
247  bool success, bool isRegion);
248  void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
249  PacketPtr mapAddrToPkt(Addr address);
250 
251 
252  RequestStatus getRequestStatus(PacketPtr pkt,
253  RubyRequestType request_type);
254  bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
255 
256  bool handleLlsc(Addr address, GPUCoalescerRequest* request);
257 
259 
260 
261  // Changed to protected to enable inheritance by VIPER Coalescer
262  protected:
265 
268 
269  // We need to track both the primary and secondary request types.
270  // The secondary request type comprises a subset of RubyRequestTypes that
271  // are understood by the L1 Controller. A primary request type can be any
272  // RubyRequestType.
273  typedef std::unordered_map<Addr, std::vector<RequestDesc>> CoalescingTable;
274  CoalescingTable reqCoalescer;
276 
277  typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
278  RequestTable m_writeRequestTable;
279  RequestTable m_readRequestTable;
280  // Global outstanding request count, across all request tables
283  std::unordered_map<int, PacketPtr> kernelEndList;
285 
290 
292 
295 
296  // m5 style stats for TCP hit/miss counts
301 
306 
311 
316 
319 
323 
328 
333 
339 
340 private:
341  // Copying is disallowed: copy constructor and copy assignment are explicitly deleted so misuse fails at compile time.
342  GPUCoalescer(const GPUCoalescer& obj) = delete;
343  GPUCoalescer& operator=(const GPUCoalescer& obj) = delete;
344 };
345 // Stream-insertion support for GPUCoalescer: delegates to print(), then flushes and returns the same stream.
346 inline std::ostream&
347 operator<<(std::ostream& out, const GPUCoalescer& obj)
348 {
349  obj.print(out);
350  return out.flush();
351 }
353 
354 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
Stats::Histogram & getLatencyHist()
Stats::Scalar CP_TCCStHits
EventFunctionWrapper issueEvent
RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
Definition: GPUCoalescer.hh:79
Stats::Scalar GPU_TCPStHits
int m_deadlock_threshold
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:83
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
Stats::Histogram & getIssueToInitialDelayHist(uint32_t t) const
bool isDeadlockEventScheduled() const
RubyRequestType m_type
Definition: GPUCoalescer.hh:67
Stats::Histogram & getFirstResponseToCompletionDelayHist(const MachineType t) const
CoalescingTable reqCoalescer
RequestTable m_readRequestTable
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
HSASegment reqSegmentToHSASegment(const RequestPtr &req)
Definition: GPUCoalescer.cc:93
std::shared_ptr< Request > RequestPtr
Definition: request.hh:83
Stats::Scalar GPU_TCPLdHits
Stats::Scalar CP_StMiss
ip6_addr_t addr
Definition: inet.hh:335
Stats::Scalar GPU_TCCStHits
EventFunctionWrapper deadlockCheckEvent
Stats::Histogram & getMissTypeLatencyHist(uint32_t t)
Stats::Scalar CP_LdMiss
Stats::Scalar GPU_TCPLdTransfers
Stats::Histogram & getForwardRequestToFirstResponseHist(const MachineType t) const
Stats::Scalar CP_TCPLdTransfers
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
int m_max_outstanding_requests
Stats::Histogram & getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
std::ostream & operator<<(std::ostream &out, const GPUCoalescerRequest &obj)
std::unordered_map< Addr, GPUCoalescerRequest * > RequestTable
Declaration of Statistics objects.
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2508
void descheduleDeadlockEvent()
STL vector class.
Definition: stl.hh:40
Stats::Scalar CP_TCPStTransfers
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:99
std::unordered_map< Addr, std::vector< RequestDesc > > CoalescingTable
Stats::Histogram & getMissMachLatencyHist(uint32_t t) const
uint8_t type
Definition: inet.hh:333
Bitfield< 4 > pc
Stats::Scalar GPU_LdMiss
CacheMemory * m_dataCache_ptr
int outstandingCount() const
RubyRequestType primaryType
Definition: GPUCoalescer.hh:90
bool assumingRfOCoherence
std::vector< Addr > newRequests
Stats::Scalar GPU_TCCLdHits
int m_store_waiting_on_load_cycles
Bitfield< 18, 16 > len
A simple histogram stat.
Definition: statistics.hh:2629
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:142
int m_load_waiting_on_load_cycles
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:255
std::unordered_map< int, PacketPtr > kernelEndList
Stats::Scalar GPU_StMiss
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Stats::Histogram & getOutstandReqHist()
Stats::Scalar CP_TCPLdHits
std::vector< Stats::Histogram * > m_typeLatencyHist
Stats::Histogram & getMissLatencyHist()
Stats::Histogram & getTypeLatencyHist(uint32_t t)
std::vector< int > newKernelEnds
GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type, Cycles _issue_time)
Definition: GPUCoalescer.hh:70
HSAScope reqScopeToHSAScope(const RequestPtr &req)
Definition: GPUCoalescer.cc:73
RubyRequestType secondaryType
Definition: GPUCoalescer.hh:91
Stats::Scalar CP_TCPStHits
int m_load_waiting_on_store_cycles
RequestTable m_writeRequestTable
Stats::Histogram & getInitialToForwardDelayHist(const MachineType t) const
CacheMemory * m_instCache_ptr
PacketPtr pkt
Definition: GPUCoalescer.hh:89
int m_outstanding_count
Bitfield< 5 > t
void print(std::ostream &out) const
bool m_runningGarnetStandalone
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
int m_store_waiting_on_store_cycles
const char data[]
Stats::Scalar GPU_TCPStTransfers
bool m_deadlock_check_scheduled
Stats::Scalar CP_TCCLdHits

Generated on Fri Feb 28 2020 16:27:02 for gem5 by doxygen 1.8.13