gem5  v20.0.0.3
GPUCoalescer.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
35 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
36 
37 #include <iostream>
38 #include <unordered_map>
39 
40 #include "base/statistics.hh"
41 #include "mem/request.hh"
44 #include "mem/ruby/protocol/HSAScope.hh"
45 #include "mem/ruby/protocol/HSASegment.hh"
46 #include "mem/ruby/protocol/PrefetchBit.hh"
47 #include "mem/ruby/protocol/RubyAccessMode.hh"
48 #include "mem/ruby/protocol/RubyRequestType.hh"
49 #include "mem/ruby/protocol/SequencerRequestType.hh"
51 
52 class DataBlock;
53 class CacheMsg;
54 class MachineID;
55 class CacheMemory;
56 
57 class RubyGPUCoalescerParams;
58 
// Free-function declarations; no definition is visible in this header.
// Each takes a RequestPtr by const reference and returns an HSAScope /
// HSASegment value (types pulled in from mem/ruby/protocol/ above).
// Presumably they translate the request's scope/segment attributes into
// the Ruby protocol representation -- confirm against the definitions.
59 HSAScope reqScopeToHSAScope(const RequestPtr &req);
60 HSASegment reqSegmentToHSASegment(const RequestPtr &req);
61 
63 {
65  RubyRequestType m_type;
67 
68  GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
69  Cycles _issue_time)
70  : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
71  {}
72 };
73 
75 {
76  public:
77  RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
78  : pkt(pkt), primaryType(p_type), secondaryType(s_type)
79  {
80  }
81 
82  RequestDesc() : pkt(nullptr), primaryType(RubyRequestType_NULL),
83  secondaryType(RubyRequestType_NULL)
84  {
85  }
86 
88  RubyRequestType primaryType;
89  RubyRequestType secondaryType;
90 };
91 
// Stream-insertion operator for GPUCoalescerRequest. Only declared here;
// the output format is determined by a definition outside this header.
92 std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
93 
// GPUCoalescer: a RubyPort subclass that declares the read/write/atomic
// completion callbacks, the request-issue entry points, and the request and
// coalescing tables seen below.
//
// NOTE(review): this text is a doxygen source listing. The embedded line
// numbering skips values (for example 170 -> 172 and 191 -> 193), which
// means lines that carried documented members were dropped. Several
// declarations below therefore appear without their return type or name.
94 class GPUCoalescer : public RubyPort
95 {
96  public:
97  typedef RubyGPUCoalescerParams Params;
98  GPUCoalescer(const Params *);
99  ~GPUCoalescer();
100 
101  // Public Methods
102  void wakeup(); // Used only for deadlock detection
103 
     // Statistics-related declarations; resetStats() and regStats() override
     // base-class virtuals, collateStats() does not.
104  void printProgress(std::ostream& out) const;
105  void resetStats() override;
106  void collateStats();
107  void regStats() override;
108 
     // writeCallback overloads. The shortest form takes only the address and
     // data block; longer forms add the MachineType, three Cycles timestamps
     // (initial request, forward request, first response) and, in one
     // overload, an isRegion flag.
109  void writeCallback(Addr address, DataBlock& data);
110 
111  void writeCallback(Addr address,
112  MachineType mach,
113  DataBlock& data);
114 
115  void writeCallback(Addr address,
116  MachineType mach,
117  DataBlock& data,
118  Cycles initialRequestTime,
119  Cycles forwardRequestTime,
120  Cycles firstResponseTime,
121  bool isRegion);
122 
123  void writeCallback(Addr address,
124  MachineType mach,
125  DataBlock& data,
126  Cycles initialRequestTime,
127  Cycles forwardRequestTime,
128  Cycles firstResponseTime);
129 
     // readCallback overloads, mirroring the writeCallback parameter sets.
130  void readCallback(Addr address, DataBlock& data);
131 
132  void readCallback(Addr address,
133  MachineType mach,
134  DataBlock& data);
135 
136  void readCallback(Addr address,
137  MachineType mach,
138  DataBlock& data,
139  Cycles initialRequestTime,
140  Cycles forwardRequestTime,
141  Cycles firstResponseTime);
142 
143  void readCallback(Addr address,
144  MachineType mach,
145  DataBlock& data,
146  Cycles initialRequestTime,
147  Cycles forwardRequestTime,
148  Cycles firstResponseTime,
149  bool isRegion);
150  /* atomics need their own callback because the data
151  might be const coming from SLICC */
152  void atomicCallback(Addr address,
153  MachineType mach,
154  const DataBlock& data);
155 
156  void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
157  void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
158 
159  // Alternate implementations in VIPER Coalescer
160  virtual RequestStatus makeRequest(PacketPtr pkt) override;
161 
     // Returns the current value of m_outstanding_count.
162  int outstandingCount() const override { return m_outstanding_count; }
163 
     // Returns whether deadlockCheckEvent is currently scheduled.
164  bool
165  isDeadlockEventScheduled() const override
166  {
167  return deadlockCheckEvent.scheduled();
168  }
169 
     // NOTE(review): the line naming this function (embedded line 171) is
     // missing from the listing. The index at the end of this page lists
     // `void descheduleDeadlockEvent() override`, which presumably belongs
     // here -- confirm. The body deschedules deadlockCheckEvent.
170  void
172  {
173  deschedule(deadlockCheckEvent);
174  }
175 
176  bool empty() const;
177 
178  void print(std::ostream& out) const;
179  void checkCoherence(Addr address);
180 
181  void markRemoved();
182  void removeRequest(GPUCoalescerRequest* request);
183  void evictionCallback(Addr address);
184  void completeIssue();
185 
186  void insertKernel(int wavefront_id, PacketPtr pkt);
187 
188  void recordRequestType(SequencerRequestType requestType);
     // Histogram accessors. The non-indexed ones return a reference to the
     // histogram member directly; the indexed ones dereference the pointer
     // stored at the given index (no bounds check is visible here).
     // NOTE(review): several accessors below have lost their signature
     // and/or return-type line in this listing; only the bodies remain.
189  Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; }
190 
191  Stats::Histogram& getLatencyHist() { return m_latencyHist; }
193  { return *m_typeLatencyHist[t]; }
194 
196  { return m_missLatencyHist; }
198  { return *m_missTypeLatencyHist[t]; }
199 
201  { return *m_missMachLatencyHist[t]; }
202 
204  getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
205  { return *m_missTypeMachLatencyHist[r][t]; }
206 
208  { return *m_IssueToInitialDelayHist[t]; }
209 
211  getInitialToForwardDelayHist(const MachineType t) const
212  { return *m_InitialToForwardDelayHist[t]; }
213 
215  getForwardRequestToFirstResponseHist(const MachineType t) const
216  { return *m_ForwardToFirstResponseDelayHist[t]; }
217 
219  getFirstResponseToCompletionDelayHist(const MachineType t) const
220  { return *m_FirstResponseToCompletionDelayHist[t]; }
221 
222  // Changed to protected to enable inheritance by VIPER Coalescer
223  protected:
224  bool tryCacheAccess(Addr addr, RubyRequestType type,
225  Addr pc, RubyAccessMode access_mode,
226  int size, DataBlock*& data_ptr);
227  // Alternate implementations in VIPER Coalescer
228  virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
229 
     // NOTE(review): the parameter is spelled `wavfront_id` here but
     // `wavefront_id` in insertKernel() above. Harmless in a declaration,
     // but likely a typo.
230  void kernelCallback(int wavfront_id);
231 
     // hitCallback and recordMissLatency take the same request, machine
     // type, and three Cycles timestamps, plus success and isRegion flags;
     // hitCallback additionally takes the data block.
232  void hitCallback(GPUCoalescerRequest* request,
233  MachineType mach,
234  DataBlock& data,
235  bool success,
236  Cycles initialRequestTime,
237  Cycles forwardRequestTime,
238  Cycles firstResponseTime,
239  bool isRegion);
240  void recordMissLatency(GPUCoalescerRequest* request,
241  MachineType mach,
242  Cycles initialRequestTime,
243  Cycles forwardRequestTime,
244  Cycles firstResponseTime,
245  bool success, bool isRegion);
246  void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
247  PacketPtr mapAddrToPkt(Addr address);
248 
249 
250  RequestStatus getRequestStatus(PacketPtr pkt,
251  RubyRequestType request_type);
252  bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
253 
254  bool handleLlsc(Addr address, GPUCoalescerRequest* request);
255 
257 
258 
259  // Changed to protected to enable inheritance by VIPER Coalescer
260  protected:
263 
266 
267  // We need to track both the primary and secondary request types.
268  // The secondary request type comprises a subset of RubyRequestTypes that
269  // are understood by the L1 Controller. A primary request type can be any
270  // RubyRequestType.
     // Maps an address to the vector of RequestDesc entries recorded for it.
271  typedef std::unordered_map<Addr, std::vector<RequestDesc>> CoalescingTable;
272  CoalescingTable reqCoalescer;
274 
     // Maps an address to a GPUCoalescerRequest pointer; one table is kept
     // for writes and one for reads.
275  typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
276  RequestTable m_writeRequestTable;
277  RequestTable m_readRequestTable;
278  // Global outstanding request count, across all request tables
     // NOTE(review): the member the comment above describes is not present
     // in this listing (numbering jumps from 278 to 281). kernelEndList
     // below is a separate member, mapping an int key to a PacketPtr.
281  std::unordered_map<int, PacketPtr> kernelEndList;
283 
288 
290 
293 
294  // m5 style stats for TCP hit/miss counts
299 
304 
309 
314 
317 
321 
326 
331 
337 
338 private:
339  // Private copy constructor and assignment operator
     // Only declared here; no definition is visible in this header.
     // Presumably intended to prevent copying -- confirm.
340  GPUCoalescer(const GPUCoalescer& obj);
341  GPUCoalescer& operator=(const GPUCoalescer& obj);
342 };
343 
// Stream-insertion operator for GPUCoalescer: delegates the formatting to
// obj.print(out), then flushes the stream and returns it so that insertions
// can be chained.
344 inline std::ostream&
345 operator<<(std::ostream& out, const GPUCoalescer& obj)
346 {
347  obj.print(out);
348  out << std::flush;
349  return out;
350 }
351 
352 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
Stats::Histogram & getLatencyHist()
Stats::Scalar CP_TCCStHits
EventFunctionWrapper issueEvent
RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
Definition: GPUCoalescer.hh:77
Stats::Scalar GPU_TCPStHits
int m_deadlock_threshold
Cycles is a wrapper class for representing cycle counts, i.e.
Definition: types.hh:81
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this s...
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
Stats::Histogram & getIssueToInitialDelayHist(uint32_t t) const
RubyRequestType m_type
Definition: GPUCoalescer.hh:65
Stats::Histogram & getFirstResponseToCompletionDelayHist(const MachineType t) const
CoalescingTable reqCoalescer
RequestTable m_readRequestTable
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
HSASegment reqSegmentToHSASegment(const RequestPtr &req)
Definition: GPUCoalescer.cc:91
std::shared_ptr< Request > RequestPtr
Definition: request.hh:81
Stats::Scalar GPU_TCPLdHits
Stats::Scalar CP_StMiss
ip6_addr_t addr
Definition: inet.hh:330
Stats::Scalar GPU_TCCStHits
EventFunctionWrapper deadlockCheckEvent
Stats::Histogram & getMissTypeLatencyHist(uint32_t t)
Stats::Scalar CP_LdMiss
Stats::Scalar GPU_TCPLdTransfers
Stats::Histogram & getForwardRequestToFirstResponseHist(const MachineType t) const
Stats::Scalar CP_TCPLdTransfers
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
int m_max_outstanding_requests
Stats::Histogram & getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
std::ostream & operator<<(std::ostream &out, const GPUCoalescerRequest &obj)
std::unordered_map< Addr, GPUCoalescerRequest * > RequestTable
Declaration of Statistics objects.
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2505
STL vector class.
Definition: stl.hh:37
Stats::Scalar CP_TCPStTransfers
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:97
std::unordered_map< Addr, std::vector< RequestDesc > > CoalescingTable
Stats::Histogram & getMissMachLatencyHist(uint32_t t) const
uint8_t type
Definition: inet.hh:328
Bitfield< 4 > pc
Stats::Scalar GPU_LdMiss
CacheMemory * m_dataCache_ptr
RubyRequestType primaryType
Definition: GPUCoalescer.hh:88
bool assumingRfOCoherence
std::vector< Addr > newRequests
Stats::Scalar GPU_TCCLdHits
void descheduleDeadlockEvent() override
int m_store_waiting_on_load_cycles
Bitfield< 18, 16 > len
A simple histogram stat.
Definition: statistics.hh:2626
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
bool isDeadlockEventScheduled() const override
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
int m_load_waiting_on_load_cycles
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:249
std::unordered_map< int, PacketPtr > kernelEndList
Stats::Scalar GPU_StMiss
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Stats::Histogram & getOutstandReqHist()
Stats::Scalar CP_TCPLdHits
std::vector< Stats::Histogram * > m_typeLatencyHist
Stats::Histogram & getMissLatencyHist()
Stats::Histogram & getTypeLatencyHist(uint32_t t)
std::vector< int > newKernelEnds
GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type, Cycles _issue_time)
Definition: GPUCoalescer.hh:68
HSAScope reqScopeToHSAScope(const RequestPtr &req)
Definition: GPUCoalescer.cc:71
RubyRequestType secondaryType
Definition: GPUCoalescer.hh:89
int outstandingCount() const override
Stats::Scalar CP_TCPStHits
int m_load_waiting_on_store_cycles
RequestTable m_writeRequestTable
Stats::Histogram & getInitialToForwardDelayHist(const MachineType t) const
CacheMemory * m_instCache_ptr
PacketPtr pkt
Definition: GPUCoalescer.hh:87
int m_outstanding_count
Bitfield< 5 > t
void print(std::ostream &out) const
bool m_runningGarnetStandalone
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
int m_store_waiting_on_store_cycles
const char data[]
Stats::Scalar GPU_TCPStTransfers
bool m_deadlock_check_scheduled
Stats::Scalar CP_TCCLdHits

Generated on Fri Jul 3 2020 15:53:04 for gem5 by doxygen 1.8.13